[dasher] Chinese: unify treatment of punctuation, fix GetRoot



commit 1cc7fe35056a89a7c0eefce3225557351229ebe4
Author: Alan Lawrence <acl33 inf phy cam ac uk>
Date:   Thu Feb 10 11:20:31 2011 +0000

    Chinese: unify treatment of punctuation, fix GetRoot
    
    Punctuation included in CH alph (in singleton groups matching PY syms) & context
      => rm special casing & LAST_PY, combine CMand{Sym,Node}
    NB space & paragraph syms can't be in groups => identified by symbol displaytext
    
    CreateSymbolNode calls Create{ConvRoot/CHSymbol} => avoid singleton CConvRoots
    
    CHAlph syms coloured according to XML, fallback to colourStore[phase][sym%3]
    
    Mandarin overrides GetRoot to use Chinese alph for ctx syms, w/ code common w/
     AlphabetMgr separated out into GetContextSymbols, makeSymbol/makeGroup inlined
    
    Also set chinese alphabet default context (alphabet.chineseRuby.xml) to "",
      change displaytext of PY syms/groups "m"(2) -> &#x1E3F, "a"(5) -> &#x00E5
        (avoids conflict w/ roman letters a & m, which were "punctuation"!),
      & change chinese space symbol to &#x25a1; (box) to match pinyin.

 Data/alphabets/alphabet.chineseRuby.xml |  341 +++++++++++++++++++++++++------
 Data/alphabets/alphabet.spyDict.xml     |    6 +-
 Src/DasherCore/AlphabetManager.cpp      |  109 +++++------
 Src/DasherCore/AlphabetManager.h        |   16 +-
 Src/DasherCore/MandarinAlphMgr.cpp      |  204 ++++++++-----------
 Src/DasherCore/MandarinAlphMgr.h        |   67 ++++---
 6 files changed, 461 insertions(+), 282 deletions(-)
---
diff --git a/Data/alphabets/alphabet.chineseRuby.xml b/Data/alphabets/alphabet.chineseRuby.xml
index 0d2d29c..e5aeee3 100644
--- a/Data/alphabets/alphabet.chineseRuby.xml
+++ b/Data/alphabets/alphabet.chineseRuby.xml
@@ -2,76 +2,16 @@
 <!DOCTYPE alphabets SYSTEM "alphabet.dtd">
 <?xml-stylesheet type="text/xsl" href="alphabet.xsl"?>
 <alphabets>
-<alphabet name="Chinese &#31616;&#20307;&#20013;&#25991; (simplified chinese, in pin yin groups, and pinyin)"> <!-- Alphabet created by David MacKay using write.p.  Thanks to Juan K Lin for help -->
+<alphabet name="Chinese &#31616;&#20307;&#20013;&#25991; (simplified chinese, in pin yin groups)"> <!-- Alphabet created by David MacKay using write.p.  Thanks to Juan K Lin for help -->
+<!--ACL 10Feb2011: Pinyin characters removed (but pinyin groups retained), and "punctuation" inc. roman letters + numerals added, as per rewrite of MandarinDasher to include punctuation in context-->
 <orientation type="LR"/>
 <encoding type="Western"/>
 <control d="Control" t=""/>
 <palette>Default</palette>
 <train>training_chineseRuby_CN.txt</train>
-<space d="_" t=" " b="9" />
+<space d="&#x25a1;" t=" " b="9" />
 <paragraph d="&#182;" b="9"/>
-<group name="pin yin roman letters" b="0">
-<s d="&#97;" t="&#97;" />  <!-- a --> 
-<s  d="&#257;" t="&#257;" /> <!-- a1 -->
-<s  d="&#225;" t="&#225;" /> <!-- a2 -->
-<s  d="&#462;" t="&#462;" /> <!-- a3 -->
-<!-- alternative a3 &#259; -->
-<s  d="&#224;" t="&#224;" /> <!-- a4 -->
-<s d="b" t="b" b="11"/>
-<s d="c" t="c" b="12"/>
-<s d="d" t="d" b="13"/>
-<s  d="&#101;" t="&#101;" /> <!-- e -->
-<s  d="&#275;" t="&#275;" /> <!-- e1 -->
-<s  d="&#233;" t="&#233;" /> <!-- e2 -->
-<s  d="&#283;" t="&#283;" /> <!-- e3 -->
-<!-- alternative e3 &#277; -->
-<s  d="&#232;" t="&#232;" /> <!-- e4 -->
-<s d="f" t="f" b="15"/>
-<s d="g" t="g" b="16"/>
-<s d="h" t="h" b="17"/>
-<s  d="&#105;" t="&#105;" /> <!-- i -->
-<s  d="&#299;" t="&#299;" /> <!-- i1 -->
-<s  d="&#237;" t="&#237;" /> <!-- i2 -->
-<s  d="&#464;" t="&#464;" /> <!-- i3 -->
-<!-- alternative i3 &#301; -->
-<s  d="&#236;" t="&#236;" /> <!-- i4 -->
-<s d="j" t="j" b="19"/>
-<s d="k" t="k" b="20"/>
-<s d="l" t="l" b="21"/>
-<s d="m" t="m" b="22"/>
-<s d="n" t="n" b="23"/>
-<s  d="&#111;" t="&#111;" /> <!-- o -->
-<s  d="&#333;" t="&#333;" /> <!-- o1 -->
-<s  d="&#243;" t="&#243;" /> <!-- o2 -->
-<s  d="&#466;" t="&#466;" /> <!-- o3 -->
-<!-- alternative o3  &#335; -->
-<s  d="&#242;" t="&#242;" /> <!-- o4 -->
-<s d="p" t="p" b="25"/>
-<s d="q" t="q" b="26"/>
-<s d="r" t="r" b="27"/>
-<s d="s" t="s" b="28"/>
-<s d="t" t="t" b="29"/>
-<s  d="&#117;" t="&#117;" /> <!-- u -->
-<s  d="&#363;" t="&#363;" /> <!-- u1 -->
-<s  d="&#250;" t="&#250;" /> <!-- u2 -->
-<s  d="&#468;" t="&#468;" /> <!-- u3 -->
-<!-- alternative u3 &#365; -->
-<s  d="&#249;" t="&#249;" /> <!-- u4 -->
-<s  d="&#252;" t="&#252;" /> <!-- &#252; u: -->
-<s  d="&#470;" t="&#470;" /> <!-- &#252;1 u:1 -->
-<s  d="&#472;" t="&#472;" /> <!-- &#252;2 u:2 -->
-<s  d="&#474;" t="&#474;" /> <!-- &#252;3 u:3 -->
-<s  d="&#476;" t="&#476;" /> <!-- &#252;4 u:4 -->
-<s d="v" t="v" b="31"/>
-<s d="w" t="w" b="32"/>
-<s d="x" t="x" b="33"/>
-<s d="y" t="y" b="34"/>
-<s d="z" t="z" b="35"/>
-</group>
-<group name="convertors" b="110">
-<s b="109" d="&lt;" t="&lt;"/>
-<s b="105" d="&gt;" t="&gt;"/>
-</group>
+<context default=""/>
 <group name="&#257; (a1)" label="&#257;"  f="113"  b="114">
 <s d="&#x554A;" t="&#x554A;" />
 <s d="&#x963F;" t="&#x963F;" />
@@ -89,7 +29,9 @@
 <group name="&#224; (a4)" label="&#224;"  f="113"  b="117">
 <s d="&#x554A;" t="&#x554A;" />
 </group>
-<group name="&#97; (a5)" label="&#97;"  f="113"  b="118">
+<!--ACL using unicode "latin small letter a with ring above" to avoid conflict
+    with roman letter a-->
+<group name="&#97; (a5)" label="&#x00E5;"  f="113"  b="118">
 <s d="&#x554A;" t="&#x554A;" />
 <s d="&#x5475;" t="&#x5475;" />
 </group>
@@ -4891,7 +4833,9 @@
 <s d="&#x8DDE;" t="&#x8DDE;" />
 <s d="&#x96D2;" t="&#x96D2;" />
 </group>
-<group name="m (m2)" label="m"  f="113"  b="121">
+<!--ACL using unicode "latin small letter m with acute" to avoid conflict with
+    roman letter m-->
+<group name="m (m2)" label="&#x1E3F;"  f="113"  b="121">
 <s d="&#x5452;" t="&#x5452;" />
 </group>
 <group name="m&#257; (ma1)" label="m&#257;"  f="113"  b="122">
@@ -10253,5 +10197,270 @@
 <s d="&#x795A;" t="&#x795A;" />
 <s d="&#x9162;" t="&#x9162;" />
 </group>
+
+<!-- ACL: the idea of all further groups is to match up with individual punctuation/roman characters from
+  the Pinyin alphabet, again by equality of display text. This should mean that when the user writes a
+  pinyin punctuation symbol, the equivalent/identical chinese character/symbol will be written instead,
+  and entered into the (chinese-character) language model context just as for other _chinese_ symbols. -->  
+<group name="Punctuation" b="112">
+<!--Mandarin (frequently used) punctuation -->
+<!--Reference: http://input.foruto.com/source/source_03.htm -->
+<!--Reference: http://www.zdic.net/appendix/f3.htm -->
+<group label="&#x3002;">
+<s d="&#x3002;" t="&#x3002;" b="17" visible="yes" note="Full stop"  />
+</group>
+<group label="&#xFF0C;">
+<s d="&#xFF0C;" t="&#xFF0C;" b="20" visible="yes" note="Comma" />
+</group>
+<group label="&#x3001;">
+<s d="&#x3001;" t="&#x3001;" b="17" visible="yes" note="Dun Hao (symbol of brief stop)"  />
+</group>
+<group label="&#xFF1B;">
+<s d="&#xFF1B;" t="&#xFF1B;" b="20" visible="yes" note="Semicolon" />
+</group>
+<group label="&#xFF1A;">
+<s d="&#xFF1A;"  t="&#xFF1A;"  b="17" visible="yes" note="Colon" />
+</group>
+<group label="&#xFF1F;">
+<s d="&#xFF1F;" t="&#xFF1F;" b="20" visible="yes" note="Question mark"/>
+</group>
+<group label="&#xFF01;">
+<s d="&#xFF01;" t="&#xFF01;" b="17" visible="yes" note="Exclamation mark"/>
+</group>
+<group label="&#x201C;">
+<s d="&#x201C;" t="&#x201C;"   b="20" visible="yes" note="Double quotation mark (left)"/>
+</group>
+<group label="&#x201D;">
+<s d="&#x201D;" t="&#x201D;"   b="17" visible="yes" note="Double quotation mark (right)"/>
+</group>
+<group label="&#x2018;">
+<s d="&#x2018;" t="&#x2018;" b="20" visible="yes" note="Single quotation mark (left)"/>
+</group>
+<group label="&#x2019;">
+<s d="&#x2019;" t="&#x2019;" b="17" visible="yes" note="Single quotation mark (right)"/>
+</group>
+<group label="&#xFF08;">
+<s d="&#xFF08;" t="&#xFF08;" b="20" visible="yes" note="Left bracket"/>
+</group>
+<group label="&#xFF09;">
+<s d="&#xFF09;" t="&#xFF09;" b="17" visible="yes" note="Right bracket" />
+</group>
+<group label="&#x2026;">
+<s d="&#x2026;" t="&#x2026;" b="20" visible="yes" note="Ellipsis" />
+</group>
+<group label="&#x2014;">
+<s d="&#x2014;" t="&#x2014;" b="17" visible="yes" note="Long dash" />
+</group>
+<group label="&#x300A;">
+<s d="&#x300A;" t="&#x300A;" b="20" visible="yes" note="Double title (reference) brackets (left)" />
+</group>
+<group label="&#x300B;">
+<s d="&#x300B;" t="&#x300B;" b="17" visible="yes" note="Double title (reference) brackets (right)" />
+</group>
+<group label="&#x3008;">
+<s d="&#x3008;" t="&#x3008;" b="20" visible="yes" note="Single title (reference) brackets (left)" />
+</group>
+<group label="&#x3009;">
+<s d="&#x3009;" t="&#x3009;" b="17" visible="yes" note="Single title (reference) brackets (right)" />
+</group>
+<group label="&#xFF0E;">
+<s d="&#xFF0E;" t="&#xFF0E;" b="20" visible="yes" note="Separation dot" />
+</group>
+</group><!--Punctuation-->
+<group name="Roman alphabet and numbers" b="112" visible="yes">
+<group name="Lower case Latin letters" b="0">
+<group label="a">
+<s d="a" t="a" b="10" />
+</group>
+<group label="b">
+<s d="b" t="b" b="11" />
+</group>
+<group label="c">
+<s d="c" t="c" b="12" />
+</group>
+<group label="d">
+<s d="d" t="d" b="13" />
+</group>
+<group label="e">
+<s d="e" t="e" b="14" />
+</group>
+<group label="f">
+<s d="f" t="f" b="15" />
+</group>
+<group label="g">
+<s d="g" t="g" b="16" />
+</group>
+<group label="h">
+<s d="h" t="h" b="17" />
+</group>
+<group label="i">
+<s d="i" t="i" b="18" />
+</group>
+<group label="j">
+<s d="j" t="j" b="19" />
+</group>
+<group label="k">
+<s d="k" t="k" b="20" />
+</group>
+<group label="l">
+<s d="l" t="l" b="21" />
+</group>
+<group label="m">
+<s d="m" t="m" b="22" />
+</group>
+<group label="n">
+<s d="n" t="n" b="23" />
+</group>
+<group label="o">
+<s d="o" t="o" b="24" />
+</group>
+<group label="p">
+<s d="p" t="p" b="25" />
+</group>
+<group label="q">
+<s d="q" t="q" b="26" />
+</group>
+<group label="r">
+<s d="r" t="r" b="27" />
+</group>
+<group label="s">
+<s d="s" t="s" b="28" />
+</group>
+<group label="t">
+<s d="t" t="t" b="29" />
+</group>
+<group label="u">
+<s d="u" t="u" b="30" />
+</group>
+<group label="v">
+<s d="v" t="v" b="31" />
+</group>
+<group label="w">
+<s d="w" t="w" b="32" />
+</group>
+<group label="x">
+<s d="x" t="x" b="33" />
+</group>
+<group label="y">
+<s d="y" t="y" b="34" />
+</group>
+<group label="z">
+<s d="z" t="z" b="35" />
+</group>
+</group><!--lowercase-->
+<group name="Upper case Latin letters" b="111">
+<group label="A">
+<s d="A" t="A" b="10" />
+</group>
+<group label="B">
+<s d="B" t="B" b="11" />
+</group>
+<group label="C">
+<s d="C" t="C" b="12" />
+</group>
+<group label="D">
+<s d="D" t="D" b="13" />
+</group>
+<group label="E">
+<s d="E" t="E" b="14" />
+</group>
+<group label="F">
+<s d="F" t="F" b="15" />
+</group>
+<group label="G">
+<s d="G" t="G" b="16" />
+</group>
+<group label="H">
+<s d="H" t="H" b="17" />
+</group>
+<group label="I">
+<s d="I" t="I" b="18" />
+</group>
+<group label="J">
+<s d="J" t="J" b="19" />
+</group>
+<group label="K">
+<s d="K" t="K" b="20" />
+</group>
+<group label="L">
+<s d="L" t="L" b="21" />
+</group>
+<group label="M">
+<s d="M" t="M" b="22" />
+</group>
+<group label="N">
+<s d="N" t="N" b="23" />
+</group>
+<group label="O">
+<s d="O" t="O" b="24" />
+</group>
+<group label="P">
+<s d="P" t="P" b="25" />
+</group>
+<group label="Q">
+<s d="Q" t="Q" b="26" />
+</group>
+<group label="R">
+<s d="R" t="R" b="27" />
+</group>
+<group label="S">
+<s d="S" t="S" b="28" />
+</group>
+<group label="T">
+<s d="T" t="T" b="29" />
+</group>
+<group label="U">
+<s d="U" t="U" b="30" />
+</group>
+<group label="V">
+<s d="V" t="V" b="31" />
+</group>
+<group label="W">
+<s d="W" t="W" b="32" />
+</group>
+<group label="X">
+<s d="X" t="X" b="33" />
+</group>
+<group label="Y">
+<s d="Y" t="Y" b="34" />
+</group>
+<group label="Z">
+<s d="Z" t="Z" b="35" />
+</group>
+</group><!--uppercase-->
+<group name="Numbers" b="113">
+<group label="1">
+<s d="1" t="1" b="90" />
+</group>
+<group label="2">
+<s d="2" t="2" b="91" />
+</group>
+<group label="3">
+<s d="3" t="3" b="92" />
+</group>
+<group label="4">
+<s d="4" t="4" b="93" />
+</group>
+<group label="5">
+<s d="5" t="5" b="94" />
+</group>
+<group label="6">
+<s d="6" t="6" b="95" />
+</group>
+<group label="7">
+<s d="7" t="7" b="96" />
+</group>
+<group label="8">
+<s d="8" t="8" b="97" />
+</group>
+<group label="9">
+<s d="9" t="9" b="98" />
+</group>
+<group label="0">
+<s d="0" t="0" b="99" />
+</group>
+</group><!--numbers-->
+</group><!--all roman-->
+
 </alphabet>
 </alphabets>
diff --git a/Data/alphabets/alphabet.spyDict.xml b/Data/alphabets/alphabet.spyDict.xml
index fd28397..f4b9fea 100644
--- a/Data/alphabets/alphabet.spyDict.xml
+++ b/Data/alphabets/alphabet.spyDict.xml
@@ -27,7 +27,8 @@
 <s d="&#224;" t="&#x3801;" b="65" visible="no"/>
 </group>
 <group name="a5" label="5" b="66" visible="yes">
-<s d="&#97;" t="&#x5475;" b="67" visible="no"/>
+<!--ACL using unicode "latin small letter a with ring above" to avoid confusion with roman letter a-->
+<s d="&#x00E5;" t="&#x5475;" b="67" visible="no"/>
 </group>
 <group name="ai" label="i" b="68" visible="yes">
 <group name="ai1" label="1" b="69" visible="yes">
@@ -2275,7 +2276,8 @@
 </group>
 <group name="m" label="m" b="90" visible="yes">
 <group name="m2" label="2" b="57" visible="yes">
-<s d="m" t="&#x5452;" b="58" visible="no"/>
+<!-- ACL using unicode "latin small letter m with acute" to avoid conflict with roman letter m -->
+<s d="&#x1E3F;" t="&#x5452;" b="58" visible="no"/>
 </group>
 <group name="ma" label="a" b="59" visible="yes">
 <group name="ma1" label="1" b="60" visible="yes">
diff --git a/Src/DasherCore/AlphabetManager.cpp b/Src/DasherCore/AlphabetManager.cpp
index 14bc66c..21bdf51 100644
--- a/Src/DasherCore/AlphabetManager.cpp
+++ b/Src/DasherCore/AlphabetManager.cpp
@@ -135,72 +135,62 @@ CAlphabetManager::CGroupNode::CGroupNode(CDasherNode *pParent, int iOffset, unsi
             pGroup ? strEnc+pGroup->strLabel : strEnc, pMgr), m_pGroup(pGroup) {
 }
 
-CAlphabetManager::CSymbolNode *CAlphabetManager::makeSymbol(CDasherNode *pParent, int iOffset, unsigned int iLbnd, unsigned int iHbnd, const std::string &strGroup, int iBkgCol, symbol iSymbol) {
-  return new CSymbolNode(pParent, iOffset, iLbnd, iHbnd, strGroup, this, iSymbol);
-}
-
-CAlphabetManager::CGroupNode *CAlphabetManager::makeGroup(CDasherNode *pParent, int iOffset, unsigned int iLbnd, unsigned int iHbnd, const std::string &strEnc, int iBkgCol, const SGroupInfo *pGroup) {
-  return new CGroupNode(pParent, iOffset, iLbnd, iHbnd, strEnc, iBkgCol, this, pGroup);
-}
-
 CAlphabetManager::CAlphNode *CAlphabetManager::GetRoot(CDasherNode *pParent, unsigned int iLower, unsigned int iUpper, bool bEnteredLast, int iOffset) {
 
   int iNewOffset(max(-1,iOffset-1));
-  
-  std::vector<symbol> vContextSymbols;
-  // TODO: make the LM get the context, rather than force it to fix max context length as an int
-  int iStart = max(0, iNewOffset - m_pLanguageModel->GetContextLength());
-  
-  if(pParent) {
-    pParent->GetContext(m_pInterface, m_pAlphabetMap, vContextSymbols, iStart, iNewOffset+1 - iStart);
-  } else {
-    std::string strContext = (iNewOffset == -1) 
-      ? m_pAlphabet->GetDefaultContext()
-      : m_pInterface->GetContext(iStart, iNewOffset+1 - iStart);
-    m_pAlphabetMap->GetSymbols(vContextSymbols, strContext);
-  }
+
+  pair<symbol, CLanguageModel::Context> p = GetContextSymbols(pParent, iNewOffset, m_pAlphabetMap);
   
   CAlphNode *pNewNode;
-  CLanguageModel::Context iContext = m_pLanguageModel->CreateEmptyContext();
-  
-  std::vector<symbol>::iterator it = vContextSymbols.end();
-  while (it!=vContextSymbols.begin()) {
-    if (*(--it) == 0) {
-      //found an impossible symbol! start after it
-      ++it;
-      break;
-    }
-  }
-  if (it == vContextSymbols.end()) {
-    //previous character was not in the alphabet!
-    //can't construct a node "responsible" for entering it
-    bEnteredLast=false;
-    //instead, Create a node as if we were starting a new sentence...
-    vContextSymbols.clear();
-    m_pAlphabetMap->GetSymbols(vContextSymbols, m_pAlphabet->GetDefaultContext());
-    it = vContextSymbols.begin();
-    //TODO: What it the default context somehow contains symbols not in the alphabet?
-  }
-  //enter the symbols we could make sense of, into the LM context...
-  while (it != vContextSymbols.end()) {
-    m_pLanguageModel->EnterSymbol(iContext, *(it++));
-  }
-  
-  if(!bEnteredLast) {
-    pNewNode = makeGroup(pParent, iNewOffset, iLower, iUpper, "", 0, NULL);
+  if(p.first==0 || !bEnteredLast) {
+    //couldn't extract last symbol (so probably using default context), or shouldn't
+    pNewNode = new CGroupNode(pParent, iNewOffset, iLower, iUpper, "", 0, this, NULL); //default background colour
   } else {
-    const symbol iSymbol(vContextSymbols[vContextSymbols.size() - 1]);
-    pNewNode = makeSymbol(pParent, iNewOffset, iLower, iUpper, "", 0, iSymbol);
+    pNewNode = new CSymbolNode(pParent, iNewOffset, iLower, iUpper, "", this, p.first);
     //if the new node is not child of an existing node, then it
     // represents a symbol that's already happened - so we're either
     // going backwards (rebuildParent) or creating a new root after a language change
     DASHER_ASSERT (!pParent);
   }
 
-  pNewNode->iContext = iContext;
+  pNewNode->iContext = p.second;
   return pNewNode;
 }
 
+pair<symbol, CLanguageModel::Context> CAlphabetManager::GetContextSymbols(CDasherNode *pParent, int iRootOffset, const CAlphabetMap *pAlphMap) {
+  vector<symbol> vContextSymbols; bool bHaveFinalSymbol = true;
+  //no context is ever available at offset -1 (=choice between symbols with offset 0)
+  if (iRootOffset!=-1) {
+    // TODO: make the LM get the context, rather than force it to fix max context length as an int
+    int iStart = max(0, iRootOffset - m_pLanguageModel->GetContextLength());
+    if(pParent) {
+      pParent->GetContext(m_pInterface, pAlphMap, vContextSymbols, iStart, iRootOffset+1 - iStart);
+    } else {
+      pAlphMap->GetSymbols(vContextSymbols, m_pInterface->GetContext(iStart, iRootOffset+1 - iStart));
+    }
+  
+    for (std::vector<symbol>::iterator it = vContextSymbols.end(); it!=vContextSymbols.begin();) {
+      if (*(--it) == 0) {
+        //found an impossible symbol! erase from beginning up to it (inclusive)
+        vContextSymbols.erase(vContextSymbols.begin(), ++it);
+        break;
+      }
+    }
+  }
+  if (vContextSymbols.empty()) {
+    bHaveFinalSymbol = false;
+    pAlphMap->GetSymbols(vContextSymbols, m_pAlphabet->GetDefaultContext());
+  }
+  
+  CLanguageModel::Context iContext = m_pLanguageModel->CreateEmptyContext();
+  
+  //enter the symbols we could make sense of, into the LM context...
+  for (vector<symbol>::iterator it=vContextSymbols.begin(); it != vContextSymbols.end(); it++) {
+    m_pLanguageModel->EnterSymbol(iContext, *it);
+  }
+  return pair<symbol,CLanguageModel::Context>(bHaveFinalSymbol ? vContextSymbols[vContextSymbols.size()-1] : 0, iContext);
+}
+
 bool CAlphabetManager::CSymbolNode::GameSearchNode(string strTargetUtf8Char) {
   if (m_pMgr->m_pAlphabet->GetText(iSymbol) == strTargetUtf8Char) {
     SetFlag(NF_GAME, true);
@@ -314,7 +304,8 @@ CAlphabetManager::CGroupNode *CAlphabetManager::CreateGroupNode(CAlphNode *pPare
 
   // When creating a group node...
   // ...the offset is the same as the parent...
-  CGroupNode *pNewNode = makeGroup(pParent, pParent->offset(), iLbnd, iHbnd, strEnc, iBkgCol, pInfo);
+  
+  CGroupNode *pNewNode = new CGroupNode(pParent, pParent->offset(), iLbnd, iHbnd, strEnc, iBkgCol, this, pInfo);
 
   //...as is the context!
   pNewNode->iContext = m_pLanguageModel->CloneContext(pParent->iContext);
@@ -346,13 +337,6 @@ CDasherNode *CAlphabetManager::CSymbolNode::RebuildGroup(CAlphNode *pParent, uns
   return pRet;
 }
 
-CLanguageModel::Context CAlphabetManager::CreateSymbolContext(CAlphNode *pParent, symbol iSymbol)
-{
-  CLanguageModel::Context iContext = m_pLanguageModel->CloneContext(pParent->iContext);
-  m_pLanguageModel->EnterSymbol(iContext, iSymbol); // TODO: Don't use symbols?
-  return iContext;
-}
-
 CDasherNode *CAlphabetManager::CreateSymbolNode(CAlphNode *pParent, unsigned int iLbnd, unsigned int iHbnd, const std::string &strGroup, int iBkgCol, symbol iSymbol) {
 
     // TODO: Exceptions / error handling in general
@@ -361,15 +345,16 @@ CDasherNode *CAlphabetManager::CreateSymbolNode(CAlphNode *pParent, unsigned int
     // (and we can't call numChars() on the symbol before we've constructed it!)
     int iNewOffset = pParent->offset()+1;
     if (m_pAlphabet->GetText(iSymbol)=="\r\n") iNewOffset++;
-    CSymbolNode *pAlphNode = makeSymbol(pParent, iNewOffset, iLbnd, iHbnd, strGroup, iBkgCol, iSymbol);
-
+    CSymbolNode *pAlphNode = new CSymbolNode(pParent, iNewOffset, iLbnd, iHbnd, "", this, iSymbol);
+  
     //     std::stringstream ssLabel;
 
     //     ssLabel << m_pAlphabet->GetDisplayText(iSymbol) << ": " << pNewNode;
 
     //    pDisplayInfo->strDisplayText = ssLabel.str();
 
-    pAlphNode->iContext = CreateSymbolContext(pParent, iSymbol);
+    pAlphNode->iContext = m_pLanguageModel->CloneContext(pParent->iContext);
+    m_pLanguageModel->EnterSymbol(pAlphNode->iContext, iSymbol); // TODO: Don't use symbols?
 
   return pAlphNode;
 }
diff --git a/Src/DasherCore/AlphabetManager.h b/Src/DasherCore/AlphabetManager.h
index a8e8aa8..de40cba 100644
--- a/Src/DasherCore/AlphabetManager.h
+++ b/Src/DasherCore/AlphabetManager.h
@@ -145,16 +145,22 @@ namespace Dasher {
     const CAlphInfo *GetAlphabet() const;
     
   protected:
-    /// Factory method for CAlphNode construction, so subclasses can override.
-    virtual CSymbolNode *makeSymbol(CDasherNode *pParent, int iOffset, unsigned int iLbnd, unsigned int iHbnd, const std::string &strGroup, int iBkgCol, symbol iSymbol);
-    virtual CGroupNode *makeGroup(CDasherNode *pParent, int iOffset, unsigned int iLbnd, unsigned int iHbnd, const std::string &strEnc, int iBkgCol, const SGroupInfo *pGroup);
-
+    ///Called to get the symbols in the context for (preceding) a new node
+    /// \param pParent node to assume has been output, when obtaining context
+    /// \param iRootOffset offset of the node that will be constructed; i.e. context should include symbols
+    /// up to & including this offset.
+    /// \param pAlphMap use to convert entered text into symbol numbers
+    /// (could be the manager's m_pAlphabetMap, but subclasses can pass in something different)
+    /// \return pair: first element is the last symbol in the context, _if_ a usable context
+    /// could be extracted, else 0 (=> couldn't get context, using alphabet default); second
+    /// element is the result of entering the symbols retrieved, into a fresh LM context.
+    std::pair<symbol, CLanguageModel::Context> GetContextSymbols(CDasherNode *pParent, int iRootOffset, const CAlphabetMap *pAlphMap);
+    
     ///Called to create a node for a given symbol (leaf), as a child of a specified parent node
     /// \param strGroup caption of any group containing this node, that will not be created:
     /// thus, should be prepended onto the caption of the node created.
     /// \param iBkgCol colour behind the new node, i.e. that should show through if the node is transparent
     virtual CDasherNode *CreateSymbolNode(CAlphNode *pParent, unsigned int iLbnd, unsigned int iHbnd, const std::string &strGroup, int iBkgCol, symbol iSymbol);
-    virtual CLanguageModel::Context CreateSymbolContext(CAlphNode *pParent, symbol iSymbol);
     virtual CGroupNode *CreateGroupNode(CAlphNode *pParent, unsigned int iLbnd, unsigned int iHbnd, const std::string &strEnc, int iBkgCol, const SGroupInfo *pInfo);
 
     ///Called to add any non-alphabet (non-symbol) children to a top-level node (root or symbol).
diff --git a/Src/DasherCore/MandarinAlphMgr.cpp b/Src/DasherCore/MandarinAlphMgr.cpp
index 1c0e2d3..1f35f34 100644
--- a/Src/DasherCore/MandarinAlphMgr.cpp
+++ b/Src/DasherCore/MandarinAlphMgr.cpp
@@ -46,16 +46,11 @@ static char THIS_FILE[] = __FILE__;
 #endif
 #endif
 
-//the index of the last syllable+tone symbol in the pinyin alphabet; later symbols are "punctuation"
-// and do not correspond to groups in the chinese alphabet.
-#define LAST_PY 1286
-
 CMandarinAlphMgr::CMandarinAlphMgr(CDasherInterfaceBase *pInterface, CNodeCreationManager *pNCManager, const CAlphInfo *pAlphabet, const CAlphabetMap *pAlphMap)
   : CAlphabetManager(pInterface, pNCManager, pAlphabet, pAlphMap),
-    m_pCHAlphabet(pInterface->GetInfo("Chinese 简体中文 (simplified chinese, in pin yin groups, and pinyin)")),
+    m_pCHAlphabet(pInterface->GetInfo("Chinese 简体中文 (simplified chinese, in pin yin groups)")),
     m_pCHAlphabetMap(m_pCHAlphabet->MakeMap()),
-    m_pConversionsBySymbol(new set<symbol>[LAST_PY+1]) {
-  
+    m_pConversionsBySymbol(new set<symbol>[GetAlphabet()->GetNumberTextSymbols()+1]) {
   //the CHAlphabet contains a group for each SPY syllable+tone, with symbols being chinese characters.
   // Build a map from SPY to set of chinese chars (note, the same chinese unicode can occur in multiple places;
   // hence, we represent as unicode not CHAlph symbol number)...
@@ -71,23 +66,32 @@ CMandarinAlphMgr::CMandarinAlphMgr(CDasherInterfaceBase *pInterface, CNodeCreati
     // such equivalences are recorded in the xml 'name' attribute of the group, but we don't need that.
     if (pGroup->strLabel.length()) {
       set<string> &chars(conversions[pGroup->strLabel]);
+      DASHER_ASSERT(chars.empty()); //no previous group with same label
       for (int ch=pGroup->iStart; ch<pGroup->iEnd; ch++)
         chars.insert(m_pCHAlphabet->GetText(ch));
     }
   }
+  //Dasher's alphabet format means that space and paragraph can't be put into groups,
+  // so the above will skip them. Hence, add them using the _symbol_ display text:
+  if (symbol sp = m_pCHAlphabet->GetSpaceSymbol())
+    conversions[m_pCHAlphabet->GetDisplayText(sp)].insert(m_pCHAlphabet->GetText(sp));
+  if (symbol para = m_pCHAlphabet->GetParagraphSymbol())
+    conversions[m_pCHAlphabet->GetDisplayText(para)].insert(m_pCHAlphabet->GetText(para));
 
   //Now: symbols in the primary (SPY) alphabet are syllable+tone, with the string SPY description
   // (using unicode tone marks, e.g. &#257;) in the display text, matching up with the CHAlphabet groups. 
   // (The SPY symbols are arranged in hierarchical groups according to the numbered-tone version, e.g. "a1";
   // but we don't do anything special with those groups, they are just displayed on screen as any normal alphabet).
-  for (int i=1; i<=LAST_PY; i++) {
+  //Punctuation is the same way, i.e. PYAlph symbol w/ displaytext "," maps to the CHAlphabet group w/ label ","
+  std::vector<symbol> vSyms;
+  for (symbol i=1; i<=GetAlphabet()->GetNumberTextSymbols(); i++) {
     set<string> &convs(conversions[m_pAlphabet->GetDisplayText(i)]);
     DASHER_ASSERT(!convs.empty());
     //convert each of these chinese unicode characters into a CHAlphabet symbol...
     for (set<string>::const_iterator it=convs.begin(); it!=convs.end(); it++) {
-      std::vector<symbol> vSyms;
+      vSyms.clear();
       m_pCHAlphabetMap->GetSymbols(vSyms, *it);
-      DASHER_ASSERT(vSyms.size()==1); //does it ever happen? if so, Will's code would effectively push -1
+      DASHER_ASSERT(vSyms.size()==1 && vSyms[0]!=0); //i.e. conversion is exactly one chinese symbol
       DASHER_ASSERT(m_pCHAlphabet->GetText(vSyms[0]) == *it);
       m_pConversionsBySymbol[i].insert(vSyms[0]);
     }
@@ -112,40 +116,69 @@ CTrainer *CMandarinAlphMgr::GetTrainer() {
   return new CMandarinTrainer(m_pLanguageModel, m_pAlphabetMap, m_pCHAlphabetMap);
 }
 
-CDasherNode *CMandarinAlphMgr::CreateSymbolNode(CAlphNode *pParent, unsigned int iLbnd, unsigned int iHbnd, const std::string &strGroup, int iBkgCol, symbol iSymbol) {
-
-  if (iSymbol <= LAST_PY) {
-    //Will wrote:
-    //Modified for Mandarin Dasher
-    //The following logic switch allows punctuation nodes in Mandarin to be treated in the same way as English (i.e. display and populate next round) instead of invoking a conversion node
-    //ACL I think by "the following logic switch" he meant that symbols <= 1288 are "normal" nodes, NOT punctuation nodes,
-    // whereas punctuation is handled by the fallthrough case (standard AlphabetManager CreateSymbolNode)
+CAlphabetManager::CAlphNode *CMandarinAlphMgr::GetRoot(CDasherNode *pParent, unsigned int iLower, unsigned int iUpper, bool bEnteredLast, int iOffset) {
 
-    /*old code:
-     * CDasherNode *pNewNode = m_pNCManager->GetConvRoot(pParent, iLbnd, iHbnd, pParent->m_iOffset+1);
-	   * static_cast<CPinYinConversionHelper::CPYConvNode *>(pNewNode)->SetConvSymbol(iSymbol);
-	   * return pNewNode;
-     */
-    
-    //The conversions parallels old PinyinConversionHelper's SetConvSymbol, except
-    // we've already resolved the mapping from what was the symbol's displaytext
-    // (SPY syllable+tone) to the relevant set of chinese symbols.
-    
-    // the same offset as we've still not entered/selected a symbol (leaf)
-    CConvRoot *pNewNode = new CConvRoot(pParent, pParent->offset(), iLbnd, iHbnd, strGroup, this, &m_pConversionsBySymbol[iSymbol]);
+  int iNewOffset(max(-1,iOffset-1));  
+  // Use chinese alphabet, not pinyin...
+  pair<symbol, CLanguageModel::Context> p=GetContextSymbols(pParent, iNewOffset, m_pCHAlphabetMap);
 
-    //from ConversionHelper:
-    //pNewNode->m_pLanguageModel = m_pLanguageModel;
-    pNewNode->iContext = m_pLanguageModel->CloneContext(pParent->iContext);
+  CAlphNode *pNewNode;
+  if (p.first==0 || !bEnteredLast) {
+    pNewNode = new CGroupNode(pParent, iNewOffset, iLower, iUpper, "", 0, this, NULL);
+  } else {
+    DASHER_ASSERT(p.first>0 && p.first<=m_pCHAlphabet->GetNumberTextSymbols());
+    pNewNode = new CMandSym(pParent, iNewOffset, iLower, iUpper,  "", this, p.first);
+  }
+  pNewNode->iContext = p.second;
+  
+  return pNewNode;
+}
 
-	  return pNewNode;
+int CMandarinAlphMgr::GetCHColour(symbol CHsym, int iOffset) const {
+  int iColour = m_pCHAlphabet->GetColour(CHsym);
+  if (iColour==-1) {
+    //none specified in alphabet
+    static int colourStore[2][3] = {
+      {66,//light blue
+        64,//very light green
+        62},//light yellow
+      {78,//light purple
+        81,//brownish
+        60},//red
+    };    
+    return colourStore[iOffset&1][CHsym % 3];
   }
-  return CAlphabetManager::CreateSymbolNode(pParent, iLbnd, iHbnd, strGroup, iBkgCol, iSymbol);
+  if ((iOffset&1)==0 && iColour<130) iColour+=130;
+  return iColour;
+}
+
+CDasherNode *CMandarinAlphMgr::CreateSymbolNode(CAlphNode *pParent, unsigned int iLbnd, unsigned int iHbnd, const std::string &strGroup, int iBkgCol, symbol iSymbol) {
+  
+  //For every PY symbol (=syllable+tone, or "punctuation"),
+  // m_pConversionsBySymbol identifies the possible chinese-alphabet symbols
+  // that have that syll+tone (for punctuation, this'll be a singleton: the identical
+  // punctuation character in the chinese alphabet). A CConvRoot thus offers a choice between them...
+  
+  if (m_pConversionsBySymbol[iSymbol].size()>1)
+    return CreateConvRoot(pParent, iLbnd, iHbnd, strGroup, iSymbol);
+  
+  return CreateCHSymbol(pParent,pParent->iContext, iLbnd, iHbnd, strGroup, *(m_pConversionsBySymbol[iSymbol].begin()));
+}
+
+CMandarinAlphMgr::CConvRoot *CMandarinAlphMgr::CreateConvRoot(CAlphNode *pParent, unsigned int iLbnd, unsigned int iHbnd, const std::string &strGroup, symbol iPYsym) {
+  
+  // the same offset as we've still not entered/selected a symbol (leaf);
+  // Colour is always 9 so ignore iBkgCol
+  CConvRoot *pConv = new CConvRoot(pParent, pParent->offset(), iLbnd, iHbnd, strGroup, this, &m_pConversionsBySymbol[iPYsym]);
+  
+  // and use the same context too (pinyin syll+tone is _not_ used as part of the LM context)
+  pConv->iContext = m_pLanguageModel->CloneContext(pParent->iContext);
+  return pConv;
 }
 
 CMandarinAlphMgr::CConvRoot::CConvRoot(CDasherNode *pParent, int iOffset, unsigned int iLbnd, unsigned int iHbnd, const std::string &strGroup, CMandarinAlphMgr *pMgr, const set<symbol> *pConversions)
 : CDasherNode(pParent, iOffset, iLbnd, iHbnd, 9, strGroup), m_pMgr(pMgr), m_pConversions(pConversions) {
-  DASHER_ASSERT(pConversions);
+  DASHER_ASSERT(pConversions && pConversions->size()>1);
   //colour + label from ConversionManager.
 }
 
@@ -163,44 +196,31 @@ void CMandarinAlphMgr::CConvRoot::PopulateChildren() {
     m_pMgr->AssignSizes(m_vChInfo, iContext);
   }
   
-  int iIdx(0);
   int iCum(0);
   
-  //    int parentClr = pNode->Colour();
-  // TODO: Fixme
-  int parentClr = 0;
   // Finally loop through and create the children
   for (vector<pair<symbol, unsigned int> >::const_iterator it = m_vChInfo.begin(); it!=m_vChInfo.end(); it++) {
     //      std::cout << "Current scec: " << pCurrentSCEChild << std::endl;
-    unsigned int iLbnd(iCum);
-    unsigned int iHbnd(iCum + it->second);
+    const unsigned int iLbnd(iCum), iHbnd(iCum + it->second);
     
     iCum = iHbnd;
+    CMandSym *pNewNode = mgr()->CreateCHSymbol(this, this->iContext, iLbnd, iHbnd, "", it->first);
     
-    // TODO: Parameters here are placeholders - need to figure out
-    // what's right    
-    
-    int iColour(m_vChInfo.size()==1 ? getColour() : m_pMgr->AssignColour(parentClr, iIdx));
-    
-    //  std::cout << "#" << pCurrentSCEChild->pszConversion << "#" << std::endl;
-    
-    CMandNode *pNewNode = new CMandSym(this, offset()+1, iLbnd, iHbnd, iColour, m_pMgr, it->first);
-    
-    // TODO: Reimplement ----
-    
-    // FIXME - handle context properly
-    //      pNewNode->SetContext(m_pLanguageModel->CreateEmptyContext());
-    // -----
-    
-    pNewNode->iContext = m_pMgr->m_pLanguageModel->CloneContext(this->iContext);
-      
-    m_pMgr->m_pLanguageModel->EnterSymbol(pNewNode->iContext, it->first); // TODO: Don't use symbols?      
-      
     DASHER_ASSERT(GetChildren().back()==pNewNode);
     
-    ++iIdx;
   }
-  
+}
+
+CMandarinAlphMgr::CMandSym *CMandarinAlphMgr::CreateCHSymbol(CDasherNode *pParent, CLanguageModel::Context iContext, unsigned int iLbnd, unsigned int iHbnd, const std::string &strGroup, symbol iCHsym) {
+  // Creates the node for chinese-alphabet symbol iCHsym; its LM context is a clone of iContext with iCHsym entered.
+  // TODO: Parameters here are placeholders - need to figure out what's right
+  // Offset is parent's+1, or +2 for the "\r\n" symbol (presumably because its text is two characters - confirm).
+  int iNewOffset = pParent->offset()+1;
+  if (m_pCHAlphabet->GetText(iCHsym) == "\r\n") iNewOffset++;
+  CMandSym *pNewNode = new CMandSym(pParent, iNewOffset, iLbnd, iHbnd, strGroup, this, iCHsym);
+  pNewNode->iContext = m_pLanguageModel->CloneContext(iContext);
+  m_pLanguageModel->EnterSymbol(pNewNode->iContext, iCHsym);
+  return pNewNode;
 }
 
 void CMandarinAlphMgr::AssignSizes(std::vector<pair<symbol,unsigned int> > &vChildren, Dasher::CLanguageModel::Context context) {
@@ -281,54 +301,11 @@ void CMandarinAlphMgr::AssignSizes(std::vector<pair<symbol,unsigned int> > &vChi
   
 }
 
-static int colourStore[2][3] = {
-  {66,//light blue
-    64,//very light green
-    62},//light yellow
-  {78,//light purple
-    81,//brownish
-    60},//red
-};
-
-//Pulled from CConversionHelper, where it's described as "needing a rethink"...
-int CMandarinAlphMgr::AssignColour(int parentClr, int childIndex) {
-  int which = -1;
-  
-  for (int i=0; i<2; i++)
-    for(int j=0; j<3; j++)
-      if (parentClr == colourStore[i][j])
-        which = i;
-  
-  if(which == -1)
-    return colourStore[0][childIndex%3];
-  else if(which == 0)
-    return colourStore[1][childIndex%3];
-  else 
-    return colourStore[0][childIndex%3]; 
+CMandarinAlphMgr::CMandSym::CMandSym(CDasherNode *pParent, int iOffset, unsigned int iLbnd, unsigned int iHbnd, const std::string &strGroup, CMandarinAlphMgr *pMgr, symbol iSymbol)
+: CSymbolNode(pParent, iOffset, iLbnd, iHbnd, pMgr->GetCHColour(iSymbol,iOffset), strGroup+pMgr->m_pCHAlphabet->GetDisplayText(iSymbol), pMgr, iSymbol) {
 }
 
-
-CLanguageModel::Context CMandarinAlphMgr::CreateSymbolContext(CAlphNode *pParent, symbol iSymbol)
-{
-	//Context carry-over. This code may worth looking at debug
-	return m_pLanguageModel->CloneContext(pParent->iContext);
-}
-
-CMandarinAlphMgr::CMandNode::CMandNode(CDasherNode *pParent, int iOffset, unsigned int iLbnd, unsigned int iHbnd, const string &strGroup, CMandarinAlphMgr *pMgr, symbol iSymbol)
-: CSymbolNode(pParent, iOffset, iLbnd, iHbnd, strGroup, pMgr, iSymbol) {
-}
-
-CMandarinAlphMgr::CMandNode::CMandNode(CDasherNode *pParent, int iOffset, unsigned int iLbnd, unsigned int iHbnd, int iColour, const string &strDisplayText, CMandarinAlphMgr *pMgr, symbol iSymbol)
-: CSymbolNode(pParent, iOffset, iLbnd, iHbnd, iColour, strDisplayText, pMgr, iSymbol) {
-}
-
-CMandarinAlphMgr::CMandNode *CMandarinAlphMgr::makeSymbol(CDasherNode *pParent, int iOffset, unsigned int iLbnd, unsigned int iHbnd, const std::string &strGroup, int iBkgCol, symbol iSymbol) {
-  // Override standard symbol factory method, called by superclass, to make CMandNodes
-  //  - important only to disable learn-as-you-write...
-  return new CMandNode(pParent, iOffset, iLbnd, iHbnd, strGroup, this, iSymbol);
-}
-
-void CMandarinAlphMgr::CMandNode::SetFlag(int iFlag, bool bValue) {
+void CMandarinAlphMgr::CMandSym::SetFlag(int iFlag, bool bValue) {
   //``disable learn-as-you-write for Mandarin Dasher''
    if (iFlag==NF_COMMITTED)
      CDasherNode::SetFlag(iFlag, bValue); //bypass CAlphNode setter!
@@ -336,15 +313,6 @@ void CMandarinAlphMgr::CMandNode::SetFlag(int iFlag, bool bValue) {
       CAlphNode::SetFlag(iFlag, bValue);
 }
 
-// For converted chinese symbols, we construct instead CMandSyms...
-CMandarinAlphMgr::CMandSym::CMandSym(CDasherNode *pParent, int iOffset, unsigned int iLbnd, unsigned int iHbnd, int iColour, CMandarinAlphMgr *pMgr, symbol iSymbol)
-: CMandNode(pParent, iOffset, iLbnd, iHbnd, iColour, pMgr->m_pCHAlphabet->GetText(iSymbol), pMgr, iSymbol) {
-  //Note we passed a custom label into superclass constructor:
-  // the chinese characters are in the _text_ (not label - that's e.g. "liang4")
-  // of the alphabet (& the pszConversion from PinyinParser was converted to symbol
-  // by CAlphabet::GetSymbols, which does string->symbol by _text_; we're reversing that)
-}
-
 const std::string &CMandarinAlphMgr::CMandSym::outputText() {
   //use chinese, not pinyin, alphabet...
   return mgr()->m_pCHAlphabet->GetText(iSymbol);
diff --git a/Src/DasherCore/MandarinAlphMgr.h b/Src/DasherCore/MandarinAlphMgr.h
index e4f86e2..cc2ed14 100644
--- a/Src/DasherCore/MandarinAlphMgr.h
+++ b/Src/DasherCore/MandarinAlphMgr.h
@@ -43,33 +43,25 @@ namespace Dasher {
     ///ACL: returns a MandarinTrainer too.
     CTrainer *GetTrainer();
     
-    /*ACL note: used to override GetRoot,
-     to attempt to clone the context of the previous node
-     in the case that the previous node was a PinyinConversionHelper node
-     (the common case - when a "conversion" was performed and chinese symbols reached,
-     it then 'escaped' back to the alphabet manager root by calling GetAlphRoot...)
-     Since this is no longer necessary (chinese symbol nodes are alph nodes directly,
-     so subsume the previous role of alph 'root's rather than contain them),
-     I don't think we need to override GetRoot anymore...?!?! */
+    //Override to use chinese, not pinyin, alphabet to turn extracted text into symbol numbers; and to create
+    // chinese symbol nodes, not pinyin ones.
+    CAlphNode *GetRoot(CDasherNode *pParent, unsigned int iLower, unsigned int iUpper, bool bEnteredLast, int iOffset);    
     
   protected:
-    ///Subclass CSymbolNode to disable learn-as-you-write (for Mandarin Dasher).
-    /// This subclass used directly only for punctuation; chinese symbols use CMandSym, below.
-    class CMandNode : public CSymbolNode {
+    ///Subclass of CSymbolNode for (converted) chinese-alphabet symbols:
+    /// these (a) disable learn-as-you-write (not supported in Mandarin Dasher),
+    /// (b) use the chinese (not pinyin, as CSymbolNode would) alphabet for text to display/enter
+    /// (c) determine their colour using GetCHColour rather than GetColour.
+    class CMandSym : public CSymbolNode {
     public:
       CMandarinAlphMgr *mgr() {return static_cast<CMandarinAlphMgr *>(CSymbolNode::mgr());}
+      ///Symbol constructor: display text from CHAlphabet, colour from GetCHColour
+      /// \param strGroup caption of any group(s) containing this symbol for which no nodes were created; prepended to display text.
+      CMandSym(CDasherNode *pParent, int iOffset, unsigned int iLbnd, unsigned int iHbnd, const std::string &strGroup, CMandarinAlphMgr *pMgr, symbol iSymbol);
+      ///Disable learn-as-you-write
       virtual void SetFlag(int iFlag, bool bValue);
+      ///Rebuilding not supported
       virtual CDasherNode *RebuildParent() {return 0;}
-      //Standard constructor, as for CSymbolNode
-      CMandNode(CDasherNode *pParent, int iOffset, unsigned int iLbnd, unsigned int iHbnd, const std::string &strGroup, CMandarinAlphMgr *pMgr, symbol iSymbol);
-    protected:
-      ///Compatibility constructor with exact colour & label (inc. enclosing group nodes)
-      CMandNode(CDasherNode *pParent, int iOffset, unsigned int iLbnd, unsigned int iHbnd, int iColour, const std::string &strDisplayText, CMandarinAlphMgr *pMgr, symbol iSymbol);
-    };
-    class CMandSym : public CMandNode {
-    public:
-      //CMandSym's only ever created as children of ConvRoots, so no enclosing group nodes to worry about.
-      CMandSym(CDasherNode *pParent, int iOffset, unsigned int iLbnd, unsigned int iHbnd, int iColour, CMandarinAlphMgr *pMgr, symbol iSymbol);
     private:
       virtual const std::string &outputText();
     };
@@ -79,7 +71,7 @@ namespace Dasher {
     class CConvRoot : public CDasherNode {
     public:
       CMandarinAlphMgr *mgr() {return m_pMgr;}
-      /// \param pConversions set of chinese-alphabet symbol numbers that the PY can convert to.
+      /// \param pConversions set of chinese-alphabet symbol numbers that the PY can convert to; must have >1 element.
       CConvRoot(CDasherNode *pParent, int iOffset, unsigned int iLbnd, unsigned int iHbnd, const std::string &strGroup, CMandarinAlphMgr *pMgr, const std::set<symbol> *pConversions);
       void PopulateChildren();
       int ExpectedNumChildren();
@@ -89,17 +81,34 @@ namespace Dasher {
       CMandarinAlphMgr *m_pMgr;
       const std::set<symbol> *m_pConversions;
     };
-    ///Override to make CMandNodes (for punctuation)
-    CMandNode *makeSymbol(CDasherNode *pParent, int iOffset, unsigned int iLbnd, unsigned int iHbnd, const std::string &strGroup, int iBkgCol, symbol iSymbol);
-    ///Override to make CConvRoots for symbols <=1288, fall back to superclass for others (punctuation)
+    ///Called to create the node for a pinyin leaf symbol;
+    /// Overridden to call either CreateConvRoot or CreateCHSymbol, according to the number of chinese symbols for the specified pinyin
+    /// \param iSymbol Symbol number in pinyin alphabet
     virtual CDasherNode *CreateSymbolNode(CAlphNode *pParent, unsigned int iLbnd, unsigned int iHbnd, const std::string &strGroup, int iBkgCol, symbol iSymbol);
-    ///Override: punctuation contexts, are the same as their parent (?!)
-    virtual CLanguageModel::Context CreateSymbolContext(CAlphNode *pParent, symbol iSymbol);
 
-    int AssignColour(int parentClr, int childIndex);
-    
+    ///Creates a CConvRoot, for a Pinyin symbol with multiple possible chinese symbols.
+    /// Colour is always 9 (opaque), so no need for background colour.
+    /// \param pParent parent node, context will be taken from here
+    /// \param iPYsym Symbol (leaf) in pinyin alphabet
+    CConvRoot *CreateConvRoot(CAlphNode *pParent, unsigned int iLbnd, unsigned int iHbnd, const std::string &strGroup, symbol iPYsym);
+      
+    ///Creates a node for (i.e. that will actually enter) a chinese symbol
+    /// \param pParent parent node: could be a CGroupNode (directly), if some pinyin symbol in that group had only
+    /// one corresponding chinese symbol (=> this), or a CConvRoot (if this chinese symbol is one of many possible
+    /// chinese symbols for a particular pinyin).
+    /// \param iContext parent node's context, from which to generate context for this node
+    /// \param strGroup caption of any elided groups (prepended to this node's caption)
+    /// \param iCHsym symbol number in chinese alphabet
+    CMandSym *CreateCHSymbol(CDasherNode *pParent, CLanguageModel::Context iContext, unsigned int iLbnd, unsigned int iHbnd, const std::string &strGroup, symbol iCHsym);
+
     void AssignSizes(std::vector<std::pair<symbol,unsigned int> > &vChildren, Dasher::CLanguageModel::Context context);
 
+    ///Gets colour for a specified chinese symbol and offset.
+    /// Uses the colour specified by the chinese alphabet for the symbol, if any; if not,
+    /// supplies defaults (different from GetColour(sym,int)!). Also
+    /// implements 2-phase colour cycling by low-bit of offset (as GetColour).
+    int GetCHColour(symbol CHsym, int iOffset) const;
+    
     const CAlphInfo *m_pCHAlphabet;
     const CAlphabetMap *m_pCHAlphabetMap;
     ///Indexed by SPY (syll+tone) alphabet symbol number,



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]