[dasher: 10/38] Refactor: make CAlphInfo inherit group-tree-storage members from SGroupInfo



commit 51647cd642a9ce5a2bfb545b96a8e3eafd404ef9
Author: Alan Lawrence <acl33 inf phy cam ac uk>
Date:   Wed Dec 7 16:21:46 2011 +0000

    Refactor: make CAlphInfo inherit group-tree-storage members from SGroupInfo
    
    Some duplication/redundancy (both std+default alphs must set iEnd after adding
     all characters), but removes many special-cases in AlphMgr.
    
    Should allow MandarinAlphMgr to override enough of AlphMgr to reinterpret all
     character data.

 Src/DasherCore/Alphabet/AlphIO.cpp                 |   16 ++-
 Src/DasherCore/Alphabet/AlphInfo.cpp               |   16 +--
 Src/DasherCore/Alphabet/AlphInfo.h                 |   17 +-
 Src/DasherCore/Alphabet/GroupInfo.h                |   11 ++
 Src/DasherCore/AlphabetManager.cpp                 |  178 +++++++++++---------
 Src/DasherCore/AlphabetManager.h                   |   29 +++-
 Src/DasherCore/ConvertingAlphMgr.cpp               |    6 +-
 .../LanguageModelling/DictLanguageModel.cpp        |    2 +-
 .../LanguageModelling/MixtureLanguageModel.h       |    2 +-
 .../LanguageModelling/WordLanguageModel.cpp        |    2 +-
 Src/DasherCore/MandarinAlphMgr.cpp                 |   10 +-
 11 files changed, 175 insertions(+), 114 deletions(-)
---
diff --git a/Src/DasherCore/Alphabet/AlphIO.cpp b/Src/DasherCore/Alphabet/AlphIO.cpp
index 8a506f6..f74ebb0 100644
--- a/Src/DasherCore/Alphabet/AlphIO.cpp
+++ b/Src/DasherCore/Alphabet/AlphIO.cpp
@@ -121,7 +121,7 @@ CAlphInfo *CAlphIO::CreateDefault() {
 //     Default.Groups[0].Characters[i].Colour = i + 10;
 //   }
   // ---
-  Default.m_pBaseGroup = 0;
+  Default.pChild = 0;
   Default.Orientation = Opts::LeftToRight;
 
   //The following creates Chars.size()+2 actual character structs in the vector,
@@ -155,6 +155,10 @@ CAlphInfo *CAlphIO::CreateDefault() {
   Default.ControlCharacter->Text = "";
   Default.ControlCharacter->Colour = 8;
 
+  Default.iStart=1; Default.iEnd=Default.m_vCharacters.size()+1;
+  Default.iNumChildNodes = Default.m_vCharacters.size();
+  Default.pNext=Default.pChild=NULL;
+  
   return &Default;
 }
 
@@ -243,7 +247,7 @@ void CAlphIO::XmlStartHandler(const XML_Char *name, const XML_Char **atts) {
     if (m_vGroups.empty()) InputInfo->iNumChildNodes++; else m_vGroups.back()->iNumChildNodes++;
 
     //by default, the first group in the alphabet is invisible
-    pNewGroup->bVisible = (InputInfo->m_pBaseGroup!=NULL);
+    pNewGroup->bVisible = (InputInfo->pChild!=NULL);
 
     while(*atts != 0) {
       if(strcmp(*atts, "name") == 0) {
@@ -268,7 +272,7 @@ void CAlphIO::XmlStartHandler(const XML_Char *name, const XML_Char **atts) {
       atts += 2;
     }
 
-    SGroupInfo *&prevSibling(m_vGroups.empty() ? InputInfo->m_pBaseGroup : m_vGroups.back()->pChild);
+    SGroupInfo *&prevSibling((m_vGroups.empty() ? InputInfo : m_vGroups.back())->pChild);
 
     if (pNewGroup->iColour==-1 && pNewGroup->bVisible) {
       //no colour specified. Try to colour cycle, but make sure we choose
@@ -374,7 +378,7 @@ void Reverse(SGroupInfo *&pList) {
 void CAlphIO::XmlEndHandler(const XML_Char *name) {
 
   if(strcmp(name, "alphabet") == 0) {
-    Reverse(InputInfo->m_pBaseGroup);
+    Reverse(InputInfo->pChild);
 
     if (ParagraphCharacter) {
       InputInfo->iParagraphCharacter = InputInfo->m_vCharacters.size()+1;
@@ -389,6 +393,8 @@ void CAlphIO::XmlEndHandler(const XML_Char *name) {
       delete SpaceCharacter;
     }
 
+    InputInfo->iEnd = InputInfo->m_vCharacters.size()+1;
+
     //if (InputInfo->StartConvertCharacter.Text != "") InputInfo->iNumChildNodes++;
     //if (InputInfo->EndConvertCharacter.Text != "") InputInfo->iNumChildNodes++;
     Alphabets[InputInfo->AlphID] = InputInfo;
@@ -416,7 +422,7 @@ void CAlphIO::XmlEndHandler(const XML_Char *name) {
     finished->iEnd = InputInfo->m_vCharacters.size()+1;
     if (finished->iEnd == finished->iStart) {
       //empty group. Delete it now, and elide from sibling chain
-      SGroupInfo *&ptr=(m_vGroups.empty() ? InputInfo->m_pBaseGroup : m_vGroups.back()->pChild);
+      SGroupInfo *&ptr=(m_vGroups.empty() ? InputInfo : m_vGroups.back())->pChild;
       DASHER_ASSERT(ptr == finished);
       ptr = finished->pNext;
       delete finished;
diff --git a/Src/DasherCore/Alphabet/AlphInfo.cpp b/Src/DasherCore/Alphabet/AlphInfo.cpp
index 910ede9..c5fb5c0 100644
--- a/Src/DasherCore/Alphabet/AlphInfo.cpp
+++ b/Src/DasherCore/Alphabet/AlphInfo.cpp
@@ -75,25 +75,19 @@ CAlphInfo::CAlphInfo() {
   ControlCharacter=NULL;
   StartConvertCharacter=NULL;
   EndConvertCharacter=NULL;
-  m_pBaseGroup = 0;
+  //Members of SGroupInfo:
+  pChild=pNext=NULL; iStart=iEnd=1; bVisible=true;
   iNumChildNodes = 0;
+  
   m_iConversionID = 0;
   m_strDefaultContext = ". ";
   m_strCtxChar = "Â";
   m_bHidden=false;
 }
 
-void DeleteGroups(SGroupInfo *Info) {
-  for(SGroupInfo *next; Info; Info=next) {
-    next = Info->pNext;
-    DeleteGroups(Info->pChild);
-    delete Info;
-    Info = next;
-  }
-}
-
 CAlphInfo::~CAlphInfo() {
-  DeleteGroups(m_pBaseGroup);
+  pChild->RecursiveDelete();
+  pNext->RecursiveDelete();
 }
 
 CAlphInfo::character::character() {
diff --git a/Src/DasherCore/Alphabet/AlphInfo.h b/Src/DasherCore/Alphabet/AlphInfo.h
index 7cbbc1a..41720a1 100644
--- a/Src/DasherCore/Alphabet/AlphInfo.h
+++ b/Src/DasherCore/Alphabet/AlphInfo.h
@@ -55,7 +55,15 @@ namespace Dasher {
 /// One CAlphInfo object is created per alphabet when the alphabet.*.xml
 /// files are read in by CAlphIO, and a CAlphabetMap object is created for
 /// the alphabet currently in use (and deleted when the alphabet is changed).
-class Dasher::CAlphInfo {
+///
+/// Note the group structure stored by inheriting from SGroupInfo; these are filled
+/// with iStart==1 (as symbol numbers are 1-indexed; 0 is reserved to indicate an
+/// "unknown symbol", and for element 0 of the prob. array to contain a 0, and
+/// symbol -1 indicates End-Of-Stream), and iEnd == one more than the number of
+/// "text" symbols (i.e. inc space and para, but no control/conversion start/end)
+/// - this is for consistency with SGroupInfo, preserving that iEnd is one more
+/// than the highest valid index.
+class Dasher::CAlphInfo : public SGroupInfo {
 private:
   friend class CAlphIO;
   struct character {
@@ -66,11 +74,8 @@ private:
     std::string Foreground;
   };
 public:
+  
   const std::string &GetID() const {return AlphID;}
-  /// Return number of text symbols - inc space and para, but no control/conversion start/end
-  /// Note symbol numbers are 1-indexed; 0 is reserved to indicate an "unknown symbol" (-1 = End-Of-Stream),
-  /// and for element 0 of the probability array to contain a 0.
-  unsigned int GetNumberTextSymbols() const {return m_vCharacters.size();}
 
   Opts::ScreenOrientations GetOrientation() const {return Orientation;}
 
@@ -112,8 +117,6 @@ public:
   /// Defaults to  if not specified in alphabet.
   const std::string &GetContextEscapeChar() const {return m_strCtxChar;}
 
-  SGroupInfo *m_pBaseGroup;
-  int iNumChildNodes;
   ///0 = normal alphabet, contains symbols to output
   ///1 = Japanese (defunct)
   ///2 = Mandarin: symbols are merely phonemes, and match up (via displaytext)
diff --git a/Src/DasherCore/Alphabet/GroupInfo.h b/Src/DasherCore/Alphabet/GroupInfo.h
index 4ce1b3d..6361102 100644
--- a/Src/DasherCore/Alphabet/GroupInfo.h
+++ b/Src/DasherCore/Alphabet/GroupInfo.h
@@ -8,11 +8,22 @@ struct SGroupInfo {
   SGroupInfo *pChild;
   SGroupInfo *pNext;
   std::string strLabel;
+  ///lowest index of symbol that is in group
   int iStart;
+  //one more than the highest index of a symbol in the group.
+  // (iStart+1==iEnd => single character)
   int iEnd;
   int iColour;
   bool bVisible;
   int iNumChildNodes;
+  void RecursiveDelete() {
+    for(SGroupInfo *t=this; t; ) {
+      SGroupInfo *next = t->pNext;
+      t->pChild->RecursiveDelete();
+      delete t;
+      t = next;
+    }
+  }
 };
 /// \}
 
diff --git a/Src/DasherCore/AlphabetManager.cpp b/Src/DasherCore/AlphabetManager.cpp
index 4730f43..8dbc161 100644
--- a/Src/DasherCore/AlphabetManager.cpp
+++ b/Src/DasherCore/AlphabetManager.cpp
@@ -53,7 +53,7 @@ static char THIS_FILE[] = __FILE__;
 #endif
 
 CAlphabetManager::CAlphabetManager(CSettingsUser *pCreateFrom, CDasherInterfaceBase *pInterface, CNodeCreationManager *pNCManager, const CAlphInfo *pAlphabet)
-  : CSettingsUser(pCreateFrom), m_pFirstGroup(NULL), m_pNCManager(pNCManager), m_pAlphabet(pAlphabet), m_pAlphabetMap(pAlphabet->MakeMap()), m_pInterface(pInterface), m_pLastOutput(NULL) {
+  : CSettingsUser(pCreateFrom), m_pBaseGroup(NULL), m_pNCManager(pNCManager), m_pAlphabet(pAlphabet), m_pAlphabetMap(pAlphabet->MakeMap()), m_pInterface(pInterface), m_pLastOutput(NULL) {
   //Look for a (single-octet) character not in the alphabet...
   for (char c=33; (c&0x80)==0; c++) {
     string s(&c,1);
@@ -77,7 +77,7 @@ void CAlphabetManager::CreateLanguageModel() {
       // If there is a bogus value for the language model ID, we'll default
       // to our trusty old PPM language model.
     case 0:
-      m_pLanguageModel = new CPPMLanguageModel(this, m_pAlphabet->GetNumberTextSymbols());
+      m_pLanguageModel = new CPPMLanguageModel(this, m_pAlphabet->iEnd-1);
       break;
     case 2:
       m_pLanguageModel = new CWordLanguageModel(this, m_pAlphabet, m_pAlphabetMap);
@@ -86,7 +86,7 @@ void CAlphabetManager::CreateLanguageModel() {
       m_pLanguageModel = new CMixtureLanguageModel(this, m_pAlphabet, m_pAlphabetMap);
       break;
     case 4:
-      m_pLanguageModel = new CCTWLanguageModel(m_pAlphabet->GetNumberTextSymbols());
+      m_pLanguageModel = new CCTWLanguageModel(m_pAlphabet->iEnd-1);
       break;
   }
 }
@@ -96,61 +96,81 @@ CTrainer *CAlphabetManager::GetTrainer() {
 }
 
 void CAlphabetManager::MakeLabels(CDasherScreen *pScreen) {
-  delete m_pFirstGroup;
-  for (vector<CDasherScreen::Label *>::iterator it=m_vLabels.begin(); it!=m_vLabels.end(); it++) {
-    delete (*it); *it = NULL;
-  }
-  m_vLabels.resize(m_pAlphabet->GetNumberTextSymbols()+1);
+  m_pBaseGroup->RecursiveDelete();
+  for (vector<CDasherScreen::Label *>::iterator it=m_vLabels.begin(); it!=m_vLabels.end(); it++)
+    delete (*it);
+  m_vLabels.clear();
   for (map<const SGroupInfo *,CDasherScreen::Label *>::iterator it=m_mGroupLabels.begin(); it!=m_mGroupLabels.end(); it++)
     delete it->second;
   m_mGroupLabels.clear();
-  m_pFirstGroup = copyGroups(pScreen, 1, m_pAlphabet->GetNumberTextSymbols()+1,m_pAlphabet->m_pBaseGroup);
-}
-
-SGroupInfo *CAlphabetManager::copyGroups(CDasherScreen *pScreen, int iStart, int iEnd, const SGroupInfo *pFirstChild) {  
-  for (int i = iStart; i< iEnd; i++) {
-    string strGroupPrefix;
-    if (pFirstChild && i>=pFirstChild->iStart) {
-      //reached group. elide any group with only a single child (see below).
-      // Variables store necessary properties of any elided groups:
-      int iBkgCol(-1);
-      for (const SGroupInfo *pInner=pFirstChild;;) {
-        if (pInner->iNumChildNodes>1) { //in/reached nontrivial subgroup - do make node for entire group:
-          SGroupInfo *pRes = new SGroupInfo(*pInner);
-          //apply properties of enclosing group(s)...
-          pRes->strLabel = strGroupPrefix + pRes->strLabel;
-          if (!pInner->bVisible) pRes->iColour = iBkgCol;
-          if (iBkgCol!=-1) pRes->bVisible=true;
-          if (pRes->strLabel.length())
-            m_mGroupLabels[pRes] = pScreen->MakeLabel(pRes->strLabel);
-          //and recurse on children
-          pRes->pChild = copyGroups(pScreen, pInner->iStart, pInner->iEnd, pInner->pChild);
-          pRes->pNext = copyGroups(pScreen, pInner->iEnd, iEnd, pFirstChild->pNext);
-          return pRes;
-        }
-        //were about to create a group node, which would have only one child
-        // (eventually, if the group node were PopulateChildren'd).
-        // Such a child would entirely fill it's parent (the group), and thus,
-        // creation/destruction of the child would cause the node's colour to flash
-        // between that for parent group and child.
-        // Hence, instead we elide the group node and create the child _here_...
-        
-        //1. however we also have to take account of the appearance of the elided group. Hence:
-        strGroupPrefix += pInner->strLabel;
-        if (pInner->bVisible) iBkgCol=pInner->iColour;
-        //2. inner group might contain a single subgroup, or a single symbol...
-        if (!pInner->pChild) break;
-        //...a subgroup, so go into it
-        pInner = pInner->pChild;
-        DASHER_ASSERT(!pInner->pNext);
-        //3. loop round inner loop...
-      }
-      pFirstChild = pFirstChild->pNext; //making a symbol, so we've still moved past the outer (elided) group
+  m_pBaseGroup = copyGroups(m_pAlphabet,pScreen);
+}
+
+SGroupInfo *CAlphabetManager::copyGroups(const SGroupInfo *pBase, CDasherScreen *pScreen) {
+  if (pBase==NULL) return NULL;
+  DASHER_ASSERT(pBase->iNumChildNodes); //zero-element groups elided by CAlphIO
+  if (m_vLabels.size()<pBase->iEnd) m_vLabels.resize(pBase->iEnd);
+  string strGroupPrefix; int iBkgCol(-1);
+  SGroupInfo * const next=copyGroups(pBase->pNext, pScreen);
+  while (pBase->iNumChildNodes==1) {
+    //we're about to create a group node, which would have only one child
+    // (eventually, if the group node were PopulateChildren'd).
+    // Such a child would entirely fill its parent (the group), and thus,
+    // creation/destruction of the child would cause the node's colour to flash
+    // between that for parent group and child.
+    // Hence, instead we elide the group node and create the child _here_...
+    
+    //1. however we also have to take account of the appearance of the elided group. Hence:
+    strGroupPrefix += pBase->strLabel;
+    if (pBase->bVisible) iBkgCol=pBase->iColour;
+    //2. group might contain a single subgroup, or a single symbol...
+    if (!pBase->pChild) {
+      //single symbol. Create its label, taking account of enclosing groups...
+      // (symbols are never transparent)
+      DASHER_ASSERT(pBase->iEnd == pBase->iStart+1);
+      string symLabel = strGroupPrefix + GetLabelText(pBase->iStart);
+      m_vLabels[pBase->iStart]=(symLabel.empty() ? NULL : pScreen->MakeLabel(symLabel));
+      //then skip this group, return any siblings
+      return next;
     }
-    string symLabel = strGroupPrefix + GetLabelText(i);
-    m_vLabels[i]=(symLabel.empty() ? NULL : pScreen->MakeLabel(symLabel));
+    //...a subgroup, so go into it
+    pBase = pBase->pChild;
+    DASHER_ASSERT(pBase->pNext==NULL); //can't have siblings as parent has only one child
+                                       //hence, original 'next' pointer is still valid
+    //3. loop round...
   }
-  return NULL;
+  //in or reached nontrivial subgroup - so make node for entire group
+  //First, make (unprefixed) labels for all children in (original) group
+  // (children of subgroups that are later elided, will have labels made at elision time)
+  {
+    SGroupInfo *pChild=pBase->pChild;
+    for (int i=pBase->iStart; i<pBase->iEnd;)
+      if (!pChild || i<pChild->iStart) {
+        const string &symLabel(GetLabelText(i));
+        m_vLabels[i] = (symLabel.empty() ? NULL : pScreen->MakeLabel(symLabel));
+        i++;
+      } else {
+        i=pChild->iEnd;
+        pChild = pChild->pNext;
+      }
+  }
+  SGroupInfo *pRes = new SGroupInfo(*pBase);
+  //apply properties of enclosing group(s)...
+  pRes->strLabel = strGroupPrefix + pRes->strLabel;
+  if (!pRes->bVisible)
+    if ((pRes->iColour = iBkgCol)!=-1) pRes->bVisible=true;
+  if (pRes->strLabel.length())
+    m_mGroupLabels[pRes] = pScreen->MakeLabel(pRes->strLabel);
+  //siblings (of this group or elided parent) copied already, from original
+  // (passed-in) pBase: if pBase unchanged, then still valid, whereas if pBase
+  // was changed by the above loop to be a subgroup of the original, then the subgroup
+  // has no siblings, so should be spliced in place of the original pBase.
+  pRes->pNext = next;
+
+  //recurse on children
+  pRes->pChild = copyGroups(pRes->pChild, pScreen);
+  DASHER_ASSERT(pRes->iNumChildNodes>1);
+  return pRes;
 }
 
 CWordGeneratorBase *CAlphabetManager::GetGameWords() {
@@ -261,10 +281,10 @@ CAlphabetManager::CSymbolNode::CSymbolNode(int iOffset, int iColour, CDasherScre
 
 CAlphabetManager::CGroupNode::CGroupNode(int iOffset, CDasherScreen::Label *pLabel, int iBkgCol, CAlphabetManager *pMgr, const SGroupInfo *pGroup)
 : CAlphNode(iOffset,
-            pGroup ? (pGroup->bVisible ? pGroup->iColour : iBkgCol)
-            : (iOffset&1) ? 7 : 137, //special case for root nodes
+            pGroup==pMgr->m_pBaseGroup ? ((iOffset&1) ? 7 : 137) //special case for root nodes
+            : (pGroup->bVisible ? pGroup->iColour : iBkgCol),
             pLabel, pMgr), m_pGroup(pGroup) {
-  if (m_pGroup && !m_pGroup->bVisible) SetFlag(NF_VISIBLE, false);
+  if (!m_pGroup->bVisible) SetFlag(NF_VISIBLE, false);
 }
 
 CAlphabetManager::CAlphNode *CAlphabetManager::GetRoot(CDasherNode *pParent, bool bEnteredLast, int iOffset) {
@@ -276,7 +296,7 @@ CAlphabetManager::CAlphNode *CAlphabetManager::GetRoot(CDasherNode *pParent, boo
   CAlphNode *pNewNode;
   if(p.first==0 || !bEnteredLast) {
     //couldn't extract last symbol (so probably using default context), or shouldn't
-    pNewNode = new CGroupNode(iNewOffset, NULL, 0, this, NULL); //default background colour
+    pNewNode = new CGroupNode(iNewOffset, NULL, 0, this, m_pBaseGroup); //default background colour
   } else {
     //new node represents a symbol that's already happened - i.e. user has already steered through it;
     // so either we're rebuilding, or else creating a new root from existing text (in edit box)
@@ -332,13 +352,16 @@ bool CAlphabetManager::CSymbolNode::GameSearchNode(symbol sym) {
   return false;
 }
 bool CAlphabetManager::CGroupNode::GameSearchNode(symbol sym) {
-  if (GetFlag(NF_ALLCHILDREN) ? GameSearchChildren(sym)
-      : m_pGroup ? (sym >= m_pGroup->iStart && sym < m_pGroup->iEnd)
-      : (sym >= 1 && sym < m_pMgr->m_pNCManager->GetAlphabet()->GetNumberTextSymbols()+1)) {
+  if (sym >= m_pGroup->iStart && sym < m_pGroup->iEnd) {
+    if (GetFlag(NF_ALLCHILDREN)) {
+      if (!GameSearchChildren(sym)) //recurse, to mark game child also
+        DASHER_ASSERT(false); //sym within this group, should definitely be found!
+    }
     SetFlag(NF_GAME, true);
     return true;
   }
-return false;
+  DASHER_ASSERT(!GameSearchChildren(sym));
+  return false;
 }
 
 void CAlphabetManager::CSymbolNode::GetContext(CDasherInterfaceBase *pInterface, const CAlphabetMap *pAlphabetMap, vector<symbol> &vContextSymbols, int iOffset, int iLength) {
@@ -355,15 +378,15 @@ symbol CAlphabetManager::CSymbolNode::GetAlphSymbol() {
 }
 
 void CAlphabetManager::CSymbolNode::PopulateChildren() {
-  m_pMgr->IterateChildGroups(this, NULL, NULL);
+  m_pMgr->IterateChildGroups(this, m_pMgr->m_pBaseGroup, NULL);
 }
 int CAlphabetManager::CAlphNode::ExpectedNumChildren() {
-  int i=m_pMgr->m_pAlphabet->iNumChildNodes;
+  int i=m_pMgr->m_pBaseGroup->iNumChildNodes;
   return (m_pMgr->GetBoolParameter(BP_CONTROL_MODE)) ? i+1 : i;
 }
 
 void CAlphabetManager::GetProbs(vector<unsigned int> *pProbInfo, CLanguageModel::Context context) {
-  const unsigned int iSymbols = m_pAlphabet->GetNumberTextSymbols();
+  const unsigned int iSymbols = m_pBaseGroup->iEnd-1;
   
   // TODO - sort out size of control node - for the timebeing I'll fix the control node at 5%
   // TODO: New method (see commented code) has been removed as it wasn' working.
@@ -409,8 +432,7 @@ std::vector<unsigned int> *CAlphabetManager::CAlphNode::GetProbInfo() {
 }
 
 std::vector<unsigned int> *CAlphabetManager::CGroupNode::GetProbInfo() {
-  if (m_pGroup && Parent() && Parent()->mgr() == mgr()) {
-    DASHER_ASSERT(Parent()->offset() == offset());
+  if (Parent() && Parent()->mgr() == mgr() && Parent()->offset()==offset()) {
     return (static_cast<CAlphNode *>(Parent()))->GetProbInfo();
   }
   //nope, no usable parent. compute here...
@@ -422,7 +444,7 @@ void CAlphabetManager::CGroupNode::PopulateChildren() {
 }
 
 int CAlphabetManager::CGroupNode::ExpectedNumChildren() {
-  return (m_pGroup) ? m_pGroup->iNumChildNodes : CAlphNode::ExpectedNumChildren();
+  return m_pGroup->iNumChildNodes;
 }
 
 CAlphabetManager::CGroupNode *CAlphabetManager::CreateGroupNode(CAlphNode *pParent, int iBkgCol, const SGroupInfo *pInfo) {
@@ -500,9 +522,9 @@ CDasherNode *CAlphabetManager::CSymbolNode::RebuildSymbol(CAlphNode *pParent, sy
 void CAlphabetManager::IterateChildGroups(CAlphNode *pParent, const SGroupInfo *pParentGroup, CAlphBase *buildAround) {
   std::vector<unsigned int> *pCProb(pParent->GetProbInfo());
   DASHER_ASSERT((*pCProb)[0] == 0);
-  const int iMin(pParentGroup ? pParentGroup->iStart : 1);
-  const int iMax(pParentGroup ? pParentGroup->iEnd : m_pAlphabet->GetNumberTextSymbols()+1);
-  unsigned int iRange(pParentGroup ? ((*pCProb)[iMax-1] - (*pCProb)[iMin-1]) : CDasherModel::NORMALIZATION);
+  const int iMin(pParentGroup->iStart);
+  const int iMax(pParentGroup->iEnd);
+  unsigned int iRange(((*pCProb)[iMax-1] - (*pCProb)[iMin-1]));
 
   // TODO: Think through alphabet file formats etc. to make this class easier.
   // TODO: Throw a warning if parent node already has children
@@ -510,7 +532,7 @@ void CAlphabetManager::IterateChildGroups(CAlphNode *pParent, const SGroupInfo *
   // Create child nodes and add them
 
   int i(iMin); //lowest index of child which we haven't yet added
-  const SGroupInfo *pCurrentNode(pParentGroup ? pParentGroup->pChild : m_pFirstGroup);
+  const SGroupInfo *pCurrentNode(pParentGroup->pChild);
   // The SGroupInfo structure has something like linked list behaviour
   // Each SGroupInfo contains a pNext, a pointer to a sibling group info
   while (i < iMax) {
@@ -621,10 +643,14 @@ CDasherNode *CAlphabetManager::CGroupNode::RebuildParent() {
 
   if (Parent()) return Parent();
 
-  // CGroupNodes with an m_pGroup have a container i.e. the parent group, unless
-  // m_pGroup==NULL => "root" node where m_pMgr->m_pFirstGroup is the *first*child*...
-  if (m_pGroup == NULL) return NULL;
-
+  if (m_pGroup == m_pMgr->m_pBaseGroup) {
+    //top level root node.
+    //if (offset()>0), there was _something_ before us, like
+    // a control node; but we no longer know what!
+    return NULL;
+  }
+  
+  //All other CGroupNode's have a container i.e. the parent group
   return CAlphBase::RebuildParent();
 }
 
@@ -646,7 +672,7 @@ CDasherNode *CAlphabetManager::CAlphBase::RebuildParent() {
 
 void CAlphabetManager::CAlphBase::RebuildForwardsFromAncestor(CAlphNode *pNewNode) {
   //now fill in the new node - recursively - until it reaches us
-  m_pMgr->IterateChildGroups(pNewNode, NULL, this);
+  m_pMgr->IterateChildGroups(pNewNode, m_pMgr->m_pBaseGroup, this);
 }
 
 // TODO: Shouldn't there be an option whether or not to learn as we write?
diff --git a/Src/DasherCore/AlphabetManager.h b/Src/DasherCore/AlphabetManager.h
index 044dd88..32131a9 100644
--- a/Src/DasherCore/AlphabetManager.h
+++ b/Src/DasherCore/AlphabetManager.h
@@ -45,12 +45,23 @@ namespace Dasher {
   /// to the appropriate alphabet file, with sizes given by the
   /// language model.
   ///
+  /// Note Dec11, refactoring to allow subclasses to change how character
+  /// data is obtained from the alphabet. All information on valid symbol indices
+  /// and the tree of groups, is obtained from m_pBaseGroup, which is created
+  /// by a call to copyGroups. Besides this, the only routines accessing _symbol_
+  /// data from the alphabet are: CreateLanguageModel; GetTrainer;
+  /// GetColour (called from CSymbolNode constructor); CreateSymbolNode and
+  /// CSymbolNode::outputText(). [many other routines access e.g. default context, training file, and so on]
+
   class CAlphabetManager : public CNodeManager, protected CSettingsUser {
   public:
     ///Create a new AlphabetManager. Note, not usable until CreateLanguageModel() called.
     CAlphabetManager(CSettingsUser *pCreateFrom, CDasherInterfaceBase *pInterface, CNodeCreationManager *pNCManager, const CAlphInfo *pAlphabet);
+    
     ///Creates the LM, and stores in m_pLanguageModel. Must be called after construction,
     /// before the AlphMgr is used. Default implementation switches on LP_LANGUAGE_MODEL_ID.
+    /// Note subclasses changing the interpretation of the AlphInfo, should override
+    /// this to take account of its new meaning.
     virtual void CreateLanguageModel();
 
     virtual void MakeLabels(CDasherScreen *pScreen);
@@ -66,8 +77,20 @@ namespace Dasher {
     /// \param pInterface to use for I/O by calling WriteTrainFile(fname,txt)
     void WriteTrainFileFull(CDasherInterfaceBase *pInterface);
   protected:
-    ///Post-processed version of alphabet group tree, eliding all groups with only a single child.
-    SGroupInfo *m_pFirstGroup;
+    ///Base of all group+character information presented to the user;
+    /// created by calling copyGroups on the alphabet.
+    SGroupInfo *m_pBaseGroup;
+    ///Called to create the base group the AlphMgr will use from the alphabet.
+    /// The default implementation elides all single-element groups, and fills in
+    /// m_mGroupLabels and m_vLabels using the supplied screen; subclasses may
+    /// override to do more, but should call the superclass method to set up the
+    /// labels too.
+    /// (Note: each invocation creates labels for all symbols in pBase, *and*
+    /// all symbols in any later siblings of pBase (by recursive call on pNext).
+    /// Of those, symbols in any child groups may be made by recursive call on
+    /// pChild, but only if pBase has >1 child node (symbol/group).)
+    virtual SGroupInfo *copyGroups(const SGroupInfo *pBase, CDasherScreen *pScreen);
+    
     ///A label for each group in the elided tree
     std::map<const SGroupInfo *,CDasherScreen::Label *> m_mGroupLabels;
     ///A label for each symbol, indexed by symbol id (element 0 = null)
@@ -242,8 +265,6 @@ namespace Dasher {
     /// Returns array of non-cumulative probs. Should this be protected and/or virtual???
     void GetProbs(std::vector<unsigned int> *pProbs, CLanguageModel::Context iContext);
     
-    SGroupInfo *copyGroups(CDasherScreen *pScreen, int iStart, int iEnd, const SGroupInfo *pFirstChild);
-    
     ///Constructs child nodes under the specified parent according to provided group.
     /// Nodes are created by calling CreateSymbolNode and CreateGroupNode, unless buildAround is non-null.
     /// \param pParentGroup group describing which symbols and/or subgroups should be constructed
diff --git a/Src/DasherCore/ConvertingAlphMgr.cpp b/Src/DasherCore/ConvertingAlphMgr.cpp
index 8574bca..ca6e686 100644
--- a/Src/DasherCore/ConvertingAlphMgr.cpp
+++ b/Src/DasherCore/ConvertingAlphMgr.cpp
@@ -25,10 +25,10 @@ CConvertingAlphMgr::~CConvertingAlphMgr() {
 }
 
 CDasherNode *CConvertingAlphMgr::CreateSymbolNode(CAlphNode *pParent, symbol iSymbol) {
-  int i=m_pAlphabet->GetNumberTextSymbols()+1;
-  if (iSymbol == i) {
+  //int i=m_pAlphabet->iEnd;
+  if (iSymbol == m_pAlphabet->iEnd) {
     vector<unsigned int> *pCProb(pParent->GetProbInfo());
-    DASHER_ASSERT(pCProb->size() == m_pAlphabet->GetNumberTextSymbols()+2);//initial 0, final conversion prob
+    DASHER_ASSERT(pCProb->size() == m_pAlphabet->iEnd+1);//initial 0, final conversion prob
 
     //this used to be the "CloneAlphContext" method. Why it uses the
     // ConversionManager's LM to clone a context from an Alphabet Node,
diff --git a/Src/DasherCore/LanguageModelling/DictLanguageModel.cpp b/Src/DasherCore/LanguageModelling/DictLanguageModel.cpp
index 10d64fa..00eeedd 100644
--- a/Src/DasherCore/LanguageModelling/DictLanguageModel.cpp
+++ b/Src/DasherCore/LanguageModelling/DictLanguageModel.cpp
@@ -91,7 +91,7 @@ CDictLanguageModel::CDictnode * CDictLanguageModel::AddSymbolToNode(CDictnode *p
 /////////////////////////////////////////////////////////////////////
 
 CDictLanguageModel::CDictLanguageModel(CSettingsUser *pCreator, const CAlphInfo *pAlph, const CAlphabetMap *pAlphMap)
-:CLanguageModel(pAlph->GetNumberTextSymbols()), CSettingsUser(pCreator), m_pAlphMap(pAlphMap), m_iSpaceSymbol(pAlph->GetSpaceSymbol()), NodesAllocated(0), max_order(0), m_NodeAlloc(8192), m_ContextAlloc(1024) {
+:CLanguageModel(pAlph->iEnd-1), CSettingsUser(pCreator), m_pAlphMap(pAlphMap), m_iSpaceSymbol(pAlph->GetSpaceSymbol()), NodesAllocated(0), max_order(0), m_NodeAlloc(8192), m_ContextAlloc(1024) {
   m_pRoot = m_NodeAlloc.Alloc();
   m_pRoot->sbl = -1;
   m_rootcontext = new CDictContext(m_pRoot, 0);
diff --git a/Src/DasherCore/LanguageModelling/MixtureLanguageModel.h b/Src/DasherCore/LanguageModelling/MixtureLanguageModel.h
index 129c7d1..6f0d518 100644
--- a/Src/DasherCore/LanguageModelling/MixtureLanguageModel.h
+++ b/Src/DasherCore/LanguageModelling/MixtureLanguageModel.h
@@ -29,7 +29,7 @@ namespace Dasher {
     /////////////////////////////////////////////////////////////////////////////
 
     CMixtureLanguageModel(CSettingsUser *pCreator, const CAlphInfo *pAlph, const CAlphabetMap *pAlphMap)
-    : CLanguageModel(pAlph->GetNumberTextSymbols()), CSettingsUser(pCreator) {
+    : CLanguageModel(pAlph->iEnd-1), CSettingsUser(pCreator) {
 
       //      std::cout << m_pAlphabet << std::endl;
 
diff --git a/Src/DasherCore/LanguageModelling/WordLanguageModel.cpp b/Src/DasherCore/LanguageModelling/WordLanguageModel.cpp
index 32529af..dbcbb3d 100644
--- a/Src/DasherCore/LanguageModelling/WordLanguageModel.cpp
+++ b/Src/DasherCore/LanguageModelling/WordLanguageModel.cpp
@@ -123,7 +123,7 @@ CWordLanguageModel::CWordnode * CWordLanguageModel::AddSymbolToNode(CWordnode *p
 
 CWordLanguageModel::CWordLanguageModel(CSettingsUser *pCreator, 
 				       const CAlphInfo *pAlph, const CAlphabetMap *pAlphMap)
-  :CLanguageModel(pAlph->GetNumberTextSymbols()), CSettingsUser(pCreator), m_pAlphMap(pAlphMap), m_iSpaceSymbol(pAlph->GetSpaceSymbol()), NodesAllocated(0), 
+  :CLanguageModel(pAlph->iEnd-1), CSettingsUser(pCreator), m_pAlphMap(pAlphMap), m_iSpaceSymbol(pAlph->GetSpaceSymbol()), NodesAllocated(0), 
    max_order(2), m_NodeAlloc(8192), m_ContextAlloc(1024) {
   
   // Construct a root node for the trie
diff --git a/Src/DasherCore/MandarinAlphMgr.cpp b/Src/DasherCore/MandarinAlphMgr.cpp
index 7499176..07563d8 100644
--- a/Src/DasherCore/MandarinAlphMgr.cpp
+++ b/Src/DasherCore/MandarinAlphMgr.cpp
@@ -48,7 +48,7 @@ static char THIS_FILE[] = __FILE__;
 
 CMandarinAlphMgr::CMandarinAlphMgr(CSettingsUser *pCreator, CDasherInterfaceBase *pInterface, CNodeCreationManager *pNCManager, const CAlphInfo *pAlphabet, const CAlphIO *pAlphIO)
   : CAlphabetManager(pCreator, pInterface, pNCManager, pAlphabet),
-    m_pConversionsBySymbol(new vector<symbol>[GetAlphabet()->GetNumberTextSymbols()+1]) {
+    m_pConversionsBySymbol(new vector<symbol>[GetAlphabet()->iEnd]) {
   DASHER_ASSERT(pAlphabet->m_iConversionID==2);
       
   //the CHAlphabet contains a group for each SPY syllable+tone, with symbols being chinese characters.      
@@ -64,7 +64,7 @@ CMandarinAlphMgr::CMandarinAlphMgr(CSettingsUser *pCreator, CDasherInterfaceBase
     conversions[pCHAlphabet->GetDisplayText(para)]=pair<symbol,symbol>(para,para+1);
   //Non-recursive traversal of all the groups in the CHAlphabet (we don't care where they are, just to find them)
   vector<const SGroupInfo *> groups;
-  groups.push_back(pCHAlphabet->m_pBaseGroup);
+  groups.push_back(pCHAlphabet->pChild);
   while (!groups.empty()) {
     const SGroupInfo *pGroup(groups.back()); groups.pop_back();
     if (pGroup->pNext) groups.push_back(pGroup->pNext);
@@ -88,7 +88,7 @@ CMandarinAlphMgr::CMandarinAlphMgr(CSettingsUser *pCreator, CDasherInterfaceBase
   // between indices and actual chinese unicode characters.
   m_CHtext.push_back(""); m_CHdisplayText.push_back(""); m_CHcolours.push_back(0); //as usual, element 0 is the "unknown symbol"
   std::vector<symbol> vSyms;
-  for (symbol i=1; i<=GetAlphabet()->GetNumberTextSymbols(); i++) {
+  for (symbol i=1; i<GetAlphabet()->iEnd; i++) {
     DASHER_ASSERT(conversions.find(m_pAlphabet->GetDisplayText(i))!=conversions.end());
     pair<symbol,symbol> convs(conversions[m_pAlphabet->GetDisplayText(i)]);
     //for each chinese unicode character in the group, hash it to ensure same unicode = same index into m_CH{text,displayText,AlphabetMap}
@@ -132,7 +132,7 @@ CMandarinAlphMgr::~CMandarinAlphMgr() {
 void CMandarinAlphMgr::CreateLanguageModel() {
   //std::cout<<"CHALphabet size "<< pCHAlphabet->GetNumberTextSymbols(); [7603]
   //std::cout<<"Setting PPMPY model"<<std::endl;
-  m_pLanguageModel = new CPPMPYLanguageModel(this, m_CHtext.size()-1, m_pAlphabet->GetNumberTextSymbols());
+  m_pLanguageModel = new CPPMPYLanguageModel(this, m_CHtext.size()-1, m_pAlphabet->iEnd-1);
 }
 
 CTrainer *CMandarinAlphMgr::GetTrainer() {
@@ -150,7 +150,7 @@ CAlphabetManager::CAlphNode *CMandarinAlphMgr::GetRoot(CDasherNode *pParent, boo
 
   CAlphNode *pNewNode;
   if (p.first==0 || !bEnteredLast) {
-    pNewNode = new CGroupNode(iNewOffset, NULL, 0, this, NULL);
+    pNewNode = new CGroupNode(iNewOffset, NULL, 0, this, m_pBaseGroup);
   } else {
     DASHER_ASSERT(p.first>0 && p.first<m_CHtext.size());
     pNewNode = new CMandSym(iNewOffset, this, p.first, 0);



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]