[dasher] When (re)building nodes, only use characters in current alphabet to make context

From: Patrick Welche <pwelche src gnome org>
To: svn-commits-list gnome org
Cc:
Subject: [dasher] When (re)building nodes, only use characters in current alphabet to make context
Date: Sat, 19 Dec 2009 22:20:24 +0000 (UTC)
commit 7e2c7e54c790b7e6d180c95e234f563e4022e7fb
Author: Alan Lawrence <acl33 inf phy cam ac uk>
Date:   Fri Dec 18 15:46:55 2009 +0000

    When (re)building nodes, only use characters in current alphabet to make context
    
    (any symbol not in the alphabet is ignored, now along with everything before it).
    Default context used if no legal characters before context.
    Refuses to RebuildParent any node for a symbol not in alphabet
        (user must change language to reverse any further)
    Also put in a quick one-line implementation of DasherModel::GetOffset. Is it ok?

 Src/DasherCore/Alphabet/Alphabet.cpp |   36 +++---------------------
 Src/DasherCore/Alphabet/Alphabet.h   |    2 +-
 Src/DasherCore/AlphabetManager.cpp   |   50 +++++++++++++++++++++++++++------
 Src/DasherCore/DasherModel.h         |    5 ++-
 4 files changed, 49 insertions(+), 44 deletions(-)
---
diff --git a/Src/DasherCore/Alphabet/Alphabet.cpp b/Src/DasherCore/Alphabet/Alphabet.cpp
index 2bfa11a..328a1ab 100644
--- a/Src/DasherCore/Alphabet/Alphabet.cpp
+++ b/Src/DasherCore/Alphabet/Alphabet.cpp
@@ -26,7 +26,7 @@
 #include "Alphabet.h"
 #include "AlphabetMap.h"
 #include <cstring>
-
+#include <sstream>
 
 using namespace Dasher;
 using namespace std;
@@ -185,38 +185,10 @@ void CAlphabet::GetSymbols(std::vector<symbol> &symbols, std::istream &in) const
   delete [] utfchar;
 }
 
-void CAlphabet::GetSymbols(std::vector<symbol>& Symbols, std::string& Input) const
+void CAlphabet::GetSymbols(std::vector<symbol>& Symbols, const std::string& Input) const
 {
-  string Tmp;
-  symbol CurSymbol = 0;
-  int extras;
-  unsigned int bit;
-
-  for(unsigned int i = 0; i < Input.size(); i++) {
-
-    Tmp = Input[i];
-
-    /* The string we've been given is in UTF-8. The symbols are
-       also in UTF-8, so we need to pass the entire UTF-8 character
-       which may be several bytes long. RFC 2279 describes this
-       encoding */
-
-    if(Input[i] & 0x80) {    // Character is more than 1 byte long
-      extras = 1;
-      for(bit = 0x20; (Input[i] & bit) != 0; bit >>= 1)
-        extras++;
-      if(extras > 5) {
-      }                         // Malformed character
-      while(extras-- > 0) {
-        Tmp += Input[++i];
-      }
-    }
-
-    CurSymbol = TextMap.Get(Tmp);
-
-    if(CurSymbol != 0)
-      Symbols.push_back(CurSymbol);
-  }
+  std::istringstream in(Input);
+  GetSymbols(Symbols, in);
 }
 
 // add single char to the character set
diff --git a/Src/DasherCore/Alphabet/Alphabet.h b/Src/DasherCore/Alphabet/Alphabet.h
index 6822019..da5fe49 100644
--- a/Src/DasherCore/Alphabet/Alphabet.h
+++ b/Src/DasherCore/Alphabet/Alphabet.h
@@ -109,7 +109,7 @@ namespace Dasher {
     // is not necessarily reversible by repeated use of GetText. Some text
     // may not be recognised and so discarded. }}}
 
-    void GetSymbols(std::vector<symbol> &Symbols, std::string &Input) const;
+    void GetSymbols(std::vector<symbol> &Symbols, const std::string &Input) const;
     void GetSymbols(std::vector<symbol> &symbols, std::istream &in) const;
 
     void Trace() const;         // diagnostic
diff --git a/Src/DasherCore/AlphabetManager.cpp b/Src/DasherCore/AlphabetManager.cpp
index a35f6d0..2d00985 100644
--- a/Src/DasherCore/AlphabetManager.cpp
+++ b/Src/DasherCore/AlphabetManager.cpp
@@ -76,7 +76,15 @@ CAlphabetManager::CGroupNode *CAlphabetManager::makeGroup(CDasherNode *pParent,
 CAlphabetManager::CAlphNode *CAlphabetManager::GetRoot(CDasherNode *pParent, int iLower, int iUpper, bool bEnteredLast, int iOffset) {
   
   CAlphNode *pNewNode = BuildNodeForOffset(pParent, iLower, iUpper, bEnteredLast, max(-1,iOffset-1));
-
+  if (!pNewNode) {
+    DASHER_ASSERT(bEnteredLast);
+    //could not build a node 'responsible' for entering the preceding character,
+    // as said character is not in the current alphabet! However, we'll allow the
+    // user to start entering text afresh
+    return BuildNodeForOffset(pParent, iLower, iUpper, false, iOffset);
+    // (the new node'll be constructed using the current alphabet's default context,
+    // i.e. start of sentence)
+  }
   pNewNode->SetFlag(NF_SEEN, true);
   
   //    if(m_bGameMode) {
@@ -108,13 +116,30 @@ CAlphabetManager::CAlphNode *CAlphabetManager::BuildNodeForOffset(CDasherNode *p
   
   CAlphNode *pNewNode;
   CLanguageModel::Context iContext = m_pLanguageModel->CreateEmptyContext();
-    
-  for(std::vector<symbol>::iterator it(vContextSymbols.begin()); it != vContextSymbols.end(); ++it)
-    if(*it != 0)
-      m_pLanguageModel->EnterSymbol(iContext, *it);
-    
-  if((vContextSymbols.size() == 0) || !bSym) {
-    //this node can't be responsible for entering the last symbol if there wasn't one!
+  
+  std::vector<symbol>::iterator it = vContextSymbols.end();
+  while (it!=vContextSymbols.begin()) {
+    if (*(--it) == 0) {
+      //found an impossible symbol! start after it
+      ++it;
+      break;
+    }
+  }
+  if (it == vContextSymbols.end()) {
+    //previous character was not in the alphabet!
+    if (bSym) return NULL; //can't construct a node "responsible" for entering such a character!
+    //ok. Create a node as if we were starting a new sentence...
+    vContextSymbols.clear();
+    m_pNCManager->GetAlphabet()->GetSymbols(vContextSymbols, m_pNCManager->GetAlphabet()->GetDefaultContext());
+    it = vContextSymbols.begin();
+    //TODO: What if the default context somehow contains symbols not in the alphabet?
+  }
+  //enter the symbols we could make sense of, into the LM context...
+  while (it != vContextSymbols.end()) {
+    m_pLanguageModel->EnterSymbol(iContext, *(it++));
+  }
+  
+  if(!bSym) {
     pDisplayInfo->strDisplayText = ""; //equivalent to do m_pNCManager->GetAlphabet()->GetDisplayText(0)
     pDisplayInfo->iColour = m_pNCManager->GetAlphabet()->GetColour(0, iNewOffset%2);
     pNewNode = makeGroup(pParent, iLower, iUpper, pDisplayInfo, NULL);
@@ -426,7 +451,14 @@ CDasherNode *CAlphabetManager::CAlphNode::RebuildParent(int iNewOffset) {
   if (Parent()) return Parent();
   
   CAlphNode *pNewNode = m_pMgr->BuildNodeForOffset(NULL, 0, 0, iNewOffset!=-1, iNewOffset);
-
+  if (!pNewNode) {
+    //could not rebuild parent node, as the preceding character was not
+    // in the current alphabet. Returning null means the user won't be able
+    // to reverse any further; he'll have to change language (to one
+    // including that symbol) instead.
+    return NULL;
+  }
+  
   //now fill in the new node - recursively - until it reaches us
   m_pMgr->IterateChildGroups(pNewNode, NULL, this);
 
diff --git a/Src/DasherCore/DasherModel.h b/Src/DasherCore/DasherModel.h
index ea8747f..f2f4b5d 100644
--- a/Src/DasherCore/DasherModel.h
+++ b/Src/DasherCore/DasherModel.h
@@ -186,11 +186,12 @@ class Dasher::CDasherModel:public CFrameRate, private NoClones
   void SetOffset(int iLocation, CDasherView *pView);
 
   ///
-  /// TODO: Implement this
+  /// TODO: Figure out how all these "offset"s work / relate to each other - if they do! In particular,
+  /// what do we need DasherModel's own m_iOffset (which measures in _bytes_, not unicode characters!) for?
   ///
 
   int GetOffset() {
-    return 0;
+    return m_pLastOutput->m_iOffset+1;
   };
 
   /// Create the children of a Dasher node
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]