[dasher] Cleanups to Alphabet code:



commit 3bc06daeaba3f39ef1e61e42a32ece2356f6d1b4
Author: Alan Lawrence <acl33 inf phy cam ac uk>
Date:   Mon Aug 10 10:46:29 2009 +0200

    Cleanups to Alphabet code:
    - move RecursiveAdd into Add, as 3rd param was unused;
    - remove unused 'KeyIsPrefix' member/params (was always false!)
    - change many signatures to use references not pointers
    - remove unused boolean parameter "IsMore" from CAlphabet::GetSymbols
    - remove CAlphabet::GetSymbolsFull (use GetSymbols!)
    - remove NCManager::LearnText (unused)
    - {Un/}signed comparison in AlphabetManagerFactory
    (some of this already appeared in earlier patches)

 ChangeLog                                          |    2 +
 Src/DasherCore/Alphabet/Alphabet.cpp               |   54 +++-----------------
 Src/DasherCore/Alphabet/Alphabet.h                 |   17 +-----
 Src/DasherCore/AlphabetManager.cpp                 |    4 +-
 Src/DasherCore/AlphabetManager.h                   |    2 +-
 Src/DasherCore/AlphabetManagerFactory.cpp          |    2 +-
 .../LanguageModelling/DictLanguageModel.cpp        |    2 +-
 .../LanguageModelling/WordLanguageModel.cpp        |    2 +-
 Src/DasherCore/NodeCreationManager.cpp             |   11 +----
 Src/DasherCore/NodeCreationManager.h               |    1 -
 Src/DasherCore/PinYinConversionHelper.cpp          |    6 +-
 Src/DasherCore/Trainer.cpp                         |    9 ++--
 Src/DasherCore/TrainingHelper.cpp                  |    2 +-
 13 files changed, 27 insertions(+), 87 deletions(-)
---
diff --git a/ChangeLog b/ChangeLog
index a338a4c..5210130 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -6,6 +6,8 @@
 	* DasherView{,Square}: Remove b1D/bNonLinearity.
 	* MacOSX: Remove ZippyCache as DasherViewOpenGL makes no use of it.
 	* Remove empty DasherView.inl and move input filters into namespace Dasher.
+	* Change signatures (e.g. GetSymbols) from pointers to references;
+	  g/c IsMore, GetSymbolsFull, LearnText.
 
 2009-08-08  Alan Lawrence <acl33 inf phy cam ac uk>
 
diff --git a/Src/DasherCore/Alphabet/Alphabet.cpp b/Src/DasherCore/Alphabet/Alphabet.cpp
index fc70415..53516ca 100644
--- a/Src/DasherCore/Alphabet/Alphabet.cpp
+++ b/Src/DasherCore/Alphabet/Alphabet.cpp
@@ -184,30 +184,30 @@ void CAlphabet::GetSymbols(std::vector<symbol> &symbols, std::istream &in) const
   delete [] utfchar;
 }
 
-void CAlphabet::GetSymbols(std::vector<symbol> *Symbols, std::string * Input, bool IsMore) const
+void CAlphabet::GetSymbols(std::vector<symbol>& Symbols, std::string& Input) const
 {
   string Tmp;
-  symbol CurSymbol = 0, TmpSymbol = 0;
+  symbol CurSymbol = 0;
   int extras;
   unsigned int bit;
 
-  for(unsigned int i = 0; i < Input->size(); i++) {
+  for(unsigned int i = 0; i < Input.size(); i++) {
 
-    Tmp = (*Input)[i];
+    Tmp = Input[i];
 
     /* The string we've been given is in UTF-8. The symbols are
        also in UTF-8, so we need to pass the entire UTF-8 character
        which may be several bytes long. RFC 2279 describes this
        encoding */
 
-    if((*Input)[i] & 0x80) {    // Character is more than 1 byte long
+    if(Input[i] & 0x80) {    // Character is more than 1 byte long
       extras = 1;
-      for(bit = 0x20; ((*Input)[i] & bit) != 0; bit >>= 1)
+      for(bit = 0x20; (Input[i] & bit) != 0; bit >>= 1)
         extras++;
       if(extras > 5) {
       }                         // Malformed character
       while(extras-- > 0) {
-        Tmp += (*Input)[++i];
+        Tmp += Input[++i];
       }
     }
 
@@ -216,46 +216,6 @@ void CAlphabet::GetSymbols(std::vector<symbol> *Symbols, std::string * Input, bo
     if(CurSymbol != 0)
       Symbols->push_back(CurSymbol);
   }
-
-  if(IsMore)
-    *Input = "";
-}
-
-void CAlphabet::GetSymbolsFull(std::vector<symbol > *Symbols, std::string *Input) const {
- 
-  std::string::iterator it = Input->begin();
-
-  while(it != Input->end()) {
-    unsigned char c = static_cast<unsigned char>(*it);
-
-    int iNBytes;
-
-    if(c <= 0x7F)
-      iNBytes = 1;
-    else if((c >= 0xC2) && (c <= 0xDF))
-      iNBytes = 2;
-    else if((c >= 0xE0) && (c <= 0xEF))
-      iNBytes = 3;
-    else if((c >= 0xF0) && (c <= 0xF4))
-      iNBytes = 4;
-    else {
-      // TODO: Error condition - handle this.
-      iNBytes = 1;
-    }
-
-    std::string strCurrentSymbol(1, *it);
-
-    for(int i = 0; i < iNBytes - 1; ++i) {
-      ++it;
-      strCurrentSymbol += *it;
-    }
-
-    // TODO: Error condition on reaching end of string prematurely.
-
-    Symbols->push_back(TextMap.Get(strCurrentSymbol));
-
-    ++it;
-  }
 }
 
 // add single char to the character set
diff --git a/Src/DasherCore/Alphabet/Alphabet.h b/Src/DasherCore/Alphabet/Alphabet.h
index 8919e67..52b3b8f 100644
--- a/Src/DasherCore/Alphabet/Alphabet.h
+++ b/Src/DasherCore/Alphabet/Alphabet.h
@@ -107,21 +107,10 @@ namespace Dasher {
     // return group membership of i'th symbol
     // Fills Symbols with the symbols corresponding to Input. {{{ Note that this
     // is not necessarily reversible by repeated use of GetText. Some text
-    // may not be recognised and so discarded. If IsMore is true then Input
-    // is truncated to any final characters that were not used due to ambiguous
-    // continuation. If IsMore is false Input is assumed to be all the available
-    // text and so a symbol will be returned for a final "a" even if "ae" is
-    // defined as its own symbol. }}}
-    void GetSymbols(std::vector<symbol> *Symbols, std::string * Input, bool IsMore) const;
-    void GetSymbols(std::vector<symbol> &symbols, std::istream &in) const;
-
-
-    /// Look up symbols corresponding to string. Cannot cope with
-    /// parial input, but knows about UTF-8, so can include 'unknown
-    /// symbol' as 0 in the output. Slower than GetSymbols, so don't
-    /// use for import, but is useful for looking up contexts.
-    void GetSymbolsFull(std::vector<symbol > *Symbols, std::string *Input) const;
+    // may not be recognised and so discarded. }}}
 
+    void GetSymbols(std::vector<symbol> &Symbols, std::string& Input, bool IsMore) const;
+    void GetSymbols(std::vector<symbol> &symbols, std::istream &in) const;
 
     void Trace() const;         // diagnostic
 
diff --git a/Src/DasherCore/AlphabetManager.cpp b/Src/DasherCore/AlphabetManager.cpp
index de4d66f..33a48a4 100644
--- a/Src/DasherCore/AlphabetManager.cpp
+++ b/Src/DasherCore/AlphabetManager.cpp
@@ -501,13 +501,13 @@ void CAlphabetManager::SetFlag(CDasherNode *pNode, int iFlag, bool bValue) {
   }
 }
 
-void CAlphabetManager::BuildContext(std::string strContext, bool bRoot, CLanguageModel::Context &oContext, symbol &iSymbol) {
+void CAlphabetManager::BuildContext(std::string &strContext, bool bRoot, CLanguageModel::Context &oContext, symbol &iSymbol) {
   // Hopefully this will obsolete any need to handle contexts outside
   // of the alphabet manager - check this and remove resulting
   // redundant code
 
   std::vector<symbol> vContextSymbols;
-  m_pNCManager->GetAlphabet()->GetSymbolsFull(&vContextSymbols, &strContext);
+  m_pNCManager->GetAlphabet()->GetSymbols(vContextSymbols, strContext);
  
   oContext = m_pLanguageModel->CreateEmptyContext();
   
diff --git a/Src/DasherCore/AlphabetManager.h b/Src/DasherCore/AlphabetManager.h
index e236a4f..fd101f6 100644
--- a/Src/DasherCore/AlphabetManager.h
+++ b/Src/DasherCore/AlphabetManager.h
@@ -109,7 +109,7 @@ namespace Dasher {
 
   private:
     
-    void BuildContext(std::string strContext, bool bRoot, CLanguageModel::Context &oContext, symbol &iSymbol);
+    void BuildContext(std::string& strContext, bool bRoot, CLanguageModel::Context &oContext, symbol &iSymbol);
 
     void RecursiveIterateGroup(CDasherNode *pParent, SGroupInfo *pInfo, std::vector<symbol> *pSymbols, std::vector<unsigned int> *pCProb, int iMin, int iMax, symbol iExistingSymbol, CDasherNode *pExistingChild);
 
diff --git a/Src/DasherCore/AlphabetManagerFactory.cpp b/Src/DasherCore/AlphabetManagerFactory.cpp
index 28d8c11..3328f0d 100644
--- a/Src/DasherCore/AlphabetManagerFactory.cpp
+++ b/Src/DasherCore/AlphabetManagerFactory.cpp
@@ -37,7 +37,7 @@ CAlphabetManagerFactory::CAlphabetManagerFactory(CDasherInterfaceBase *pInterfac
   // if this is the case then the parameter value should be updated,
   // but not in such a way that it causes everything to be rebuilt.
 
-  Dasher::CAlphIO::AlphInfo oAlphInfo = pAlphIO->GetInfo(pSettingsStore->GetStringParameter(SP_ALPHABET_ID));
+  const Dasher::CAlphIO::AlphInfo &oAlphInfo(pAlphIO->GetInfo(pSettingsStore->GetStringParameter(SP_ALPHABET_ID)));
   m_pAlphabet = new CAlphabet(oAlphInfo);
   m_pCHAlphabet = NULL;
 
diff --git a/Src/DasherCore/LanguageModelling/DictLanguageModel.cpp b/Src/DasherCore/LanguageModelling/DictLanguageModel.cpp
index 4f0b31e..35bdec4 100644
--- a/Src/DasherCore/LanguageModelling/DictLanguageModel.cpp
+++ b/Src/DasherCore/LanguageModelling/DictLanguageModel.cpp
@@ -114,7 +114,7 @@ CDictLanguageModel::CDictLanguageModel(Dasher::CEventHandler *pEventHandler, CSe
     //      std::cout << SymbolAlphabet().GetAlphabetPointer() << std::endl;
 
     std::vector < symbol > Symbols;
-    SymbolAlphabet().GetAlphabetPointer()->GetSymbols(&Symbols, &CurrentWord, false);
+    SymbolAlphabet().GetAlphabetPointer()->GetSymbols(Symbols, CurrentWord);
 
     for(std::vector < symbol >::iterator it(Symbols.begin()); it != Symbols.end(); ++it) {
       MyLearnSymbol(TempContext, *it);
diff --git a/Src/DasherCore/LanguageModelling/WordLanguageModel.cpp b/Src/DasherCore/LanguageModelling/WordLanguageModel.cpp
index 239ef4e..7886bc6 100644
--- a/Src/DasherCore/LanguageModelling/WordLanguageModel.cpp
+++ b/Src/DasherCore/LanguageModelling/WordLanguageModel.cpp
@@ -166,7 +166,7 @@ CWordLanguageModel::CWordLanguageModel(Dasher::CEventHandler *pEventHandler, CSe
       //      std::cout << SymbolAlphabet().GetAlphabetPointer() << std::endl;
 
       std::vector < symbol > Symbols;
-      SymbolAlphabet().GetAlphabetPointer()->GetSymbols(&Symbols, &CurrentWord, false);
+      SymbolAlphabet().GetAlphabetPointer()->GetSymbols(Symbols, CurrentWord);
 
       for(std::vector < symbol >::iterator it(Symbols.begin()); it != Symbols.end(); ++it) {
         pSpellingModel->LearnSymbol(TempContext, *it);
diff --git a/Src/DasherCore/NodeCreationManager.cpp b/Src/DasherCore/NodeCreationManager.cpp
index 5781702..67a1486 100644
--- a/Src/DasherCore/NodeCreationManager.cpp
+++ b/Src/DasherCore/NodeCreationManager.cpp
@@ -163,18 +163,9 @@ void CNodeCreationManager::GetProbs(CLanguageModel::Context context, std::vector
 
 }
 
-void CNodeCreationManager::LearnText(CLanguageModel::Context context, std::string *TheText, bool IsMore) {
-  std::vector < symbol > Symbols;
-
-  m_pAlphabet->GetSymbols(&Symbols, TheText, IsMore);
-
-  for(unsigned int i = 0; i < Symbols.size(); i++)
-    m_pLanguageModel->LearnSymbol(context, Symbols[i]); // FIXME - conversion to symbol alphabet
-}
-
 void CNodeCreationManager::EnterText(CLanguageModel::Context context, std::string TheText) const {
   std::vector < symbol > Symbols;
-  m_pAlphabet->GetSymbols(&Symbols, &TheText, false);
+  m_pAlphabet->GetSymbols(Symbols, TheText);
   for(unsigned int i = 0; i < Symbols.size(); i++)
     m_pLanguageModel->EnterSymbol(context, Symbols[i]); // FIXME - conversion to symbol alphabet
 }
diff --git a/Src/DasherCore/NodeCreationManager.h b/Src/DasherCore/NodeCreationManager.h
index 8bed6fb..5c0a561 100644
--- a/Src/DasherCore/NodeCreationManager.h
+++ b/Src/DasherCore/NodeCreationManager.h
@@ -65,7 +65,6 @@ class CNodeCreationManager : public Dasher::CDasherComponent {
   }
 
   void GetProbs(Dasher::CLanguageModel::Context context, std::vector <Dasher::symbol >&NewSymbols, std::vector <unsigned int >&Probs, int iNorm) const;
-  void LearnText(Dasher::CLanguageModel::Context context, std::string *TheText, bool IsMore);
   void EnterText(Dasher::CLanguageModel::Context context, std::string TheText) const;
 
   inline int GetColour(Dasher::symbol s, int iPhase) const {
diff --git a/Src/DasherCore/PinYinConversionHelper.cpp b/Src/DasherCore/PinYinConversionHelper.cpp
index 0cec404..494f21c 100644
--- a/Src/DasherCore/PinYinConversionHelper.cpp
+++ b/Src/DasherCore/PinYinConversionHelper.cpp
@@ -72,7 +72,7 @@ unsigned int CPinYinConversionHelper::GetSumPYProbs(Dasher::CLanguageModel::Cont
     std::string HZ = static_cast<std::string>(pCurrentNode->pszConversion);
     // Distribute the remaining space evenly
     
-    m_pCHAlphabet->GetSymbols(&Symbols, &HZ, 0);    
+    m_pCHAlphabet->GetSymbols(Symbols, HZ);    
 
     if(Symbols.size()!=0)
       sumProb += Probs[Symbols[0]];
@@ -126,7 +126,7 @@ void CPinYinConversionHelper::AssignSizes(SCENode **pStart, Dasher::CLanguageMod
 
     std::string HZ(pNode->pszConversion);
 
-    m_pCHAlphabet->GetSymbols(&Symbols, &HZ, 0);    
+    m_pCHAlphabet->GetSymbols(Symbols, HZ);    
 
     if(Symbols.size()!=0){
       pNode->Symbol = Symbols[0];
@@ -237,4 +237,4 @@ void CPinYinConversionHelper::SetFlag(CDasherNode *pNode, int iFlag, bool bValue
 	//Blanked out for new Mandarin Dasher, if we want to have the language model learn as one types, need to work on this part
 	if (iFlag == NF_COMMITTED && bValue) return;
 	CConversionHelper::SetFlag(pNode, iFlag, bValue);
-}
\ No newline at end of file
+}
diff --git a/Src/DasherCore/Trainer.cpp b/Src/DasherCore/Trainer.cpp
index 044b6ea..2839958 100644
--- a/Src/DasherCore/Trainer.cpp
+++ b/Src/DasherCore/Trainer.cpp
@@ -103,7 +103,6 @@ void CMandarinTrainer::Train(const std::string &strUserLoc, const std::string &s
     
   std::string strChar;
   std::string strPY;
-  //char ctemp[4];
   CLanguageModel::Context trainContext = m_pLanguageModel->CreateEmptyContext();
   std::string pyID = "ã??";
   std::vector<symbol> Symchar;
@@ -140,17 +139,17 @@ void CMandarinTrainer::Train(const std::string &strUserLoc, const std::string &s
       strChar.append(strBuffer.substr(3*pos,3));
       std::string strtemp = strBuffer.substr(3*(pos),3);
       Symchar.clear();
-      m_pCHAlphabet->GetSymbols(&Symchar, &strtemp, 0);
+      m_pCHAlphabet->GetSymbols(Symchar, strtemp);
 
       pos++;
           
     }
     Symchar.clear();
     Sympy.clear();
-    m_pCHAlphabet->GetSymbols(&Symchar, &strChar, 0);
-    m_pAlphabet->GetSymbols(&Sympy, &strPY, 0);      
+    m_pCHAlphabet->GetSymbols(Symchar, strChar);
+    m_pAlphabet->GetSymbols(Sympy, strPY);      
     
-    for(int i =0; i<Symchar.size(); i++){
+    for(unsigned int i =0; i<Symchar.size(); i++){
 
       if((Symchar[i]<7603)&&(Symchar[i]>-1)){//Hack here? to prevent lan model from failing
 	
diff --git a/Src/DasherCore/TrainingHelper.cpp b/Src/DasherCore/TrainingHelper.cpp
index 4dd2719..fc00868 100644
--- a/Src/DasherCore/TrainingHelper.cpp
+++ b/Src/DasherCore/TrainingHelper.cpp
@@ -130,7 +130,7 @@ void
 Dasher::CTrainingHelper::HandleEndElement(const XML_Char *szName) {
   if(!strcmp(szName, "segment")) {
     std::vector<Dasher::symbol> vSymbols;
-    m_pAlphabet->GetSymbols(&vSymbols, &m_strCurrentText, false);
+    m_pAlphabet->GetSymbols(vSymbols, m_strCurrentText);
     Train(vSymbols);
     
     m_bInSegment = false;



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]