[dasher] Cleanups to Alphabet code:
- From: Patrick Welche <pwelche src gnome org>
- To: svn-commits-list gnome org
- Cc:
- Subject: [dasher] Cleanups to Alphabet code:
- Date: Sat, 15 Aug 2009 14:23:28 +0000 (UTC)
commit 3bc06daeaba3f39ef1e61e42a32ece2356f6d1b4
Author: Alan Lawrence <acl33 inf phy cam ac uk>
Date: Mon Aug 10 10:46:29 2009 +0200
Cleanups to Alphabet code:
- move RecursiveAdd into Add, as 3rd param was unused;
- remove unused 'KeyIsPrefix' member/params (was always false!)
- change many signatures to use references not pointers
- remove unused boolean parameter "IsMore" from CAlphabet::GetSymbols
- remove CAlphabet::GetSymbolsFull (use GetSymbols!)
- remove NCManager::LearnText (unused)
- {Un/}signed comparison in AlphabetManagerFactory
(some of this already appeared in earlier patches)
ChangeLog | 2 +
Src/DasherCore/Alphabet/Alphabet.cpp | 54 +++-----------------
Src/DasherCore/Alphabet/Alphabet.h | 17 +-----
Src/DasherCore/AlphabetManager.cpp | 4 +-
Src/DasherCore/AlphabetManager.h | 2 +-
Src/DasherCore/AlphabetManagerFactory.cpp | 2 +-
.../LanguageModelling/DictLanguageModel.cpp | 2 +-
.../LanguageModelling/WordLanguageModel.cpp | 2 +-
Src/DasherCore/NodeCreationManager.cpp | 11 +----
Src/DasherCore/NodeCreationManager.h | 1 -
Src/DasherCore/PinYinConversionHelper.cpp | 6 +-
Src/DasherCore/Trainer.cpp | 9 ++--
Src/DasherCore/TrainingHelper.cpp | 2 +-
13 files changed, 27 insertions(+), 87 deletions(-)
---
diff --git a/ChangeLog b/ChangeLog
index a338a4c..5210130 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -6,6 +6,8 @@
* DasherView{,Square}: Remove b1D/bNonLinearity.
* MacOSX: Remove ZippyCache as DasherViewOpenGL makes no use of it.
* Remove empty DasherView.inl and move input filters into namespace Dasher.
+ * Change signatures (e.g. GetSymbols) from pointers to references;
+ g/c IsMore, GetSymbolsFull, LearnText.
2009-08-08 Alan Lawrence <acl33 inf phy cam ac uk>
diff --git a/Src/DasherCore/Alphabet/Alphabet.cpp b/Src/DasherCore/Alphabet/Alphabet.cpp
index fc70415..53516ca 100644
--- a/Src/DasherCore/Alphabet/Alphabet.cpp
+++ b/Src/DasherCore/Alphabet/Alphabet.cpp
@@ -184,30 +184,30 @@ void CAlphabet::GetSymbols(std::vector<symbol> &symbols, std::istream &in) const
delete [] utfchar;
}
-void CAlphabet::GetSymbols(std::vector<symbol> *Symbols, std::string * Input, bool IsMore) const
+void CAlphabet::GetSymbols(std::vector<symbol>& Symbols, std::string& Input) const
{
string Tmp;
- symbol CurSymbol = 0, TmpSymbol = 0;
+ symbol CurSymbol = 0;
int extras;
unsigned int bit;
- for(unsigned int i = 0; i < Input->size(); i++) {
+ for(unsigned int i = 0; i < Input.size(); i++) {
- Tmp = (*Input)[i];
+ Tmp = Input[i];
/* The string we've been given is in UTF-8. The symbols are
also in UTF-8, so we need to pass the entire UTF-8 character
which may be several bytes long. RFC 2279 describes this
encoding */
- if((*Input)[i] & 0x80) { // Character is more than 1 byte long
+ if(Input[i] & 0x80) { // Character is more than 1 byte long
extras = 1;
- for(bit = 0x20; ((*Input)[i] & bit) != 0; bit >>= 1)
+ for(bit = 0x20; (Input[i] & bit) != 0; bit >>= 1)
extras++;
if(extras > 5) {
} // Malformed character
while(extras-- > 0) {
- Tmp += (*Input)[++i];
+ Tmp += Input[++i];
}
}
@@ -216,46 +216,6 @@ void CAlphabet::GetSymbols(std::vector<symbol> *Symbols, std::string * Input, bo
if(CurSymbol != 0)
Symbols->push_back(CurSymbol);
}
-
- if(IsMore)
- *Input = "";
-}
-
-void CAlphabet::GetSymbolsFull(std::vector<symbol > *Symbols, std::string *Input) const {
-
- std::string::iterator it = Input->begin();
-
- while(it != Input->end()) {
- unsigned char c = static_cast<unsigned char>(*it);
-
- int iNBytes;
-
- if(c <= 0x7F)
- iNBytes = 1;
- else if((c >= 0xC2) && (c <= 0xDF))
- iNBytes = 2;
- else if((c >= 0xE0) && (c <= 0xEF))
- iNBytes = 3;
- else if((c >= 0xF0) && (c <= 0xF4))
- iNBytes = 4;
- else {
- // TODO: Error condition - handle this.
- iNBytes = 1;
- }
-
- std::string strCurrentSymbol(1, *it);
-
- for(int i = 0; i < iNBytes - 1; ++i) {
- ++it;
- strCurrentSymbol += *it;
- }
-
- // TODO: Error condition on reaching end of string prematurely.
-
- Symbols->push_back(TextMap.Get(strCurrentSymbol));
-
- ++it;
- }
}
// add single char to the character set
diff --git a/Src/DasherCore/Alphabet/Alphabet.h b/Src/DasherCore/Alphabet/Alphabet.h
index 8919e67..52b3b8f 100644
--- a/Src/DasherCore/Alphabet/Alphabet.h
+++ b/Src/DasherCore/Alphabet/Alphabet.h
@@ -107,21 +107,10 @@ namespace Dasher {
// return group membership of i'th symbol
// Fills Symbols with the symbols corresponding to Input. {{{ Note that this
// is not necessarily reversible by repeated use of GetText. Some text
- // may not be recognised and so discarded. If IsMore is true then Input
- // is truncated to any final characters that were not used due to ambiguous
- // continuation. If IsMore is false Input is assumed to be all the available
- // text and so a symbol will be returned for a final "a" even if "ae" is
- // defined as its own symbol. }}}
- void GetSymbols(std::vector<symbol> *Symbols, std::string * Input, bool IsMore) const;
- void GetSymbols(std::vector<symbol> &symbols, std::istream &in) const;
-
-
- /// Look up symbols corresponding to string. Cannot cope with
- /// parial input, but knows about UTF-8, so can include 'unknown
- /// symbol' as 0 in the output. Slower than GetSymbols, so don't
- /// use for import, but is useful for looking up contexts.
- void GetSymbolsFull(std::vector<symbol > *Symbols, std::string *Input) const;
+ // may not be recognised and so discarded. }}}
+ void GetSymbols(std::vector<symbol> &Symbols, std::string& Input, bool IsMore) const;
+ void GetSymbols(std::vector<symbol> &symbols, std::istream &in) const;
void Trace() const; // diagnostic
diff --git a/Src/DasherCore/AlphabetManager.cpp b/Src/DasherCore/AlphabetManager.cpp
index de4d66f..33a48a4 100644
--- a/Src/DasherCore/AlphabetManager.cpp
+++ b/Src/DasherCore/AlphabetManager.cpp
@@ -501,13 +501,13 @@ void CAlphabetManager::SetFlag(CDasherNode *pNode, int iFlag, bool bValue) {
}
}
-void CAlphabetManager::BuildContext(std::string strContext, bool bRoot, CLanguageModel::Context &oContext, symbol &iSymbol) {
+void CAlphabetManager::BuildContext(std::string &strContext, bool bRoot, CLanguageModel::Context &oContext, symbol &iSymbol) {
// Hopefully this will obsolete any need to handle contexts outside
// of the alphabet manager - check this and remove resulting
// redundant code
std::vector<symbol> vContextSymbols;
- m_pNCManager->GetAlphabet()->GetSymbolsFull(&vContextSymbols, &strContext);
+ m_pNCManager->GetAlphabet()->GetSymbols(vContextSymbols, strContext);
oContext = m_pLanguageModel->CreateEmptyContext();
diff --git a/Src/DasherCore/AlphabetManager.h b/Src/DasherCore/AlphabetManager.h
index e236a4f..fd101f6 100644
--- a/Src/DasherCore/AlphabetManager.h
+++ b/Src/DasherCore/AlphabetManager.h
@@ -109,7 +109,7 @@ namespace Dasher {
private:
- void BuildContext(std::string strContext, bool bRoot, CLanguageModel::Context &oContext, symbol &iSymbol);
+ void BuildContext(std::string& strContext, bool bRoot, CLanguageModel::Context &oContext, symbol &iSymbol);
void RecursiveIterateGroup(CDasherNode *pParent, SGroupInfo *pInfo, std::vector<symbol> *pSymbols, std::vector<unsigned int> *pCProb, int iMin, int iMax, symbol iExistingSymbol, CDasherNode *pExistingChild);
diff --git a/Src/DasherCore/AlphabetManagerFactory.cpp b/Src/DasherCore/AlphabetManagerFactory.cpp
index 28d8c11..3328f0d 100644
--- a/Src/DasherCore/AlphabetManagerFactory.cpp
+++ b/Src/DasherCore/AlphabetManagerFactory.cpp
@@ -37,7 +37,7 @@ CAlphabetManagerFactory::CAlphabetManagerFactory(CDasherInterfaceBase *pInterfac
// if this is the case then the parameter value should be updated,
// but not in such a way that it causes everything to be rebuilt.
- Dasher::CAlphIO::AlphInfo oAlphInfo = pAlphIO->GetInfo(pSettingsStore->GetStringParameter(SP_ALPHABET_ID));
+ const Dasher::CAlphIO::AlphInfo &oAlphInfo(pAlphIO->GetInfo(pSettingsStore->GetStringParameter(SP_ALPHABET_ID)));
m_pAlphabet = new CAlphabet(oAlphInfo);
m_pCHAlphabet = NULL;
diff --git a/Src/DasherCore/LanguageModelling/DictLanguageModel.cpp b/Src/DasherCore/LanguageModelling/DictLanguageModel.cpp
index 4f0b31e..35bdec4 100644
--- a/Src/DasherCore/LanguageModelling/DictLanguageModel.cpp
+++ b/Src/DasherCore/LanguageModelling/DictLanguageModel.cpp
@@ -114,7 +114,7 @@ CDictLanguageModel::CDictLanguageModel(Dasher::CEventHandler *pEventHandler, CSe
// std::cout << SymbolAlphabet().GetAlphabetPointer() << std::endl;
std::vector < symbol > Symbols;
- SymbolAlphabet().GetAlphabetPointer()->GetSymbols(&Symbols, &CurrentWord, false);
+ SymbolAlphabet().GetAlphabetPointer()->GetSymbols(Symbols, CurrentWord);
for(std::vector < symbol >::iterator it(Symbols.begin()); it != Symbols.end(); ++it) {
MyLearnSymbol(TempContext, *it);
diff --git a/Src/DasherCore/LanguageModelling/WordLanguageModel.cpp b/Src/DasherCore/LanguageModelling/WordLanguageModel.cpp
index 239ef4e..7886bc6 100644
--- a/Src/DasherCore/LanguageModelling/WordLanguageModel.cpp
+++ b/Src/DasherCore/LanguageModelling/WordLanguageModel.cpp
@@ -166,7 +166,7 @@ CWordLanguageModel::CWordLanguageModel(Dasher::CEventHandler *pEventHandler, CSe
// std::cout << SymbolAlphabet().GetAlphabetPointer() << std::endl;
std::vector < symbol > Symbols;
- SymbolAlphabet().GetAlphabetPointer()->GetSymbols(&Symbols, &CurrentWord, false);
+ SymbolAlphabet().GetAlphabetPointer()->GetSymbols(Symbols, CurrentWord);
for(std::vector < symbol >::iterator it(Symbols.begin()); it != Symbols.end(); ++it) {
pSpellingModel->LearnSymbol(TempContext, *it);
diff --git a/Src/DasherCore/NodeCreationManager.cpp b/Src/DasherCore/NodeCreationManager.cpp
index 5781702..67a1486 100644
--- a/Src/DasherCore/NodeCreationManager.cpp
+++ b/Src/DasherCore/NodeCreationManager.cpp
@@ -163,18 +163,9 @@ void CNodeCreationManager::GetProbs(CLanguageModel::Context context, std::vector
}
-void CNodeCreationManager::LearnText(CLanguageModel::Context context, std::string *TheText, bool IsMore) {
- std::vector < symbol > Symbols;
-
- m_pAlphabet->GetSymbols(&Symbols, TheText, IsMore);
-
- for(unsigned int i = 0; i < Symbols.size(); i++)
- m_pLanguageModel->LearnSymbol(context, Symbols[i]); // FIXME - conversion to symbol alphabet
-}
-
void CNodeCreationManager::EnterText(CLanguageModel::Context context, std::string TheText) const {
std::vector < symbol > Symbols;
- m_pAlphabet->GetSymbols(&Symbols, &TheText, false);
+ m_pAlphabet->GetSymbols(Symbols, TheText);
for(unsigned int i = 0; i < Symbols.size(); i++)
m_pLanguageModel->EnterSymbol(context, Symbols[i]); // FIXME - conversion to symbol alphabet
}
diff --git a/Src/DasherCore/NodeCreationManager.h b/Src/DasherCore/NodeCreationManager.h
index 8bed6fb..5c0a561 100644
--- a/Src/DasherCore/NodeCreationManager.h
+++ b/Src/DasherCore/NodeCreationManager.h
@@ -65,7 +65,6 @@ class CNodeCreationManager : public Dasher::CDasherComponent {
}
void GetProbs(Dasher::CLanguageModel::Context context, std::vector <Dasher::symbol >&NewSymbols, std::vector <unsigned int >&Probs, int iNorm) const;
- void LearnText(Dasher::CLanguageModel::Context context, std::string *TheText, bool IsMore);
void EnterText(Dasher::CLanguageModel::Context context, std::string TheText) const;
inline int GetColour(Dasher::symbol s, int iPhase) const {
diff --git a/Src/DasherCore/PinYinConversionHelper.cpp b/Src/DasherCore/PinYinConversionHelper.cpp
index 0cec404..494f21c 100644
--- a/Src/DasherCore/PinYinConversionHelper.cpp
+++ b/Src/DasherCore/PinYinConversionHelper.cpp
@@ -72,7 +72,7 @@ unsigned int CPinYinConversionHelper::GetSumPYProbs(Dasher::CLanguageModel::Cont
std::string HZ = static_cast<std::string>(pCurrentNode->pszConversion);
// Distribute the remaining space evenly
- m_pCHAlphabet->GetSymbols(&Symbols, &HZ, 0);
+ m_pCHAlphabet->GetSymbols(Symbols, HZ);
if(Symbols.size()!=0)
sumProb += Probs[Symbols[0]];
@@ -126,7 +126,7 @@ void CPinYinConversionHelper::AssignSizes(SCENode **pStart, Dasher::CLanguageMod
std::string HZ(pNode->pszConversion);
- m_pCHAlphabet->GetSymbols(&Symbols, &HZ, 0);
+ m_pCHAlphabet->GetSymbols(Symbols, HZ);
if(Symbols.size()!=0){
pNode->Symbol = Symbols[0];
@@ -237,4 +237,4 @@ void CPinYinConversionHelper::SetFlag(CDasherNode *pNode, int iFlag, bool bValue
//Blanked out for new Mandarin Dasher, if we want to have the language model learn as one types, need to work on this part
if (iFlag == NF_COMMITTED && bValue) return;
CConversionHelper::SetFlag(pNode, iFlag, bValue);
-}
\ No newline at end of file
+}
diff --git a/Src/DasherCore/Trainer.cpp b/Src/DasherCore/Trainer.cpp
index 044b6ea..2839958 100644
--- a/Src/DasherCore/Trainer.cpp
+++ b/Src/DasherCore/Trainer.cpp
@@ -103,7 +103,6 @@ void CMandarinTrainer::Train(const std::string &strUserLoc, const std::string &s
std::string strChar;
std::string strPY;
- //char ctemp[4];
CLanguageModel::Context trainContext = m_pLanguageModel->CreateEmptyContext();
std::string pyID = "ã??";
std::vector<symbol> Symchar;
@@ -140,17 +139,17 @@ void CMandarinTrainer::Train(const std::string &strUserLoc, const std::string &s
strChar.append(strBuffer.substr(3*pos,3));
std::string strtemp = strBuffer.substr(3*(pos),3);
Symchar.clear();
- m_pCHAlphabet->GetSymbols(&Symchar, &strtemp, 0);
+ m_pCHAlphabet->GetSymbols(Symchar, strtemp);
pos++;
}
Symchar.clear();
Sympy.clear();
- m_pCHAlphabet->GetSymbols(&Symchar, &strChar, 0);
- m_pAlphabet->GetSymbols(&Sympy, &strPY, 0);
+ m_pCHAlphabet->GetSymbols(Symchar, strChar);
+ m_pAlphabet->GetSymbols(Sympy, strPY);
- for(int i =0; i<Symchar.size(); i++){
+ for(unsigned int i =0; i<Symchar.size(); i++){
if((Symchar[i]<7603)&&(Symchar[i]>-1)){//Hack here? to prevent lan model from failing
diff --git a/Src/DasherCore/TrainingHelper.cpp b/Src/DasherCore/TrainingHelper.cpp
index 4dd2719..fc00868 100644
--- a/Src/DasherCore/TrainingHelper.cpp
+++ b/Src/DasherCore/TrainingHelper.cpp
@@ -130,7 +130,7 @@ void
Dasher::CTrainingHelper::HandleEndElement(const XML_Char *szName) {
if(!strcmp(szName, "segment")) {
std::vector<Dasher::symbol> vSymbols;
- m_pAlphabet->GetSymbols(&vSymbols, &m_strCurrentText, false);
+ m_pAlphabet->GetSymbols(vSymbols, m_strCurrentText);
Train(vSymbols);
m_bInSegment = false;
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]