[dasher] Fix Mandarin training & LM contexts!
- From: Patrick Welche <pwelche src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [dasher] Fix Mandarin training & LM contexts!
- Date: Tue, 15 Mar 2011 17:11:27 +0000 (UTC)
commit 979579688bfa41f476d8e7729a1370c20828f6b8
Author: Alan Lawrence <acl33 inf phy cam ac uk>
Date: Mon Feb 7 23:41:07 2011 +0000
Fix Mandarin training & LM contexts!
Make TrainingHelper::LoadFile virtual, so that the CMandarinTrainer::LoadFile override is actually used (!)
Fix & tidy CMandarinTrainer::LoadFile
Generate the appropriate context for child symbols
(previously the child symbol was entered into the parent node's context rather than the child node's own context!)
Src/DasherCore/MandarinAlphMgr.cpp | 2 +-
Src/DasherCore/Trainer.cpp | 55 +++++++++++++----------------------
Src/DasherCore/TrainingHelper.h | 2 +-
3 files changed, 23 insertions(+), 36 deletions(-)
---
diff --git a/Src/DasherCore/MandarinAlphMgr.cpp b/Src/DasherCore/MandarinAlphMgr.cpp
index a75a856..60f0550 100644
--- a/Src/DasherCore/MandarinAlphMgr.cpp
+++ b/Src/DasherCore/MandarinAlphMgr.cpp
@@ -181,7 +181,7 @@ void CMandarinAlphMgr::CConvRoot::PopulateChildren() {
pNewNode->iContext = m_pMgr->m_pLanguageModel->CloneContext(this->iContext);
- m_pMgr->m_pLanguageModel->EnterSymbol(iContext, it->first); // TODO: Don't use symbols?
+ m_pMgr->m_pLanguageModel->EnterSymbol(pNewNode->iContext, it->first); // TODO: Don't use symbols?
DASHER_ASSERT(GetChildren().back()==pNewNode);
diff --git a/Src/DasherCore/Trainer.cpp b/Src/DasherCore/Trainer.cpp
index fed75f1..9c84991 100644
--- a/Src/DasherCore/Trainer.cpp
+++ b/Src/DasherCore/Trainer.cpp
@@ -75,47 +75,34 @@ void CMandarinTrainer::LoadFile(const std::string &strPath) {
else
lim = charsize;
- size_t pos =0;//position in 3's counting on
- while(pos<lim*3){
+ for (size_t pos=0;;) { //position in 3's counting on
- while(pyID.compare(strBuffer.substr(3*pos,3))!=0)
- pos++;
+ while(pos<lim*3)
+ if (pyID.compare(strBuffer.substr(3*pos++,3))==0) break;
+ //leave pos just after the pyID symbol
- pos++;
- // strBuffer.copy(ctemp,3,3*pos);
-
- strPY.append(strBuffer.substr(3*pos,3));
-
- pos++;
+ if (pos+1>=lim*3) break;
+ strPY=strBuffer.substr(3*pos++,3);
//strBuffer.copy(ctemp,3,3*pos);
- strChar.append(strBuffer.substr(3*pos,3));
- std::string strtemp = strBuffer.substr(3*(pos),3);
+ strChar=strBuffer.substr(3*pos++,3);
+
Symchar.clear();
- m_pCHAlphabet->GetSymbols(Symchar, strtemp);
+ Sympy.clear();
+
+ m_pCHAlphabet->GetSymbols(Symchar, strChar);
+ m_pAlphabet->GetSymbols(Sympy, strPY);
+ DASHER_ASSERT(Symchar.size()==1);
+ DASHER_ASSERT(Sympy.size()==1);
+#ifdef DEBUG
+ if (Symchar[0]<=0)
+ std::cout << "Unknown chinese character " << strChar << std::endl;
+#endif
- pos++;
-
+ static_cast<CPPMPYLanguageModel *>(m_pLanguageModel)->LearnPYSymbol(trainContext, Sympy[0]);
+ m_pLanguageModel->LearnSymbol(trainContext, Symchar[0]);
+ numberofchar++;
}
- Symchar.clear();
- Sympy.clear();
- m_pCHAlphabet->GetSymbols(Symchar, strChar);
- m_pAlphabet->GetSymbols(Sympy, strPY);
-
- for(unsigned int i =0; i<Symchar.size(); i++){
-
- if((Symchar[i]<7603)&&(Symchar[i]>-1)){//Hack here? to prevent lan model from failing
-
- static_cast<CPPMPYLanguageModel *>(m_pLanguageModel)->LearnPYSymbol(trainContext, Sympy[i]);
- m_pLanguageModel->LearnSymbol(trainContext, Symchar[i]);
-
- }
-
- // if(Sym.size()>0)
-
- numberofchar = numberofchar + Symchar.size();
- }
-
}
//std::cout<<"The Length of Training file is "<<numberofchar<<" bytes/py characters"<<std::endl;
}
diff --git a/Src/DasherCore/TrainingHelper.h b/Src/DasherCore/TrainingHelper.h
index cac41ac..592578c 100644
--- a/Src/DasherCore/TrainingHelper.h
+++ b/Src/DasherCore/TrainingHelper.h
@@ -39,7 +39,7 @@ namespace Dasher {
void HandleCData(const XML_Char *szS,
int iLen);
- void LoadFile(const std::string &strFileName);
+ virtual void LoadFile(const std::string &strFileName);
protected:
const Dasher::CAlphabetMap *m_pAlphabet;
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]