[dasher] Mandarin user training files!
- From: Patrick Welche <pwelche src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [dasher] Mandarin user training files!
- Date: Tue, 15 Mar 2011 17:13:02 +0000 (UTC)
commit 494a651ebb545092bb4d9c57f1f48dd4d2da2419
Author: Alan Lawrence <acl33 inf phy cam ac uk>
Date: Fri Mar 4 11:22:49 2011 +0000
Mandarin user training files!
Add separate trainText() method; default calls outputText(), Mandarin overrides.
Src/DasherCore/AlphabetManager.cpp | 12 +++++++-----
Src/DasherCore/AlphabetManager.h | 3 +++
Src/DasherCore/MandarinAlphMgr.cpp | 20 +++++++++++++++++++-
Src/DasherCore/MandarinAlphMgr.h | 3 +++
4 files changed, 32 insertions(+), 6 deletions(-)
---
diff --git a/Src/DasherCore/AlphabetManager.cpp b/Src/DasherCore/AlphabetManager.cpp
index 2e0c04c..64f4a00 100644
--- a/Src/DasherCore/AlphabetManager.cpp
+++ b/Src/DasherCore/AlphabetManager.cpp
@@ -497,20 +497,22 @@ void CAlphabetManager::CSymbolNode::Output(Dasher::VECTOR_SYMBOL_PROB* pAdded, i
// Track this symbol and its probability for logging purposes
if (pAdded != NULL) {
- pAdded->push_back(Dasher::SymbolProb(iSymbol, outputText(), Range() / (double)iNormalization));
+ pAdded->push_back(Dasher::SymbolProb(iSymbol, oEvent.m_sText, Range() / (double)iNormalization));
}
if(m_pMgr->m_pNCManager->GetBoolParameter(BP_LM_ADAPTIVE))
- m_pMgr->strTrainfileBuffer += oEvent.m_sText;
+ m_pMgr->strTrainfileBuffer += trainText();
}
void CAlphabetManager::CSymbolNode::Undo(int *pNumDeleted) {
DASHER_ASSERT(GetFlag(NF_SEEN));
Dasher::CEditEvent oEvent(2, outputText(), offset());
+ //Whilst the node is still NF_SEEN, we don't want to actually delete the text
+ // (e.g. outputText() for paragraph symbols will check the edit buffer!)
+ if(m_pMgr->m_pNCManager->GetBoolParameter(BP_LM_ADAPTIVE))
+ m_pMgr->strTrainfileBuffer = m_pMgr->strTrainfileBuffer.substr( 0, m_pMgr->strTrainfileBuffer.size() - trainText().size());
+ //finally delete, the last thing we do...
m_pMgr->m_pNCManager->InsertEvent(&oEvent);
if (pNumDeleted) (*pNumDeleted)++;
- if(m_pMgr->m_pNCManager->GetBoolParameter(BP_LM_ADAPTIVE))
- m_pMgr->strTrainfileBuffer = m_pMgr->strTrainfileBuffer.substr( 0, m_pMgr->strTrainfileBuffer.size() - oEvent.m_sText.size());
-
}
CDasherNode *CAlphabetManager::CGroupNode::RebuildParent() {
diff --git a/Src/DasherCore/AlphabetManager.h b/Src/DasherCore/AlphabetManager.h
index daf59f1..5915df0 100644
--- a/Src/DasherCore/AlphabetManager.h
+++ b/Src/DasherCore/AlphabetManager.h
@@ -142,6 +142,9 @@ namespace Dasher {
virtual CDasherNode *RebuildSymbol(CAlphNode *pParent, unsigned int iLbnd, unsigned int iHbnd, const std::string &strGroup, int iBkgCol, symbol iSymbol);
protected:
virtual const std::string &outputText();
+ ///Text to write to user training file/buffer when this symbol is output.
+ /// Default just returns (a new string constructed from) outputText()
+ virtual std::string trainText() {return outputText();}
/// Number of unicode _characters_ (not octets) for this symbol.
/// Uniquely, a paragraph symbol can enter two distinct unicode characters
/// (i.e. '\r' and '\n'); every other symbol enters only a single
diff --git a/Src/DasherCore/MandarinAlphMgr.cpp b/Src/DasherCore/MandarinAlphMgr.cpp
index 574fce8..f4e464f 100644
--- a/Src/DasherCore/MandarinAlphMgr.cpp
+++ b/Src/DasherCore/MandarinAlphMgr.cpp
@@ -408,6 +408,24 @@ void CMandarinAlphMgr::CMandSym::RebuildForwardsFromAncestor(CAlphNode *pNewNode
}
const std::string &CMandarinAlphMgr::CMandSym::outputText() {
- //use chinese, not pinyin, alphabet...
return mgr()->m_CHtext[iSymbol];
}
+
+string CMandarinAlphMgr::CMandSym::trainText() {
+ //NF_COMMITTED should be in process of being set...
+ DASHER_ASSERT(!GetFlag(NF_COMMITTED));
+ //in which case, we should have a parent (if not, we would have to
+ // have been built from string context, i.e. going backwards,
+ // in which case we would be committed already)
+ DASHER_ASSERT(Parent());
+ //so the parent should have set our m_pyParent field...
+ DASHER_ASSERT(m_pyParent);
+ int iPY = m_pyParent;
+ if (iPY==0) {
+ std::set<symbol> &py(mgr()->m_PinyinByChinese[iSymbol]);
+ DASHER_ASSERT(py.size()==1);
+ if (py.size()==1) iPY = *(py.begin());
+ else return ""; //output nothing! TODO could reset context for what follows - but don't think this should ever happen?
+ }
+ return mgr()->m_pAlphabet->m_strConversionTrainingDelimiter + mgr()->m_pAlphabet->GetText(iPY) + outputText();
+}
diff --git a/Src/DasherCore/MandarinAlphMgr.h b/Src/DasherCore/MandarinAlphMgr.h
index b904179..b2264d3 100644
--- a/Src/DasherCore/MandarinAlphMgr.h
+++ b/Src/DasherCore/MandarinAlphMgr.h
@@ -96,6 +96,9 @@ namespace Dasher {
void RebuildForwardsFromAncestor(CAlphNode *pNewNode);
bool isInGroup(const SGroupInfo *pGroup);
private:
+ ///Override to output a triple (delimiter,PY,CH)
+ virtual std::string trainText();
+ ///Override to use chinese, not pinyin, alphabet
virtual const std::string &outputText();
///The Pinyin symbol used to produce this chinese symbol, if known (0 if not!)
symbol m_pyParent;
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]