[dasher] Context-switch commands written to user training file
- From: Patrick Welche <pwelche src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [dasher] Context-switch commands written to user training file
- Date: Tue, 15 Mar 2011 17:13:12 +0000 (UTC)
commit 7bb2a087335aa58210a01b3abf1a8ee85ebb3546
Author: Alan Lawrence <acl33 inf phy cam ac uk>
Date: Tue Mar 8 16:06:37 2011 +0000
Context-switch commands written to user training file
Every cursor movement after which text is actually written will result
in a separate context-switch + entry in the training file, although we allow
an arbitrary amount of deletion (by reversing) of just-written text without
record.
(NB control mode will break this, i.e. wrong contexts will be written to the file.
About what we expect when the offsets are all wrong!)
Src/DasherCore/AlphabetManager.cpp | 76 ++++++++++++++++++++++++++++++++---
Src/DasherCore/AlphabetManager.h | 21 +++++++++-
2 files changed, 88 insertions(+), 9 deletions(-)
---
diff --git a/Src/DasherCore/AlphabetManager.cpp b/Src/DasherCore/AlphabetManager.cpp
index 7e27b1f..85b950d 100644
--- a/Src/DasherCore/AlphabetManager.cpp
+++ b/Src/DasherCore/AlphabetManager.cpp
@@ -52,6 +52,16 @@ static char THIS_FILE[] = __FILE__;
CAlphabetManager::CAlphabetManager(CDasherInterfaceBase *pInterface, CNodeCreationManager *pNCManager, const CAlphInfo *pAlphabet, const CAlphabetMap *pAlphabetMap)
: m_pNCManager(pNCManager), m_pAlphabet(pAlphabet), m_pAlphabetMap(pAlphabetMap), m_pInterface(pInterface) {
+ //No node has been output yet. This must be NULL (not left uninitialised), as
+ // the Output methods compare against it before anything has been written.
+ m_pLastOutput = NULL;
+ //Look for a (single-octet) character not in the alphabet...
+ // (the counter is an int: where char is signed, "c<0x80" holds for every
+ // value of a char, so a char counter would run on into invalid bytes)
+ for (int i=33; i<0x80; i++) {
+ string s(1, static_cast<char>(i));
+ if (pAlphabetMap->Get(s)==0) {
+ m_sDelim = s;
+ break;
+ }
+ }
+ //else, if all single-octet chars are in alphabet - leave m_sDelim==""
+ // (and we'll find a delimiter for each context)
}
void CAlphabetManager::CreateLanguageModel(CEventHandler *pEventHandler, CSettingsStore *pSettingsStore) {
@@ -90,6 +100,22 @@ CAlphabetManager::~CAlphabetManager() {
}
+ ///Flushes strTrainfileBuffer to the user's training file, preceded (if a context
+ /// was recorded) by a context-switch command: escape char, delimiter, context, delimiter.
+ /// Does nothing if no text is waiting to be written.
void CAlphabetManager::WriteTrainFileFull(CDasherInterfaceBase *pInterface) {
+ if (strTrainfileBuffer == "") return;
+ if (strTrainfileContext != "") {
+ //If context begins with the default, skip that - it'll be entered by Trainer 1st anyway
+ string defCtx(m_pAlphabet->GetDefaultContext());
+ if (strTrainfileContext.substr(0,defCtx.length()) == defCtx)
+ strTrainfileContext = strTrainfileContext.substr(defCtx.length());
+ string sDelim(m_sDelim);
+ if (sDelim == "") {
+ //find a character not in the context we want to write out
+ // (find() yields npos - never length() - when the character is absent)
+ char c=33;
+ while (strTrainfileContext.find(c)!=string::npos) c++; //will terminate, context is ~~5 chars
+ sDelim = string(&c,1);
+ }
+ strTrainfileBuffer = m_pAlphabet->GetContextEscapeChar() + sDelim + strTrainfileContext + sDelim + strTrainfileBuffer;
+ strTrainfileContext="";
+ }
pInterface->WriteTrainFile(m_pAlphabet->GetTrainingFile(), strTrainfileBuffer);
strTrainfileBuffer="";
}
@@ -122,6 +148,17 @@ CAlphabetManager::CAlphBase::CAlphBase(CDasherNode *pParent, int iOffset, unsign
: CDasherNode(pParent, iOffset, iLbnd, iHbnd, iColour, strDisplayText), m_pMgr(pMgr) {
}
+void CAlphabetManager::CAlphBase::Output(Dasher::VECTOR_SYMBOL_PROB* pAdded, int iNormalization) {
+ //If the last node output was our parent, we are now the last node output.
+ if (m_pMgr->m_pLastOutput && m_pMgr->m_pLastOutput == Parent())
+ m_pMgr->m_pLastOutput=this;
+ //The case where lastOutput != Parent is left to subclasses, if they want to handle it.
+ //Note if lastOutput==NULL, we leave it - so the first letter written after startup,
+ // will register as a context switch and write out an empty/default context.
+}
+
+void CAlphabetManager::CAlphBase::Undo(int *pNumDeleted) {
+ //If this node was the last one output, the last output reverts to its parent.
+ if (m_pMgr->m_pLastOutput==this) m_pMgr->m_pLastOutput = Parent();
+}
CAlphabetManager::CAlphNode::CAlphNode(CDasherNode *pParent, int iOffset, unsigned int iLbnd, unsigned int iHbnd, int iColour, const string &strDisplayText, CAlphabetManager *pMgr)
: CAlphBase(pParent, iOffset, iLbnd, iHbnd, iColour, strDisplayText, pMgr), m_pProbInfo(NULL) {
}
@@ -490,6 +527,26 @@ int CAlphabetManager::CSymbolNode::numChars() {
}
void CAlphabetManager::CSymbolNode::Output(Dasher::VECTOR_SYMBOL_PROB* pAdded, int iNormalization) {
+ if (m_pMgr->m_pNCManager->GetBoolParameter(BP_LM_ADAPTIVE)) {
+ if (m_pMgr->m_pLastOutput != Parent()) {
+ //Context changed. Flush to disk the old context + text written in it...
+ m_pMgr->WriteTrainFileFull(m_pMgr->m_pInterface);
+
+ ///Now extract the context in which this node was written.
+ /// Since this node is being output now, its parent must already have been,
+ /// so the simplest thing is to read from the edit buffer!
+ int iStart = max(0, offset() - m_pMgr->m_pLanguageModel->GetContextLength());
+ m_pMgr->strTrainfileContext = m_pMgr->m_pInterface->GetContext(iStart, offset()-iStart);
+ if (m_pMgr->strTrainfileContext=="") //Even the empty context (as for a new document)
+ m_pMgr->strTrainfileContext = m_pMgr->m_pAlphabet->GetDefaultContext(); //is a new ctx!
+ }
+ //Now handle outputting of this node
+ m_pMgr->m_pLastOutput = this;
+ string tr(trainText());
+ m_pMgr->strTrainfileBuffer += tr;
+ //an actual occurrence of the escape character, must be doubled (like \\)
+ if (tr == m_pMgr->m_pAlphabet->GetContextEscapeChar()) m_pMgr->strTrainfileBuffer+=tr;
+ }
//std::cout << this << " " << Parent() << ": Output at offset " << m_iOffset << " *" << m_pMgr->m_pAlphabet->GetText(t) << "* " << std::endl;
Dasher::CEditEvent oEvent(1, outputText(), offset());
@@ -499,18 +556,23 @@ void CAlphabetManager::CSymbolNode::Output(Dasher::VECTOR_SYMBOL_PROB* pAdded, i
if (pAdded != NULL) {
pAdded->push_back(Dasher::SymbolProb(iSymbol, oEvent.m_sText, Range() / (double)iNormalization));
}
- if(m_pMgr->m_pNCManager->GetBoolParameter(BP_LM_ADAPTIVE))
- m_pMgr->strTrainfileBuffer += trainText();
}
void CAlphabetManager::CSymbolNode::Undo(int *pNumDeleted) {
DASHER_ASSERT(GetFlag(NF_SEEN));
+ if (m_pMgr->m_pNCManager->GetBoolParameter(BP_LM_ADAPTIVE)) {
+ if (m_pMgr->m_pLastOutput == this) {
+ //Erase from training buffer, and move lastOutput backwards,
+ // iff this node was actually written (i.e. not rebuilt _from_ context!)
+ std::string &buf(m_pMgr->strTrainfileBuffer);
+ std::string tr(trainText());
+ //Output() writes an occurrence of the escape character doubled, so both copies must go
+ if (tr == m_pMgr->m_pAlphabet->GetContextEscapeChar()) tr = tr + tr;
+ //Check the length first: if the buffer is shorter than tr (e.g. it has been
+ // flushed since this node was output), length()-length() would wrap around
+ if (buf.length()>=tr.length() && buf.compare(buf.length()-tr.length(),tr.length(),tr)==0) {
+ buf.erase(buf.length()-tr.length());
+ m_pMgr->m_pLastOutput = Parent();
+ }
+ }
+ } else CAlphBase::Undo(pNumDeleted);
Dasher::CEditEvent oEvent(2, outputText(), offset());
- //Whilst the node is still NF_SEEN, we don't want to actually delete the text
- // (e.g. outputText() for paragraph symbols will check the edit buffer!)
- if(m_pMgr->m_pNCManager->GetBoolParameter(BP_LM_ADAPTIVE))
- m_pMgr->strTrainfileBuffer = m_pMgr->strTrainfileBuffer.substr( 0, m_pMgr->strTrainfileBuffer.size() - trainText().size());
- //finally delete, the last thing we do...
m_pMgr->m_pNCManager->InsertEvent(&oEvent);
if (pNumDeleted) (*pNumDeleted)++;
}
diff --git a/Src/DasherCore/AlphabetManager.h b/Src/DasherCore/AlphabetManager.h
index 5915df0..459d996 100644
--- a/Src/DasherCore/AlphabetManager.h
+++ b/Src/DasherCore/AlphabetManager.h
@@ -86,6 +86,10 @@ namespace Dasher {
/// would contain this node (see IsInGroup). Subclasses can override to graft themselves into the hierarchy, if appropriate.
/// \param pParent parent of the symbol node to create; could be the previous root, or an intervening node (e.g. group)
virtual CDasherNode *RebuildGroup(CAlphNode *pParent, unsigned int iLbnd, unsigned int iHbnd, const std::string &strEnc, int iBkgCol, const SGroupInfo *pInfo);
+ ///Just keep track of the last node output (for training file purposes)
+ void Undo(int *pNumDeleted);
+ ///Just keep track of the last node output (for training file purposes)
+ void Output(Dasher::VECTOR_SYMBOL_PROB* pAdded, int iNormalization);
protected:
///Called in process of rebuilding parent: fill in the hierarchy _beneath_ the
/// the previous root node, by calling IterateChildGroups passing this node as
@@ -241,9 +245,22 @@ namespace Dasher {
void IterateChildGroups(CAlphNode *pParent, const SGroupInfo *pParentGroup, CAlphBase *buildAround);
CDasherInterfaceBase *m_pInterface;
- ///Text waiting to be written to the user's training file
- /// (probably! Unless they erase back out of the text first)
+
+ ///Last node (owned by this manager) that was output; if a node
+ /// is Undo()ne, this is set to its parent. This is used to detect
+ /// context switches.
+ CDasherNode *m_pLastOutput;
+ ///Text actually written in the current context; both appended and truncated
+ /// as nodes are Output() and Undo()ne.
std::string strTrainfileBuffer;
+ ///Context in (i.e. after) which anything in strTrainfileBuffer was written.
+ /// Set when first character put in strTrainfileBuffer (following a context switch),
+ /// as we may not be able to get the preceding characters if we wait too long.
+ std::string strTrainfileContext;
+
+ ///A single-octet character, 33<=c<0x80, not in the alphabet; used to delimit contexts.
+ ///"" if no such could be found (=> will be found on a per-context basis)
+ std::string m_sDelim;
};
/// @}
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]