[dasher] When (re)building nodes, only use characters in current alphabet to make context
- From: Patrick Welche <pwelche src gnome org>
- To: svn-commits-list gnome org
- Cc:
- Subject: [dasher] When (re)building nodes, only use characters in current alphabet to make context
- Date: Sat, 19 Dec 2009 22:20:24 +0000 (UTC)
commit 7e2c7e54c790b7e6d180c95e234f563e4022e7fb
Author: Alan Lawrence <acl33 inf phy cam ac uk>
Date: Fri Dec 18 15:46:55 2009 +0000
When (re)building nodes, only use characters in current alphabet to make context
(any symbol not in the alphabet is ignored, now along with everything preceding it).
Default context used if no legal characters before context.
Refuses to RebuildParent any node for a symbol not in alphabet
(user must change language to reverse any further)
Also put in a quick one-line implementation of DasherModel::GetOffset. Is it ok?
Src/DasherCore/Alphabet/Alphabet.cpp | 36 +++---------------------
Src/DasherCore/Alphabet/Alphabet.h | 2 +-
Src/DasherCore/AlphabetManager.cpp | 50 +++++++++++++++++++++++++++------
Src/DasherCore/DasherModel.h | 5 ++-
4 files changed, 49 insertions(+), 44 deletions(-)
---
diff --git a/Src/DasherCore/Alphabet/Alphabet.cpp b/Src/DasherCore/Alphabet/Alphabet.cpp
index 2bfa11a..328a1ab 100644
--- a/Src/DasherCore/Alphabet/Alphabet.cpp
+++ b/Src/DasherCore/Alphabet/Alphabet.cpp
@@ -26,7 +26,7 @@
#include "Alphabet.h"
#include "AlphabetMap.h"
#include <cstring>
-
+#include <sstream>
using namespace Dasher;
using namespace std;
@@ -185,38 +185,10 @@ void CAlphabet::GetSymbols(std::vector<symbol> &symbols, std::istream &in) const
delete [] utfchar;
}
-void CAlphabet::GetSymbols(std::vector<symbol>& Symbols, std::string& Input) const
+void CAlphabet::GetSymbols(std::vector<symbol>& Symbols, const std::string& Input) const
{
- string Tmp;
- symbol CurSymbol = 0;
- int extras;
- unsigned int bit;
-
- for(unsigned int i = 0; i < Input.size(); i++) {
-
- Tmp = Input[i];
-
- /* The string we've been given is in UTF-8. The symbols are
- also in UTF-8, so we need to pass the entire UTF-8 character
- which may be several bytes long. RFC 2279 describes this
- encoding */
-
- if(Input[i] & 0x80) { // Character is more than 1 byte long
- extras = 1;
- for(bit = 0x20; (Input[i] & bit) != 0; bit >>= 1)
- extras++;
- if(extras > 5) {
- } // Malformed character
- while(extras-- > 0) {
- Tmp += Input[++i];
- }
- }
-
- CurSymbol = TextMap.Get(Tmp);
-
- if(CurSymbol != 0)
- Symbols.push_back(CurSymbol);
- }
+ std::istringstream in(Input);
+ GetSymbols(Symbols, in);
}
// add single char to the character set
diff --git a/Src/DasherCore/Alphabet/Alphabet.h b/Src/DasherCore/Alphabet/Alphabet.h
index 6822019..da5fe49 100644
--- a/Src/DasherCore/Alphabet/Alphabet.h
+++ b/Src/DasherCore/Alphabet/Alphabet.h
@@ -109,7 +109,7 @@ namespace Dasher {
// is not necessarily reversible by repeated use of GetText. Some text
// may not be recognised and so discarded. }}}
- void GetSymbols(std::vector<symbol> &Symbols, std::string &Input) const;
+ void GetSymbols(std::vector<symbol> &Symbols, const std::string &Input) const;
void GetSymbols(std::vector<symbol> &symbols, std::istream &in) const;
void Trace() const; // diagnostic
diff --git a/Src/DasherCore/AlphabetManager.cpp b/Src/DasherCore/AlphabetManager.cpp
index a35f6d0..2d00985 100644
--- a/Src/DasherCore/AlphabetManager.cpp
+++ b/Src/DasherCore/AlphabetManager.cpp
@@ -76,7 +76,15 @@ CAlphabetManager::CGroupNode *CAlphabetManager::makeGroup(CDasherNode *pParent,
CAlphabetManager::CAlphNode *CAlphabetManager::GetRoot(CDasherNode *pParent, int iLower, int iUpper, bool bEnteredLast, int iOffset) {
CAlphNode *pNewNode = BuildNodeForOffset(pParent, iLower, iUpper, bEnteredLast, max(-1,iOffset-1));
-
+ if (!pNewNode) {
+ DASHER_ASSERT(bEnteredLast);
+ //could not build a node 'responsible' for entering the preceding character,
+ // as said character is not in the current alphabet! However, we'll allow the
+ // user to start entering text afresh
+ return BuildNodeForOffset(pParent, iLower, iUpper, false, iOffset);
+ // (the new node'll be constructed using the current alphabet's default context,
+ // i.e. start of sentence)
+ }
pNewNode->SetFlag(NF_SEEN, true);
// if(m_bGameMode) {
@@ -108,13 +116,30 @@ CAlphabetManager::CAlphNode *CAlphabetManager::BuildNodeForOffset(CDasherNode *p
CAlphNode *pNewNode;
CLanguageModel::Context iContext = m_pLanguageModel->CreateEmptyContext();
-
- for(std::vector<symbol>::iterator it(vContextSymbols.begin()); it != vContextSymbols.end(); ++it)
- if(*it != 0)
- m_pLanguageModel->EnterSymbol(iContext, *it);
-
- if((vContextSymbols.size() == 0) || !bSym) {
- //this node can't be responsible for entering the last symbol if there wasn't one!
+
+ std::vector<symbol>::iterator it = vContextSymbols.end();
+ while (it!=vContextSymbols.begin()) {
+ if (*(--it) == 0) {
+ //found an impossible symbol! start after it
+ ++it;
+ break;
+ }
+ }
+ if (it == vContextSymbols.end()) {
+ //previous character was not in the alphabet!
+ if (bSym) return NULL; //can't construct a node "responsible" for entering such a character!
+ //ok. Create a node as if we were starting a new sentence...
+ vContextSymbols.clear();
+ m_pNCManager->GetAlphabet()->GetSymbols(vContextSymbols, m_pNCManager->GetAlphabet()->GetDefaultContext());
+ it = vContextSymbols.begin();
+ //TODO: What if the default context somehow contains symbols not in the alphabet?
+ }
+ //enter the symbols we could make sense of, into the LM context...
+ while (it != vContextSymbols.end()) {
+ m_pLanguageModel->EnterSymbol(iContext, *(it++));
+ }
+
+ if(!bSym) {
pDisplayInfo->strDisplayText = ""; //equivalent to do m_pNCManager->GetAlphabet()->GetDisplayText(0)
pDisplayInfo->iColour = m_pNCManager->GetAlphabet()->GetColour(0, iNewOffset%2);
pNewNode = makeGroup(pParent, iLower, iUpper, pDisplayInfo, NULL);
@@ -426,7 +451,14 @@ CDasherNode *CAlphabetManager::CAlphNode::RebuildParent(int iNewOffset) {
if (Parent()) return Parent();
CAlphNode *pNewNode = m_pMgr->BuildNodeForOffset(NULL, 0, 0, iNewOffset!=-1, iNewOffset);
-
+ if (!pNewNode) {
+ //could not rebuild parent node, as the preceding character was not
+ // in the current alphabet. Returning null means the user won't be able
+ // to reverse any further; he'll have to change language (to one
+ // including that symbol) instead.
+ return NULL;
+ }
+
//now fill in the new node - recursively - until it reaches us
m_pMgr->IterateChildGroups(pNewNode, NULL, this);
diff --git a/Src/DasherCore/DasherModel.h b/Src/DasherCore/DasherModel.h
index ea8747f..f2f4b5d 100644
--- a/Src/DasherCore/DasherModel.h
+++ b/Src/DasherCore/DasherModel.h
@@ -186,11 +186,12 @@ class Dasher::CDasherModel:public CFrameRate, private NoClones
void SetOffset(int iLocation, CDasherView *pView);
///
- /// TODO: Implement this
+ /// TODO: Figure out how all these "offset"s work / relate to each other - if they do! In particular,
+ /// what do we need DasherModel's own m_iOffset (which measures in _bytes_, not unicode characters!) for?
///
int GetOffset() {
- return 0;
+ return m_pLastOutput->m_iOffset+1;
};
/// Create the children of a Dasher node
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]