[dasher: 17/38] Fix not escaping enough characters in training texts (both Mandarin & normal!)



commit 4700c8bdd6a49a8ffc1799fba8aa46e1584d9f29
Author: Alan Lawrence <acl33 inf phy cam ac uk>
Date:   Tue Dec 13 16:40:07 2011 +0000

    Fix not escaping enough characters in training texts (both Mandarin & normal!)

 Src/DasherCore/Alphabet/AlphInfo.cpp |    7 +++++++
 Src/DasherCore/Alphabet/AlphInfo.h   |    3 +++
 Src/DasherCore/AlphabetManager.cpp   |    4 ++++
 Src/DasherCore/AlphabetManager.h     |    4 ++--
 Src/DasherCore/MandarinAlphMgr.cpp   |    9 +++------
 5 files changed, 19 insertions(+), 8 deletions(-)
---
diff --git a/Src/DasherCore/Alphabet/AlphInfo.cpp b/Src/DasherCore/Alphabet/AlphInfo.cpp
index 8f4d958..ab92b16 100644
--- a/Src/DasherCore/Alphabet/AlphInfo.cpp
+++ b/Src/DasherCore/Alphabet/AlphInfo.cpp
@@ -51,6 +51,13 @@ CAlphInfo::CAlphInfo() {
  m_strCtxChar = "§";
 }
 
+string CAlphInfo::escape(const string &ch) const {
+  if ((m_strConversionTrainStart.length() && ch==m_strConversionTrainStart)
+      || (m_strCtxChar.length() && ch==m_strCtxChar))
+    return ch+ch;
+  return ch;
+}
+
 CAlphInfo::~CAlphInfo() {
   pChild->RecursiveDelete();
   pNext->RecursiveDelete();
diff --git a/Src/DasherCore/Alphabet/AlphInfo.h b/Src/DasherCore/Alphabet/AlphInfo.h
index 34233ff..888ef71 100644
--- a/Src/DasherCore/Alphabet/AlphInfo.h
+++ b/Src/DasherCore/Alphabet/AlphInfo.h
@@ -65,6 +65,9 @@ namespace Dasher {
 /// than the highest valid index.
 class Dasher::CAlphInfo : public SGroupInfo {
 public:
+  ///Format a character ready to write to a training file, by doubling
+  /// up any escape character (context-switch / conversion-start)
+  std::string escape(const std::string &ch) const;
   
   const std::string &GetID() const {return AlphID;}
 
diff --git a/Src/DasherCore/AlphabetManager.cpp b/Src/DasherCore/AlphabetManager.cpp
index 9d9c98d..213f714 100644
--- a/Src/DasherCore/AlphabetManager.cpp
+++ b/Src/DasherCore/AlphabetManager.cpp
@@ -617,6 +617,10 @@ const std::string &CAlphabetManager::CSymbolNode::outputText() const {
   return mgr()->m_pAlphabet->GetText(iSymbol);
 }
 
+string CAlphabetManager::CSymbolNode::trainText() {
+  return m_pMgr->m_pAlphabet->escape(outputText());
+}
+
 int CAlphabetManager::CSymbolNode::numChars() {
   return (outputText()=="\r\n") ? 2 : 1;
 }
diff --git a/Src/DasherCore/AlphabetManager.h b/Src/DasherCore/AlphabetManager.h
index ef70785..f3ceeb6 100644
--- a/Src/DasherCore/AlphabetManager.h
+++ b/Src/DasherCore/AlphabetManager.h
@@ -191,8 +191,8 @@ namespace Dasher {
     protected:
       virtual const std::string &outputText() const;
       ///Text to write to user training file/buffer when this symbol output.
-      /// Default just returns (a new string constructed from) outputText()
-      virtual std::string trainText() {return outputText();}
+      /// Default just returns the output text escaped if necessary.
+      virtual std::string trainText();
       /// Number of unicode _characters_ (not octets) for this symbol.
       /// Uniquely, a paragraph symbol can enter two distinct unicode characters
       /// (i.e. '\r' and '\n'); every other symbol enters only a single
diff --git a/Src/DasherCore/MandarinAlphMgr.cpp b/Src/DasherCore/MandarinAlphMgr.cpp
index d388de9..b8d17b1 100644
--- a/Src/DasherCore/MandarinAlphMgr.cpp
+++ b/Src/DasherCore/MandarinAlphMgr.cpp
@@ -539,14 +539,11 @@ string CMandarinAlphMgr::CMandSym::trainText() {
   //if there is only one possible PY that might have lead to this CH sym, no need
   // to record that in the training text
   set<symbol> &py(mgr()->m_vGroupsByConversion[iSymbol]);
-  if (py.size()==1) {
-    string s = outputText();
-    if (s==mgr()->m_pAlphabet->m_strConversionTrainStart)
-      return s+s;
+  string s = CSymbolNode::trainText();
+  if (py.size()==1)
     return s;
-  }
   //otherwise, ambiguous, record name
   if (!m_pyParent) return ""; //output nothing! TODO could reset context for what follows - but this really shouldn't ever happen?
   
-  return mgr()->m_pAlphabet->m_strConversionTrainStart + mgr()->m_vGroupNames[m_pyParent] + mgr()->m_pAlphabet->m_strConversionTrainStop + outputText();
+  return mgr()->m_pAlphabet->m_strConversionTrainStart + mgr()->m_vGroupNames[m_pyParent] + mgr()->m_pAlphabet->m_strConversionTrainStop + s;
 }



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]