[dasher] Most LanguageModels need only #syms, not alphabet; some are not DasherComponents



commit 5004edb476be154856162b25f8b17c593459fc2e
Author: Alan Lawrence <acl33 inf phy cam ac uk>
Date:   Sun Feb 20 22:24:27 2011 +0000

    Most LanguageModels need only #syms, not alphabet; some are not DasherComponents
    
     => simplify CLanguageModel base class
    
    As part of this change I found it necessary to remove the #include of
      AlphIO.h from AlphInfo.h; this breaks a cyclic #include, but it did have
      some fallout: several other headers now need explicit #includes of their own.

 Src/DasherCore/Alphabet/AlphInfo.h                 |    1 -
 Src/DasherCore/Alphabet/AlphabetMap.h              |    1 +
 Src/DasherCore/AlphabetManager.cpp                 |    4 +-
 Src/DasherCore/AlphabetManager.h                   |    4 ++-
 Src/DasherCore/ConversionManager.h                 |    1 +
 Src/DasherCore/DasherNode.h                        |    1 +
 .../LanguageModelling/CTWLanguageModel.cpp         |    3 +-
 .../LanguageModelling/CTWLanguageModel.h           |    2 +-
 .../LanguageModelling/DictLanguageModel.cpp        |    6 ++--
 .../LanguageModelling/DictLanguageModel.h          |    8 ++--
 Src/DasherCore/LanguageModelling/LanguageModel.cpp |   37 --------------------
 Src/DasherCore/LanguageModelling/LanguageModel.h   |   13 +++----
 Src/DasherCore/LanguageModelling/Makefile.am       |    1 -
 .../LanguageModelling/MixtureLanguageModel.h       |    8 ++--
 .../LanguageModelling/PPMLanguageModel.cpp         |    8 ++--
 .../LanguageModelling/PPMLanguageModel.h           |    9 +++--
 .../LanguageModelling/PPMPYLanguageModel.cpp       |   12 +++----
 .../LanguageModelling/PPMPYLanguageModel.h         |   13 +++----
 .../LanguageModelling/WordLanguageModel.cpp        |    6 ++--
 .../LanguageModelling/WordLanguageModel.h          |    8 +++--
 Src/DasherCore/MandarinAlphMgr.cpp                 |    2 +-
 Src/MacOSX/Dasher.xcodeproj/project.pbxproj        |    4 --
 Src/iPhone/Dasher.xcodeproj/project.pbxproj        |    4 --
 23 files changed, 55 insertions(+), 101 deletions(-)
---
diff --git a/Src/DasherCore/Alphabet/AlphInfo.h b/Src/DasherCore/Alphabet/AlphInfo.h
index 6cce20e..5b1a08f 100644
--- a/Src/DasherCore/Alphabet/AlphInfo.h
+++ b/Src/DasherCore/Alphabet/AlphInfo.h
@@ -29,7 +29,6 @@
 #include "../DasherTypes.h"
 #include "AlphabetMap.h"
 #include "GroupInfo.h"
-#include "AlphIO.h"
 
 #include <string>
 #include <vector>
diff --git a/Src/DasherCore/Alphabet/AlphabetMap.h b/Src/DasherCore/Alphabet/AlphabetMap.h
index fedad4f..6bab5de 100644
--- a/Src/DasherCore/Alphabet/AlphabetMap.h
+++ b/Src/DasherCore/Alphabet/AlphabetMap.h
@@ -21,6 +21,7 @@
 
 namespace Dasher {
   class CAlphabetMap;
+  class CAlphInfo;
 } 
 
 /// \ingroup Alphabet
diff --git a/Src/DasherCore/AlphabetManager.cpp b/Src/DasherCore/AlphabetManager.cpp
index 19075d6..91af931 100644
--- a/Src/DasherCore/AlphabetManager.cpp
+++ b/Src/DasherCore/AlphabetManager.cpp
@@ -61,7 +61,7 @@ void CAlphabetManager::CreateLanguageModel(CEventHandler *pEventHandler, CSettin
       // If there is a bogus value for the language model ID, we'll default
       // to our trusty old PPM language model.      
     case 0:
-      m_pLanguageModel = new CPPMLanguageModel(pEventHandler, pSettingsStore, m_pAlphabet);
+      m_pLanguageModel = new CPPMLanguageModel(pEventHandler, pSettingsStore, m_pAlphabet->GetNumberTextSymbols());
       break;
     case 2:
       m_pLanguageModel = new CWordLanguageModel(pEventHandler, pSettingsStore, m_pAlphabet, m_pAlphabetMap);
@@ -70,7 +70,7 @@ void CAlphabetManager::CreateLanguageModel(CEventHandler *pEventHandler, CSettin
       m_pLanguageModel = new CMixtureLanguageModel(pEventHandler, pSettingsStore, m_pAlphabet, m_pAlphabetMap);
       break;  
     case 4:
-      m_pLanguageModel = new CCTWLanguageModel(pEventHandler, pSettingsStore, m_pAlphabet);
+      m_pLanguageModel = new CCTWLanguageModel(m_pAlphabet->GetNumberTextSymbols());
       break;
   }
   
diff --git a/Src/DasherCore/AlphabetManager.h b/Src/DasherCore/AlphabetManager.h
index d0c4695..5d386a2 100644
--- a/Src/DasherCore/AlphabetManager.h
+++ b/Src/DasherCore/AlphabetManager.h
@@ -23,9 +23,11 @@
 
 #include "LanguageModelling/LanguageModel.h"
 #include "DasherNode.h"
-#include "Parameters.h"
 #include "NodeManager.h"
 #include "Trainer.h"
+#include "Alphabet/AlphInfo.h"
+#include "SettingsStore.h"
+#include "EventHandler.h"
 
 class CNodeCreationManager;
 struct SGroupInfo;
diff --git a/Src/DasherCore/ConversionManager.h b/Src/DasherCore/ConversionManager.h
index 72ae66e..de526f4 100644
--- a/Src/DasherCore/ConversionManager.h
+++ b/Src/DasherCore/ConversionManager.h
@@ -26,6 +26,7 @@
 #include "DasherNode.h"
 #include "SCENode.h"
 #include "NodeManager.h"
+#include "Alphabet/AlphInfo.h"
 
 // TODO: Conversion manager needs to deal with offsets and contexts - Will: See Phil for an explanation.
 
diff --git a/Src/DasherCore/DasherNode.h b/Src/DasherCore/DasherNode.h
index 66620c6..3e809b8 100644
--- a/Src/DasherCore/DasherNode.h
+++ b/Src/DasherCore/DasherNode.h
@@ -26,6 +26,7 @@
 #include "LanguageModelling/LanguageModel.h"
 #include "DasherTypes.h"
 #include "NodeManager.h"
+#include "Alphabet/AlphabetMap.h"
 
 namespace Dasher {
   class CDasherNode;
diff --git a/Src/DasherCore/LanguageModelling/CTWLanguageModel.cpp b/Src/DasherCore/LanguageModelling/CTWLanguageModel.cpp
index 93911cd..50053b9 100644
--- a/Src/DasherCore/LanguageModelling/CTWLanguageModel.cpp
+++ b/Src/DasherCore/LanguageModelling/CTWLanguageModel.cpp
@@ -40,8 +40,7 @@ static char THIS_FILE[] = __FILE__;
 #endif
 
 
-CCTWLanguageModel::CCTWLanguageModel(Dasher::CEventHandler *pEventHandler, CSettingsStore *pSettingsStore, const CAlphInfo *pAlph)
-:CLanguageModel(pEventHandler, pSettingsStore, pAlph){
+CCTWLanguageModel::CCTWLanguageModel(int iNumSyms) : CLanguageModel(iNumSyms) {
 
 	Dasher::CHashTable HashTable;  // create hashtable
 	MaxDepth = 6;   // Maximum depth of the context tree
diff --git a/Src/DasherCore/LanguageModelling/CTWLanguageModel.h b/Src/DasherCore/LanguageModelling/CTWLanguageModel.h
index d151430..154304d 100644
--- a/Src/DasherCore/LanguageModelling/CTWLanguageModel.h
+++ b/Src/DasherCore/LanguageModelling/CTWLanguageModel.h
@@ -39,7 +39,7 @@ namespace Dasher {
   // CTW language model 
   class CCTWLanguageModel: public CLanguageModel {
   public:    
-	CCTWLanguageModel(Dasher::CEventHandler * pEventHandler, CSettingsStore * pSettingsStore, const CAlphInfo *pAlph);
+	CCTWLanguageModel(int iNumSyms);
 	virtual ~ CCTWLanguageModel(); 
 
     Context CreateEmptyContext();			
diff --git a/Src/DasherCore/LanguageModelling/DictLanguageModel.cpp b/Src/DasherCore/LanguageModelling/DictLanguageModel.cpp
index 34b99b8..9e7077c 100644
--- a/Src/DasherCore/LanguageModelling/DictLanguageModel.cpp
+++ b/Src/DasherCore/LanguageModelling/DictLanguageModel.cpp
@@ -91,7 +91,7 @@ CDictLanguageModel::CDictnode * CDictLanguageModel::AddSymbolToNode(CDictnode *p
 /////////////////////////////////////////////////////////////////////
 
 CDictLanguageModel::CDictLanguageModel(Dasher::CEventHandler *pEventHandler, CSettingsStore *pSettingsStore, const CAlphInfo *pAlph, const CAlphabetMap *pAlphMap)
-:CLanguageModel(pEventHandler, pSettingsStore, pAlph), m_pAlphMap(pAlphMap), NodesAllocated(0), max_order(0), m_NodeAlloc(8192), m_ContextAlloc(1024) {
+:CLanguageModel(pAlph->GetNumberTextSymbols()), CDasherComponent(pEventHandler, pSettingsStore), m_pAlphMap(pAlphMap), m_iSpaceSymbol(pAlph->GetSpaceSymbol()), NodesAllocated(0), max_order(0), m_NodeAlloc(8192), m_ContextAlloc(1024) {
   m_pRoot = m_NodeAlloc.Alloc();
   m_pRoot->sbl = -1;
   m_rootcontext = new CDictContext(m_pRoot, 0);
@@ -530,7 +530,7 @@ void CDictLanguageModel::AddSymbol(CDictLanguageModel::CDictContext &context, sy
 
   // Collapse the context if we have started a new word
 
-  if(sym == m_pAlphabet->GetSpaceSymbol()) {
+  if(sym == m_iSpaceSymbol) {
     CollapseContext(context);
   }
 
@@ -562,7 +562,7 @@ void CDictLanguageModel::EnterSymbol(Context c, int Symbol) {
   // collapse the context - the information required to update the
   // word part of the context is stored in the string.
 
-  if(Symbol == m_pAlphabet->GetSpaceSymbol()) {
+  if(Symbol == m_iSpaceSymbol) {
     CollapseContext(context);
     return;
   }
diff --git a/Src/DasherCore/LanguageModelling/DictLanguageModel.h b/Src/DasherCore/LanguageModelling/DictLanguageModel.h
index d1911c5..8db59c3 100644
--- a/Src/DasherCore/LanguageModelling/DictLanguageModel.h
+++ b/Src/DasherCore/LanguageModelling/DictLanguageModel.h
@@ -11,10 +11,9 @@
 
 #include "../../Common/NoClones.h"
 #include "../../Common/Allocators/PooledAlloc.h"
-#include "LanguageModel.h"
 #include "PPMLanguageModel.h"
-#include "../DasherTypes.h"
-
+#include "../Alphabet/AlphInfo.h"
+#include "../Alphabet/AlphabetMap.h"
 #include <vector>
 #include <map>
 #include <string>
@@ -26,7 +25,7 @@
 namespace Dasher {
   /// \ingroup LM
   /// \{
-  class CDictLanguageModel:public CLanguageModel {
+  class CDictLanguageModel:public CLanguageModel, public CDasherComponent {
   public:
     CDictLanguageModel(Dasher::CEventHandler * pEventHandler, CSettingsStore * pSettingsStore, const CAlphInfo *pAlph, const CAlphabetMap *pAlphMap);
     virtual ~CDictLanguageModel();
@@ -82,6 +81,7 @@ namespace Dasher {
     };
     
     const CAlphabetMap *m_pAlphMap;
+    const int m_iSpaceSymbol;
 
     CDictnode *AddSymbolToNode(CDictnode * pNode, symbol sym, int *update);
 
diff --git a/Src/DasherCore/LanguageModelling/LanguageModel.h b/Src/DasherCore/LanguageModelling/LanguageModel.h
index e23c307..816f9f8 100644
--- a/Src/DasherCore/LanguageModelling/LanguageModel.h
+++ b/Src/DasherCore/LanguageModelling/LanguageModel.h
@@ -11,8 +11,7 @@
 
 #include "../DasherTypes.h"
 
-#include "../Alphabet/AlphIO.h"
-#include "../DasherComponent.h"
+
 #include <vector>
 
 /////////////////////////////////////////////////////////////////////////////
@@ -30,17 +29,15 @@ namespace Dasher {
 /// Base class for all language model components
 ///
 
-class Dasher::CLanguageModel:public Dasher::CDasherComponent
+class Dasher::CLanguageModel
 {
 public:
 
   /////////////////////////////////////////////////////////////////////////////
 
-  CLanguageModel(Dasher::CEventHandler *pEventHandler, CSettingsStore *pSettingsStore, const CAlphInfo *pAlph);
+  CLanguageModel(int iNumSyms) : m_iNumSyms(iNumSyms) {};
 
   virtual ~CLanguageModel() {};
-
-  virtual void HandleEvent(Dasher::CEvent * pEvent);
   
   /// 
   /// Index of registered context 
@@ -148,10 +145,10 @@ public:
   ///Return the number of symbols over which we are making predictions, plus one
   /// (to leave space for an initial 0).
   int GetSize() const {
-    return m_pAlphabet->GetNumberTextSymbols()+1;
+    return m_iNumSyms+1;
   }
 
-  const CAlphInfo *m_pAlphabet;
+  const int m_iNumSyms;
 
 };
 
diff --git a/Src/DasherCore/LanguageModelling/Makefile.am b/Src/DasherCore/LanguageModelling/Makefile.am
index 8df6997..4ac9b80 100644
--- a/Src/DasherCore/LanguageModelling/Makefile.am
+++ b/Src/DasherCore/LanguageModelling/Makefile.am
@@ -7,7 +7,6 @@ libdasherlm_a_SOURCES = \
 		DictLanguageModel.h \
 		HashTable.cpp \
 		HashTable.h \
-		LanguageModel.cpp \
 		LanguageModel.h \
 		MixtureLanguageModel.h \
 		PPMLanguageModel.cpp \
diff --git a/Src/DasherCore/LanguageModelling/MixtureLanguageModel.h b/Src/DasherCore/LanguageModelling/MixtureLanguageModel.h
index b4b18cc..671992c 100644
--- a/Src/DasherCore/LanguageModelling/MixtureLanguageModel.h
+++ b/Src/DasherCore/LanguageModelling/MixtureLanguageModel.h
@@ -23,19 +23,19 @@ namespace Dasher {
 
   /// \ingroup LM
   /// \{
-  class CMixtureLanguageModel:public CLanguageModel {
+  class CMixtureLanguageModel:public CLanguageModel, public CDasherComponent {
   public:
 
     /////////////////////////////////////////////////////////////////////////////
 
-    CMixtureLanguageModel(Dasher::CEventHandler * pEventHandler, CSettingsStore * pSettingsStore, const CAlphInfo *pAlph, const CAlphabetMap *pAlphMap):CLanguageModel(pEventHandler, pSettingsStore, pAlph) {
+    CMixtureLanguageModel(Dasher::CEventHandler * pEventHandler, CSettingsStore * pSettingsStore, const CAlphInfo *pAlph, const CAlphabetMap *pAlphMap):CLanguageModel(pAlph->GetNumberTextSymbols()), CDasherComponent(pEventHandler, pSettingsStore) {
 
       //      std::cout << m_pAlphabet << std::endl;
 
       NextContext = 0;
 
-      lma = new CPPMLanguageModel(m_pEventHandler, m_pSettingsStore, m_pAlphabet);
-      lmb = new CDictLanguageModel(m_pEventHandler, m_pSettingsStore, m_pAlphabet, pAlphMap);
+      lma = new CPPMLanguageModel(m_pEventHandler, m_pSettingsStore, m_iNumSyms);
+      lmb = new CDictLanguageModel(m_pEventHandler, m_pSettingsStore, pAlph, pAlphMap);
 
     };
 
diff --git a/Src/DasherCore/LanguageModelling/PPMLanguageModel.cpp b/Src/DasherCore/LanguageModelling/PPMLanguageModel.cpp
index a01c29b..1d6d7f6 100644
--- a/Src/DasherCore/LanguageModelling/PPMLanguageModel.cpp
+++ b/Src/DasherCore/LanguageModelling/PPMLanguageModel.cpp
@@ -30,8 +30,8 @@ static char THIS_FILE[] = __FILE__;
 
 /////////////////////////////////////////////////////////////////////
 
-CAbstractPPM::CAbstractPPM(Dasher::CEventHandler *pEventHandler, CSettingsStore *pSettingsStore, const CAlphInfo *pAlph, CPPMnode *pRoot, int iMaxOrder)
-: CLanguageModel(pEventHandler, pSettingsStore, pAlph), m_pRoot(pRoot), m_iMaxOrder(iMaxOrder), bUpdateExclusion( GetLongParameter(LP_LM_UPDATE_EXCLUSION)!=0 ), m_ContextAlloc(1024) {
+CAbstractPPM::CAbstractPPM(Dasher::CEventHandler *pEventHandler, CSettingsStore *pSettingsStore, int iNumSyms, CPPMnode *pRoot, int iMaxOrder)
+: CLanguageModel(iNumSyms), CDasherComponent(pEventHandler, pSettingsStore), m_pRoot(pRoot), m_iMaxOrder(iMaxOrder), bUpdateExclusion( GetLongParameter(LP_LM_UPDATE_EXCLUSION)!=0 ), m_ContextAlloc(1024) {
   m_pRootContext = m_ContextAlloc.Alloc();
   m_pRootContext->head = m_pRoot;
   m_pRootContext->order = 0;
@@ -444,8 +444,8 @@ CAbstractPPM::CPPMnode * CAbstractPPM::AddSymbolToNode(CPPMnode *pNode, symbol s
   return pReturn;
 }
 
-CPPMLanguageModel::CPPMLanguageModel(CEventHandler *pEvt, CSettingsStore *sets, const CAlphInfo *pAlph)
-: CAbstractPPM(pEvt, sets, pAlph, new CPPMnode(-1), sets->GetLongParameter(LP_LM_MAX_ORDER)), NodesAllocated(0), m_NodeAlloc(8192) {
+CPPMLanguageModel::CPPMLanguageModel(CEventHandler *pEvt, CSettingsStore *sets, int iNumSyms)
+: CAbstractPPM(pEvt, sets, iNumSyms, new CPPMnode(-1), sets->GetLongParameter(LP_LM_MAX_ORDER)), NodesAllocated(0), m_NodeAlloc(8192) {
 }
 
 CAbstractPPM::CPPMnode *CPPMLanguageModel::makeNode(int sym) {
diff --git a/Src/DasherCore/LanguageModelling/PPMLanguageModel.h b/Src/DasherCore/LanguageModelling/PPMLanguageModel.h
index 37b034a..10d0fcc 100644
--- a/Src/DasherCore/LanguageModelling/PPMLanguageModel.h
+++ b/Src/DasherCore/LanguageModelling/PPMLanguageModel.h
@@ -13,11 +13,12 @@
 #include "../../Common/Allocators/PooledAlloc.h"
 
 #include "LanguageModel.h"
-
+#include "../DasherComponent.h"
 #include "stdlib.h"
 #include <vector>
 #include <fstream>
 #include <set>
+#include <map>
 
 namespace Dasher {
 
@@ -33,7 +34,7 @@ namespace Dasher {
   /// Subclasses must implement CLanguageModel::GetProbs and a makeNode() method (perhaps
   /// using a pooled allocator).
   ///
-  class CAbstractPPM :public CLanguageModel, private NoClones {
+  class CAbstractPPM :public CLanguageModel, public CDasherComponent, private NoClones {
   protected:
     class ChildIterator;
     class CPPMnode {
@@ -99,7 +100,7 @@ namespace Dasher {
     ///Makes a new node, of whatever kind (subclass of CPPMnode, perhaps with extra info)
     /// is required by the subclass, for the specified symbol. (Initial count will be 1.)
     virtual CPPMnode *makeNode(int sym)=0;
-    CAbstractPPM(Dasher::CEventHandler * pEventHandler, CSettingsStore * pSettingsStore, const CAlphInfo *pAlph, CPPMnode *pRoot, int iMaxOrder);
+    CAbstractPPM(Dasher::CEventHandler * pEventHandler, CSettingsStore * pSettingsStore, int iNumSyms, CPPMnode *pRoot, int iMaxOrder);
     
     void dumpSymbol(symbol sym);
     void dumpString(char *str, int pos, int len);
@@ -138,7 +139,7 @@ namespace Dasher {
   /// max order from LP_LM_MAX_ORDER.
   class CPPMLanguageModel : public CAbstractPPM {
   public:
-    CPPMLanguageModel(CEventHandler *pEventHandler, CSettingsStore *pSets, const CAlphInfo *pAlph);
+    CPPMLanguageModel(CEventHandler *pEventHandler, CSettingsStore *pSets, int iNumSyms);
     virtual void GetProbs(Context context, std::vector < unsigned int >&Probs, int norm, int iUniform) const;
   protected:
     /// Makes a standard CPPMnode, but using a pooled allocator (m_NodeAlloc) - faster!
diff --git a/Src/DasherCore/LanguageModelling/PPMPYLanguageModel.cpp b/Src/DasherCore/LanguageModelling/PPMPYLanguageModel.cpp
index cf12a66..c401278 100644
--- a/Src/DasherCore/LanguageModelling/PPMPYLanguageModel.cpp
+++ b/Src/DasherCore/LanguageModelling/PPMPYLanguageModel.cpp
@@ -33,10 +33,8 @@ static char THIS_FILE[] = __FILE__;
 
 /////////////////////////////////////////////////////////////////////
 
-CPPMPYLanguageModel::CPPMPYLanguageModel(Dasher::CEventHandler *pEventHandler, CSettingsStore *pSettingsStore, const CAlphInfo *pAlph, const CAlphInfo *pPyAlphabet)
-  :CAbstractPPM(pEventHandler, pSettingsStore, pAlph, new CPPMPYnode(-1), 2), NodesAllocated(0), m_NodeAlloc(8192), m_pPyAlphabet(pPyAlphabet){
-
-  m_iAlphSize = GetSize();
+CPPMPYLanguageModel::CPPMPYLanguageModel(Dasher::CEventHandler *pEventHandler, CSettingsStore *pSettingsStore, int iNumCHsyms, int iNumPYsyms)
+  :CAbstractPPM(pEventHandler, pSettingsStore, iNumCHsyms, new CPPMPYnode(-1), 2), NodesAllocated(0), m_NodeAlloc(8192), m_iNumPYsyms(iNumPYsyms) {
 }
 
 /////////////////////////////////////////////////////////////////////
@@ -194,7 +192,7 @@ void CPPMPYLanguageModel::GetPartProbs(Context context, std::vector<pair<symbol,
   // In Will's code, it assigned 0 to the first entry, then split evenly among the rest...seems wrong?!
   int i=0;
   for (std::vector<pair<symbol, unsigned int> >::iterator it = vChildren.begin(); it!=vChildren.end(); it++) {
-    DASHER_ASSERT(it->first > -1 && it->first <= m_iAlphSize);
+    DASHER_ASSERT(it->first > 0 && it->first < GetSize()); //i.e., is valid CH symbol
     it->second = iUniformLeft / (vChildren.size() - i);
       //  std::cout<<"iUniformLeft: "<<iUniformLeft<<std::endl;
     iUniformLeft -= it->second;
@@ -299,7 +297,7 @@ void CPPMPYLanguageModel::GetProbs(Context context, std::vector<unsigned int> &p
   */
   //  DASHER_ASSERT(m_setContexts.count(ppmcontext) > 0);
 
-  int iNumSymbols = m_pPyAlphabet->GetNumberTextSymbols()+1;
+  int iNumSymbols = m_iNumPYsyms+1;
   
   probs.resize(iNumSymbols);
 
@@ -403,7 +401,7 @@ void CPPMPYLanguageModel::LearnPYSymbol(Context c, int pysym) {
   if(pysym==0)
     return;
 
-  DASHER_ASSERT(pysym > 0 && pysym <= m_pPyAlphabet->GetNumberTextSymbols());
+  DASHER_ASSERT(pysym > 0 && pysym <= m_iNumPYsyms);
   CPPMPYLanguageModel::CPPMContext & context = *(CPPMContext *) (c);
  
   //  std::cout<<"py learn context : "<<context.head->symbol<<std::endl;
diff --git a/Src/DasherCore/LanguageModelling/PPMPYLanguageModel.h b/Src/DasherCore/LanguageModelling/PPMPYLanguageModel.h
index 707ef23..5252811 100644
--- a/Src/DasherCore/LanguageModelling/PPMPYLanguageModel.h
+++ b/Src/DasherCore/LanguageModelling/PPMPYLanguageModel.h
@@ -40,11 +40,11 @@ namespace Dasher {
   class CPPMPYLanguageModel : public CAbstractPPM {
   public:
     ///Construct a new PPMPYLanguageModel. 
-    /// \param pAlph alphabet containing the actual symbols we want to write (i.e. Chinese); this
-    /// is the only alphabet passed to the CAbstractPPM superclass.
-    /// \param pPyAlph alphabet of pinyin phonemes; we will predict probabilities for these
+    /// \param iNumCHSyms number of symbols in the alphabet in which we actually want to write (i.e. Chinese),
+    /// i.e. from which contexts are formed; this is passed to the CAbstractPPM superclass.
+    /// \param iNumPYSyms number of pinyin phonemes, i.e. which we generate probabilities for in GetProbs
     /// based (only) on the preceding _Chinese_ symbols.
-    CPPMPYLanguageModel(Dasher::CEventHandler * pEventHandler, CSettingsStore * pSettingsStore, const CAlphInfo *pAlph, const CAlphInfo *pPyAlph);
+    CPPMPYLanguageModel(Dasher::CEventHandler * pEventHandler, CSettingsStore * pSettingsStore, int iNumCHsyms, int iNumPYsyms);
 
     ///Learns a pinyin symbol in the specified context, but does not move the context on.
     void LearnPYSymbol(Context context, int Symbol);
@@ -52,7 +52,7 @@ namespace Dasher {
     ///Predicts probabilities for the next Pinyin symbol (blending as per PPM,
     /// but using the pychild map rather than child CPPMPYnodes).
     /// \param Probs vector to fill with predictions for pinyin symbols: will be filled
-    ///  with m_pPyAlphabet->GetNumberTextSymbols() numbers plus an initial 0. 
+    ///  with m_iNumPYsyms numbers plus an initial 0. 
     virtual void GetProbs(Context context, std::vector < unsigned int >&Probs, int norm, int iUniform) const;
     
     ///Predicts probabilities for the next Chinese symbol, filtered to only include symbols within a specified set.
@@ -80,8 +80,7 @@ namespace Dasher {
     int NodesAllocated;
     mutable CSimplePooledAlloc < CPPMPYnode > m_NodeAlloc;
 
-    const CAlphInfo *m_pPyAlphabet;
-    int m_iAlphSize;
+    const int m_iNumPYsyms;
   };
 
   /// @}  
diff --git a/Src/DasherCore/LanguageModelling/WordLanguageModel.cpp b/Src/DasherCore/LanguageModelling/WordLanguageModel.cpp
index bb3eb9c..fedf503 100644
--- a/Src/DasherCore/LanguageModelling/WordLanguageModel.cpp
+++ b/Src/DasherCore/LanguageModelling/WordLanguageModel.cpp
@@ -123,7 +123,7 @@ CWordLanguageModel::CWordnode * CWordLanguageModel::AddSymbolToNode(CWordnode *p
 
 CWordLanguageModel::CWordLanguageModel(Dasher::CEventHandler *pEventHandler, CSettingsStore *pSettingsStore, 
 				       const CAlphInfo *pAlph, const CAlphabetMap *pAlphMap)
-  :CLanguageModel(pEventHandler, pSettingsStore, pAlph), m_pAlphMap(pAlphMap), NodesAllocated(0), 
+  :CLanguageModel(pAlph->GetNumberTextSymbols()), CDasherComponent(pEventHandler, pSettingsStore), m_pAlphMap(pAlphMap), m_iSpaceSymbol(pAlph->GetSpaceSymbol()), NodesAllocated(0), 
    max_order(2), m_NodeAlloc(8192), m_ContextAlloc(1024) {
   
   // Construct a root node for the trie
@@ -134,7 +134,7 @@ CWordLanguageModel::CWordLanguageModel(Dasher::CEventHandler *pEventHandler, CSe
 
   // Create a spelling model
 
-  pSpellingModel = new CPPMLanguageModel(m_pEventHandler, m_pSettingsStore, m_pAlphabet);
+  pSpellingModel = new CPPMLanguageModel(m_pEventHandler, m_pSettingsStore, m_iNumSyms);
 
   // Construct a root context
   
@@ -626,7 +626,7 @@ void CWordLanguageModel::AddSymbol(CWordLanguageModel::CWordContext &context, sy
   // Collapse the context (with learning) if we've just entered a space
   // FIXME - we need to generalise this for more languages.
 
-  if(sym == m_pAlphabet->GetSpaceSymbol()) {
+  if(sym == m_iSpaceSymbol) {
     CollapseContext(context, bLearn);
     context.m_dSpellingFactor = 1.0;
   }
diff --git a/Src/DasherCore/LanguageModelling/WordLanguageModel.h b/Src/DasherCore/LanguageModelling/WordLanguageModel.h
index 09746ad..9a954a4 100644
--- a/Src/DasherCore/LanguageModelling/WordLanguageModel.h
+++ b/Src/DasherCore/LanguageModelling/WordLanguageModel.h
@@ -13,9 +13,10 @@
 
 #include "../../Common/NoClones.h"
 #include "../../Common/Allocators/PooledAlloc.h"
-#include "LanguageModel.h"
 #include "PPMLanguageModel.h"
-#include "../DasherTypes.h"
+#include "../DasherComponent.h"
+#include "../Alphabet/AlphInfo.h"
+#include "../Alphabet/AlphabetMap.h"
 
 #include <vector>
 #include <map>
@@ -33,7 +34,7 @@ namespace Dasher {
   ///
   /// Language model using words
   ///
-  class CWordLanguageModel:public CLanguageModel {
+  class CWordLanguageModel:public CLanguageModel, public CDasherComponent {
   public:
     CWordLanguageModel(Dasher::CEventHandler * pEventHandler, CSettingsStore * pSettingsStore, const CAlphInfo *pAlph, const CAlphabetMap *pAlphMap);
       virtual ~ CWordLanguageModel();
@@ -114,6 +115,7 @@ namespace Dasher {
     int lookup_word_const(const std::string & w) const;
 
     const CAlphabetMap *m_pAlphMap;
+    const int m_iSpaceSymbol;
     
     CWordContext *m_rootcontext;
     CWordnode *m_pRoot;
diff --git a/Src/DasherCore/MandarinAlphMgr.cpp b/Src/DasherCore/MandarinAlphMgr.cpp
index f556ad4..26a3473 100644
--- a/Src/DasherCore/MandarinAlphMgr.cpp
+++ b/Src/DasherCore/MandarinAlphMgr.cpp
@@ -108,7 +108,7 @@ CMandarinAlphMgr::~CMandarinAlphMgr() {
 void CMandarinAlphMgr::CreateLanguageModel(CEventHandler *pEventHandler, CSettingsStore *pSettingsStore) {
   //std::cout<<"CHALphabet size "<< pCHAlphabet->GetNumberTextSymbols(); [7603]
   std::cout<<"Setting PPMPY model"<<std::endl;
-  m_pLanguageModel = new CPPMPYLanguageModel(pEventHandler, pSettingsStore, m_pCHAlphabet, m_pAlphabet);
+  m_pLanguageModel = new CPPMPYLanguageModel(pEventHandler, pSettingsStore, m_pCHAlphabet->GetNumberTextSymbols(), m_pAlphabet->GetNumberTextSymbols());
   //our superclass destructor will call ReleaseContext on the iLearnContext when we are destroyed,
   // so we need to put _something_ in there (even tho we don't use it atm!)...
   m_iLearnContext = m_pLanguageModel->CreateEmptyContext();
diff --git a/Src/MacOSX/Dasher.xcodeproj/project.pbxproj b/Src/MacOSX/Dasher.xcodeproj/project.pbxproj
index 7ac5b5b..90f762b 100755
--- a/Src/MacOSX/Dasher.xcodeproj/project.pbxproj
+++ b/Src/MacOSX/Dasher.xcodeproj/project.pbxproj
@@ -89,7 +89,6 @@
 		1948BEF90C226CFD001DFA32 /* DictLanguageModel.h in Headers */ = {isa = PBXBuildFile; fileRef = 1948BE540C226CFD001DFA32 /* DictLanguageModel.h */; };
 		1948BEFA0C226CFD001DFA32 /* HashTable.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1948BE550C226CFD001DFA32 /* HashTable.cpp */; };
 		1948BEFB0C226CFD001DFA32 /* HashTable.h in Headers */ = {isa = PBXBuildFile; fileRef = 1948BE560C226CFD001DFA32 /* HashTable.h */; };
-		1948BF030C226CFD001DFA32 /* LanguageModel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1948BE5E0C226CFD001DFA32 /* LanguageModel.cpp */; };
 		1948BF040C226CFD001DFA32 /* LanguageModel.h in Headers */ = {isa = PBXBuildFile; fileRef = 1948BE5F0C226CFD001DFA32 /* LanguageModel.h */; };
 		1948BF060C226CFD001DFA32 /* MixtureLanguageModel.h in Headers */ = {isa = PBXBuildFile; fileRef = 1948BE610C226CFD001DFA32 /* MixtureLanguageModel.h */; };
 		1948BF070C226CFD001DFA32 /* PPMLanguageModel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1948BE620C226CFD001DFA32 /* PPMLanguageModel.cpp */; };
@@ -494,7 +493,6 @@
 		1948BE5B0C226CFD001DFA32 /* KanjiConversionCanna.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = KanjiConversionCanna.h; sourceTree = "<group>"; };
 		1948BE5C0C226CFD001DFA32 /* KanjiConversionIME.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = KanjiConversionIME.cpp; sourceTree = "<group>"; };
 		1948BE5D0C226CFD001DFA32 /* KanjiConversionIME.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = KanjiConversionIME.h; sourceTree = "<group>"; };
-		1948BE5E0C226CFD001DFA32 /* LanguageModel.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = LanguageModel.cpp; sourceTree = "<group>"; };
 		1948BE5F0C226CFD001DFA32 /* LanguageModel.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = LanguageModel.h; sourceTree = "<group>"; };
 		1948BE610C226CFD001DFA32 /* MixtureLanguageModel.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = MixtureLanguageModel.h; sourceTree = "<group>"; };
 		1948BE620C226CFD001DFA32 /* PPMLanguageModel.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = PPMLanguageModel.cpp; sourceTree = "<group>"; };
@@ -1077,7 +1075,6 @@
 				1948BE5B0C226CFD001DFA32 /* KanjiConversionCanna.h */,
 				1948BE5C0C226CFD001DFA32 /* KanjiConversionIME.cpp */,
 				1948BE5D0C226CFD001DFA32 /* KanjiConversionIME.h */,
-				1948BE5E0C226CFD001DFA32 /* LanguageModel.cpp */,
 				1948BE5F0C226CFD001DFA32 /* LanguageModel.h */,
 				1948BE610C226CFD001DFA32 /* MixtureLanguageModel.h */,
 				1948BE620C226CFD001DFA32 /* PPMLanguageModel.cpp */,
@@ -1780,7 +1777,6 @@
 				1948BEF60C226CFD001DFA32 /* CTWLanguageModel.cpp in Sources */,
 				1948BEF80C226CFD001DFA32 /* DictLanguageModel.cpp in Sources */,
 				1948BEFA0C226CFD001DFA32 /* HashTable.cpp in Sources */,
-				1948BF030C226CFD001DFA32 /* LanguageModel.cpp in Sources */,
 				1948BF070C226CFD001DFA32 /* PPMLanguageModel.cpp in Sources */,
 				1948BF0A0C226CFD001DFA32 /* WordLanguageModel.cpp in Sources */,
 				1948BF0D0C226CFD001DFA32 /* MemoryLeak.cpp in Sources */,
diff --git a/Src/iPhone/Dasher.xcodeproj/project.pbxproj b/Src/iPhone/Dasher.xcodeproj/project.pbxproj
index aee49eb..5e22a5c 100755
--- a/Src/iPhone/Dasher.xcodeproj/project.pbxproj
+++ b/Src/iPhone/Dasher.xcodeproj/project.pbxproj
@@ -195,7 +195,6 @@
 		3344FE440F71717C00506EAA /* CTWLanguageModel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3344FDC70F71717C00506EAA /* CTWLanguageModel.cpp */; };
 		3344FE450F71717C00506EAA /* DictLanguageModel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3344FDC90F71717C00506EAA /* DictLanguageModel.cpp */; };
 		3344FE460F71717C00506EAA /* HashTable.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3344FDCB0F71717C00506EAA /* HashTable.cpp */; };
-		3344FE4A0F71717C00506EAA /* LanguageModel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3344FDD40F71717C00506EAA /* LanguageModel.cpp */; };
 		3344FE4C0F71717C00506EAA /* PPMLanguageModel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3344FDD80F71717C00506EAA /* PPMLanguageModel.cpp */; };
 		3344FE4D0F71717C00506EAA /* WordLanguageModel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3344FDDB0F71717C00506EAA /* WordLanguageModel.cpp */; };
 		3344FE4F0F71717C00506EAA /* MemoryLeak.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3344FDDE0F71717C00506EAA /* MemoryLeak.cpp */; };
@@ -606,7 +605,6 @@
 		3344FDCA0F71717C00506EAA /* DictLanguageModel.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = DictLanguageModel.h; sourceTree = "<group>"; };
 		3344FDCB0F71717C00506EAA /* HashTable.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = HashTable.cpp; sourceTree = "<group>"; };
 		3344FDCC0F71717C00506EAA /* HashTable.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = HashTable.h; sourceTree = "<group>"; };
-		3344FDD40F71717C00506EAA /* LanguageModel.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = LanguageModel.cpp; sourceTree = "<group>"; };
 		3344FDD50F71717C00506EAA /* LanguageModel.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = LanguageModel.h; sourceTree = "<group>"; };
 		3344FDD70F71717C00506EAA /* MixtureLanguageModel.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MixtureLanguageModel.h; sourceTree = "<group>"; };
 		3344FDD80F71717C00506EAA /* PPMLanguageModel.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = PPMLanguageModel.cpp; sourceTree = "<group>"; };
@@ -1325,7 +1323,6 @@
 				3344FDCA0F71717C00506EAA /* DictLanguageModel.h */,
 				3344FDCB0F71717C00506EAA /* HashTable.cpp */,
 				3344FDCC0F71717C00506EAA /* HashTable.h */,
-				3344FDD40F71717C00506EAA /* LanguageModel.cpp */,
 				3344FDD50F71717C00506EAA /* LanguageModel.h */,
 				3344FDD70F71717C00506EAA /* MixtureLanguageModel.h */,
 				3344FDD80F71717C00506EAA /* PPMLanguageModel.cpp */,
@@ -1590,7 +1587,6 @@
 				3344FE440F71717C00506EAA /* CTWLanguageModel.cpp in Sources */,
 				3344FE450F71717C00506EAA /* DictLanguageModel.cpp in Sources */,
 				3344FE460F71717C00506EAA /* HashTable.cpp in Sources */,
-				3344FE4A0F71717C00506EAA /* LanguageModel.cpp in Sources */,
 				3344FE4C0F71717C00506EAA /* PPMLanguageModel.cpp in Sources */,
 				3344FE4D0F71717C00506EAA /* WordLanguageModel.cpp in Sources */,
 				3344FE4F0F71717C00506EAA /* MemoryLeak.cpp in Sources */,



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]