[dasher] Tidy/refactor AlphabetManager + LM + Trainer creation, removing old chinese



commit 13ec716d51eb5bdb2156ecd8cfe98b1f1328d3f6
Author: Alan Lawrence <acl33 inf phy cam ac uk>
Date:   Wed Feb 9 20:06:40 2011 +0000

    Tidy/refactor AlphabetManager + LM + Trainer creation, removing old chinese
    
    AlphabetManager has virtual CreateLanguageModel() and GetTrainer() with default
    implementations, overridden by MandarinAlphMgr to create a PPMPY model and a
    MandarinTrainer.
    
    m_iConversionID == 2 now always selects MandarinAlphMgr, regardless of alphabet name.

 Src/DasherCore/AlphabetManager.cpp     |   37 +++++++++++--
 Src/DasherCore/AlphabetManager.h       |   15 +++++-
 Src/DasherCore/ConvertingAlphMgr.cpp   |    4 +-
 Src/DasherCore/ConvertingAlphMgr.h     |    2 +-
 Src/DasherCore/MandarinAlphMgr.cpp     |   17 +++++-
 Src/DasherCore/MandarinAlphMgr.h       |    8 +++-
 Src/DasherCore/NodeCreationManager.cpp |   93 +++++++++----------------------
 7 files changed, 98 insertions(+), 78 deletions(-)
---
diff --git a/Src/DasherCore/AlphabetManager.cpp b/Src/DasherCore/AlphabetManager.cpp
index ca47332..21372f5 100644
--- a/Src/DasherCore/AlphabetManager.cpp
+++ b/Src/DasherCore/AlphabetManager.cpp
@@ -27,7 +27,12 @@
 #include "Event.h"
 #include "EventHandler.h"
 #include "NodeCreationManager.h"
-
+#include "LanguageModelling/PPMLanguageModel.h"
+#include "LanguageModelling/WordLanguageModel.h"
+#include "LanguageModelling/DictLanguageModel.h"
+#include "LanguageModelling/MixtureLanguageModel.h"
+#include "LanguageModelling/PPMPYLanguageModel.h"
+#include "LanguageModelling/CTWLanguageModel.h"
 
 #include <vector>
 #include <sstream>
@@ -45,13 +50,37 @@ static char THIS_FILE[] = __FILE__;
 #endif
 #endif
 
-CAlphabetManager::CAlphabetManager(CDasherInterfaceBase *pInterface, CNodeCreationManager *pNCManager, const CAlphInfo *pAlphabet, const CAlphabetMap *pAlphabetMap, CLanguageModel *pLanguageModel)
-  : m_pLanguageModel(pLanguageModel), m_pNCManager(pNCManager), m_pAlphabet(pAlphabet), m_pAlphabetMap(pAlphabetMap) {
-  m_pInterface = pInterface;
+CAlphabetManager::CAlphabetManager(CDasherInterfaceBase *pInterface, CNodeCreationManager *pNCManager, const CAlphInfo *pAlphabet, const CAlphabetMap *pAlphabetMap)
+  : m_pNCManager(pNCManager), m_pAlphabet(pAlphabet), m_pAlphabetMap(pAlphabetMap), m_pInterface(pInterface) {
+}
 
+void CAlphabetManager::CreateLanguageModel(CEventHandler *pEventHandler, CSettingsStore *pSettingsStore) {
+  // FIXME - return to using enum here
+  switch (m_pInterface->GetLongParameter(LP_LANGUAGE_MODEL_ID)) {
+    default:
+      // If there is a bogus value for the language model ID, we'll default
+      // to our trusty old PPM language model.      
+    case 0:
+      m_pLanguageModel = new CPPMLanguageModel(pEventHandler, pSettingsStore, m_pAlphabet);
+      break;
+    case 2:
+      m_pLanguageModel = new CWordLanguageModel(pEventHandler, pSettingsStore, m_pAlphabet, m_pAlphabetMap);
+      break;
+    case 3:
+      m_pLanguageModel = new CMixtureLanguageModel(pEventHandler, pSettingsStore, m_pAlphabet, m_pAlphabetMap);
+      break;  
+    case 4:
+      m_pLanguageModel = new CCTWLanguageModel(pEventHandler, pSettingsStore, m_pAlphabet);
+      break;
+  }
+  
   m_iLearnContext = m_pLanguageModel->CreateEmptyContext();
 }
 
+CTrainer *CAlphabetManager::GetTrainer() {
+  return new CTrainer(m_pLanguageModel, m_pAlphabetMap);
+}
+
 const CAlphInfo *CAlphabetManager::GetAlphabet() const {
   return m_pAlphabet;
 }
diff --git a/Src/DasherCore/AlphabetManager.h b/Src/DasherCore/AlphabetManager.h
index 7777bbd..850f465 100644
--- a/Src/DasherCore/AlphabetManager.h
+++ b/Src/DasherCore/AlphabetManager.h
@@ -25,6 +25,7 @@
 #include "DasherNode.h"
 #include "Parameters.h"
 #include "NodeManager.h"
+#include "Trainer.h"
 
 class CNodeCreationManager;
 struct SGroupInfo;
@@ -43,8 +44,16 @@ namespace Dasher {
   ///
   class CAlphabetManager : public CNodeManager {
   public:
+    ///Create a new AlphabetManager. Note, not usable until CreateLanguageModel() called.
+    CAlphabetManager(CDasherInterfaceBase *pInterface, CNodeCreationManager *pNCManager, const CAlphInfo *pAlphabet, const CAlphabetMap *pAlphabetMap);
+    ///Creates the LM, and stores in m_pLanguageModel. Must be called after construction,
+    /// before the AlphMgr is used. Default implementation switches on LP_LANGUAGE_MODEL_ID.
+    virtual void CreateLanguageModel(CEventHandler *pEventHandler, CSettingsStore *pSets);
+
+    ///Gets a new trainer to train this LM. Caller is responsible for deallocating the
+    /// trainer later.
+    virtual CTrainer *GetTrainer();
 
-    CAlphabetManager(CDasherInterfaceBase *pInterface, CNodeCreationManager *pNCManager, const CAlphInfo *pAlphabet, const CAlphabetMap *pAlphabetMap, CLanguageModel *pLanguageModel);
     virtual ~CAlphabetManager();
 
   protected:
@@ -131,6 +140,7 @@ namespace Dasher {
     virtual CAlphNode *GetRoot(CDasherNode *pParent, unsigned int iLower, unsigned int iUpper, bool bEnteredLast, int iOffset);
 
     const CAlphInfo *GetAlphabet() const;
+    
   protected:
     ///
     /// Factory method for CAlphNode construction, so subclasses can override.
@@ -147,6 +157,8 @@ namespace Dasher {
     virtual void AddExtras(CAlphNode *pParent, std::vector<unsigned int> *pCProb);
 
     CLanguageModel *m_pLanguageModel;
+    CLanguageModel::Context m_iLearnContext;
+
     CNodeCreationManager *m_pNCManager;
     const CAlphInfo *m_pAlphabet;
     const CAlphabetMap *m_pAlphabetMap;
@@ -157,7 +169,6 @@ namespace Dasher {
     void GetProbs(std::vector<unsigned int> *pProbs, CLanguageModel::Context iContext);
     void IterateChildGroups(CAlphNode *pParent, const SGroupInfo *pParentGroup, CAlphNode *buildAround);
 
-    CLanguageModel::Context m_iLearnContext;
     CDasherInterfaceBase *m_pInterface;
     
   };
diff --git a/Src/DasherCore/ConvertingAlphMgr.cpp b/Src/DasherCore/ConvertingAlphMgr.cpp
index fb78f06..033be71 100644
--- a/Src/DasherCore/ConvertingAlphMgr.cpp
+++ b/Src/DasherCore/ConvertingAlphMgr.cpp
@@ -12,8 +12,8 @@
 
 using namespace Dasher;
 
-CConvertingAlphMgr::CConvertingAlphMgr(CDasherInterfaceBase *pInterface, CNodeCreationManager *pNCManager, CConversionManager *pConvMgr, const CAlphInfo *pAlphabet, const CAlphabetMap *pAlphabetMap, CLanguageModel *pLanguageModel)
- : CAlphabetManager(pInterface, pNCManager, pAlphabet, pAlphabetMap, pLanguageModel), m_pConvMgr(pConvMgr) {
+CConvertingAlphMgr::CConvertingAlphMgr(CDasherInterfaceBase *pInterface, CNodeCreationManager *pNCManager, CConversionManager *pConvMgr, const CAlphInfo *pAlphabet, const CAlphabetMap *pAlphabetMap)
+ : CAlphabetManager(pInterface, pNCManager, pAlphabet, pAlphabetMap), m_pConvMgr(pConvMgr) {
  }
 
 CConvertingAlphMgr::~CConvertingAlphMgr() {
diff --git a/Src/DasherCore/ConvertingAlphMgr.h b/Src/DasherCore/ConvertingAlphMgr.h
index 86495b5..2146906 100644
--- a/Src/DasherCore/ConvertingAlphMgr.h
+++ b/Src/DasherCore/ConvertingAlphMgr.h
@@ -16,7 +16,7 @@
 namespace Dasher {
   class CConvertingAlphMgr : public CAlphabetManager {
   public:
-    CConvertingAlphMgr(CDasherInterfaceBase *pInterface, CNodeCreationManager *pNCManager, CConversionManager *pConvMgr, const CAlphInfo *pAlphabet, const CAlphabetMap *pAlphabetMap, CLanguageModel *pLanguageModel);
+    CConvertingAlphMgr(CDasherInterfaceBase *pInterface, CNodeCreationManager *pNCManager, CConversionManager *pConvMgr, const CAlphInfo *pAlphabet, const CAlphabetMap *pAlphabetMap);
     virtual ~CConvertingAlphMgr();
   protected:
     void AddExtras(CAlphNode *pParent, std::vector<unsigned int> *pCProb);
diff --git a/Src/DasherCore/MandarinAlphMgr.cpp b/Src/DasherCore/MandarinAlphMgr.cpp
index 60f0550..a242058 100644
--- a/Src/DasherCore/MandarinAlphMgr.cpp
+++ b/Src/DasherCore/MandarinAlphMgr.cpp
@@ -50,8 +50,8 @@ static char THIS_FILE[] = __FILE__;
 // and do not correspond to groups in the chinese alphabet.
 #define LAST_PY 1288
 
-CMandarinAlphMgr::CMandarinAlphMgr(CDasherInterfaceBase *pInterface, CNodeCreationManager *pNCManager, const CAlphInfo *pAlphabet, const CAlphabetMap *pAlphMap, CLanguageModel *pLanguageModel)
-  : CAlphabetManager(pInterface, pNCManager, pAlphabet, pAlphMap, pLanguageModel),
+CMandarinAlphMgr::CMandarinAlphMgr(CDasherInterfaceBase *pInterface, CNodeCreationManager *pNCManager, const CAlphInfo *pAlphabet, const CAlphabetMap *pAlphMap)
+  : CAlphabetManager(pInterface, pNCManager, pAlphabet, pAlphMap),
     m_pCHAlphabet(pInterface->GetInfo("Chinese 简体中文 (simplified chinese, in pin yin groups, and pinyin)")),
     m_pCHAlphabetMap(m_pCHAlphabet->MakeMap()),
     m_pConversionsBySymbol(new set<symbol>[LAST_PY+1]) {
@@ -99,6 +99,19 @@ CMandarinAlphMgr::~CMandarinAlphMgr() {
   delete[] m_pConversionsBySymbol;
 }
 
+void CMandarinAlphMgr::CreateLanguageModel(CEventHandler *pEventHandler, CSettingsStore *pSettingsStore) {
+  //std::cout<<"CHALphabet size "<< pCHAlphabet->GetNumberTextSymbols(); [7603]
+  std::cout<<"Setting PPMPY model"<<std::endl;
+  m_pLanguageModel = new CPPMPYLanguageModel(pEventHandler, pSettingsStore, m_pCHAlphabet, m_pAlphabet);
+  //our superclass destructor will call ReleaseContext on the iLearnContext when we are destroyed,
+  // so we need to put _something_ in there (even tho we don't use it atm!)...
+  m_iLearnContext = m_pLanguageModel->CreateEmptyContext();
+}
+
+CTrainer *CMandarinAlphMgr::GetTrainer() {
+  return new CMandarinTrainer(m_pLanguageModel, m_pAlphabetMap, m_pCHAlphabetMap);
+}
+
 CDasherNode *CMandarinAlphMgr::CreateSymbolNode(CAlphNode *pParent, symbol iSymbol, unsigned int iLbnd, unsigned int iHbnd) {
 
   if (iSymbol <= LAST_PY) {
diff --git a/Src/DasherCore/MandarinAlphMgr.h b/Src/DasherCore/MandarinAlphMgr.h
index 56968ec..1db6a6f 100644
--- a/Src/DasherCore/MandarinAlphMgr.h
+++ b/Src/DasherCore/MandarinAlphMgr.h
@@ -35,8 +35,14 @@ namespace Dasher {
   class CMandarinAlphMgr : public CAlphabetManager {
   public:
 
-    CMandarinAlphMgr(CDasherInterfaceBase *pInterface, CNodeCreationManager *pNCManager, const CAlphInfo *pAlphabet, const CAlphabetMap *pAlphMap, CLanguageModel *pLanguageModel);
+    CMandarinAlphMgr(CDasherInterfaceBase *pInterface, CNodeCreationManager *pNCManager, const CAlphInfo *pAlphabet, const CAlphabetMap *pAlphMap);
     ~CMandarinAlphMgr();
+    
+    ///WZ: Mandarin Dasher Change. Sets language model to PPMPY.
+    void CreateLanguageModel(CEventHandler *pEventHandler, CSettingsStore *pSets);
+    ///ACL: returns a MandarinTrainer too.
+    CTrainer *GetTrainer();
+    
     /*ACL note: used to override GetRoot,
      to attempt to clone the context of the previous node
      in the case that the previous node was a PinyinConversionHelper node
diff --git a/Src/DasherCore/NodeCreationManager.cpp b/Src/DasherCore/NodeCreationManager.cpp
index 0d6c684..6b9f20d 100644
--- a/Src/DasherCore/NodeCreationManager.cpp
+++ b/Src/DasherCore/NodeCreationManager.cpp
@@ -1,11 +1,5 @@
 #include "DasherNode.h"
 #include "DasherInterfaceBase.h"
-#include "LanguageModelling/PPMLanguageModel.h"
-#include "LanguageModelling/WordLanguageModel.h"
-#include "LanguageModelling/DictLanguageModel.h"
-#include "LanguageModelling/MixtureLanguageModel.h"
-#include "LanguageModelling/PPMPYLanguageModel.h"
-#include "LanguageModelling/CTWLanguageModel.h"
 #include "NodeCreationManager.h"
 #include "MandarinAlphMgr.h"
 #include "ConvertingAlphMgr.h"
@@ -30,71 +24,38 @@ CNodeCreationManager::CNodeCreationManager(Dasher::CDasherInterfaceBase *pInterf
   
   // --
   
-  // Create an appropriate language model;
-  CLanguageModel *pLanguageModel;
-  //WZ: Mandarin Dasher Change
-  //If statement checks for the specific Super PinYin alphabet, and sets language model to PPMPY
-  if((pAlphInfo->m_iConversionID==2)&&(pSettingsStore->GetStringParameter(SP_ALPHABET_ID)=="Chinese Super Pin Yin, grouped by Dictionary")){
-    
-    std::string CHAlphabet = "Chinese 简体中文 (simplified chinese, in pin yin groups, and pinyin)";
-    const Dasher::CAlphInfo *pCHAlphInfo(pAlphIO->GetInfo(CHAlphabet));
-    const CAlphabetMap *pCHAlphMap = pCHAlphInfo->MakeMap();
-    
-    //std::cout<<"CHALphabet size "<< pCHAlphabet->GetNumberTextSymbols(); [7603]
-    pLanguageModel = new CPPMPYLanguageModel(pEventHandler, pSettingsStore, pCHAlphInfo, pAlphInfo);
-    m_pTrainer = new CMandarinTrainer(pLanguageModel, pAlphMap, pCHAlphMap);
-    std::cout<<"Setting PPMPY model"<<std::endl;
-  }
-  else{
-    //End Mandarin Dasher Change
-    
-    // FIXME - return to using enum here
-    switch (pSettingsStore->GetLongParameter(LP_LANGUAGE_MODEL_ID)) {
-      case 0:
-        pLanguageModel = new CPPMLanguageModel(pEventHandler, pSettingsStore, pAlphInfo);
-        break;
-      case 2:
-        pLanguageModel = new CWordLanguageModel(pEventHandler, pSettingsStore, pAlphInfo, pAlphMap);
-        break;
-      case 3:
-        pLanguageModel = new CMixtureLanguageModel(pEventHandler, pSettingsStore, pAlphInfo, pAlphMap);
-        break;  
-      case 4:
-        pLanguageModel = new CCTWLanguageModel(pEventHandler, pSettingsStore, pAlphInfo);
-        break;
-        
-      default:
-        // If there is a bogus value for the language model ID, we'll default
-        // to our trusty old PPM language model.
-        pLanguageModel = new CPPMLanguageModel(pEventHandler, pSettingsStore, pAlphInfo);    
-        break;
-    }
-    m_pTrainer = new CTrainer(pLanguageModel, pAlphMap);
-  }
-    
-    switch(pAlphInfo->m_iConversionID) {
-      default:
-        //TODO: Error reporting here
-        //fall through to
-      case 0: // No conversion required
-        m_pAlphabetManager = new CAlphabetManager(pInterface, this, pAlphInfo, pAlphMap, pLanguageModel);
-        break;
+  switch (pAlphInfo->m_iConversionID) {
+    default:
+      //TODO: Error reporting here
+      //fall through to
+    case 0: // No conversion required
+      m_pAlphabetManager = new CAlphabetManager(pInterface, this, pAlphInfo, pAlphMap);
+      break;      
 #ifdef JAPANESE
-      case 1: // Japanese
-        CConversionManager *pConversionManager =
+    case 1: {
+      // Japanese
+      CConversionManager *pConversionManager =
 #ifdef WIN32
-        new CIMEConversionHelper;
+      new CIMEConversionHelper;
 #else
-        new CCannaConversionHelper(this, pAlphInfo, GetLongParameter(LP_CONVERSION_TYPE), GetLongParameter(LP_CONVERSION_ORDER));
-#endif
-        //TODO ownership/deletion
-        m_pAlphabetManager = new CConvertingAlphMgr(pInterface, this, pConversionManager, pAlphInfo, pLanguageModel);
-        break;
+      new CCannaConversionHelper(this, pAlphInfo, GetLongParameter(LP_CONVERSION_TYPE), GetLongParameter(LP_CONVERSION_ORDER));
 #endif
-      case 2:   //(ACL) Modify AlphabetManager for Mandarin Dasher
-        m_pAlphabetManager = new CMandarinAlphMgr(pInterface, this, pAlphInfo, pAlphMap, pLanguageModel);
+      //TODO ownership/deletion
+      m_pAlphabetManager = new CConvertingAlphMgr(pInterface, this, pConversionManager, pAlphInfo, pLanguageModel);
+      break;
     }
-
+#endif
+    case 2:
+      //Mandarin Dasher!
+      //(ACL) Modify AlphabetManager for Mandarin Dasher
+      m_pAlphabetManager = new CMandarinAlphMgr(pInterface, this, pAlphInfo, pAlphMap);
+      break;
+  }
+  //all other configuration changes, etc., that might be necessary for a particular conversion mode,
+  // are implemented by AlphabetManager subclasses overriding the following two methods:
+  m_pAlphabetManager->CreateLanguageModel(pEventHandler, pSettingsStore);
+  m_pTrainer = m_pAlphabetManager->GetTrainer();
+    
   if (!pAlphInfo->GetTrainingFile().empty()) {
     //1. Look for system training text...
     CLockEvent oEvent("Training on System Text", true, 0);



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]