[dasher] Tidy/refactor AlphabetManager + LM + Trainer creation, removing old chinese
- From: Patrick Welche <pwelche src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [dasher] Tidy/refactor AlphabetManager + LM + Trainer creation, removing old chinese
- Date: Tue, 15 Mar 2011 17:11:32 +0000 (UTC)
commit 13ec716d51eb5bdb2156ecd8cfe98b1f1328d3f6
Author: Alan Lawrence <acl33 inf phy cam ac uk>
Date: Wed Feb 9 20:06:40 2011 +0000
Tidy/refactor AlphabetManager + LM + Trainer creation, removing old chinese
AlphabetManager has virtual CreateLanguageModel() and GetTrainer() w/defaults,
overridden by MandarinAlphMgr to create PPMPY model & MandarinTrainer.
m_iConversionID == 2 now selects MandarinAlphMgr, regardless of the alphabet name.
Src/DasherCore/AlphabetManager.cpp | 37 +++++++++++--
Src/DasherCore/AlphabetManager.h | 15 +++++-
Src/DasherCore/ConvertingAlphMgr.cpp | 4 +-
Src/DasherCore/ConvertingAlphMgr.h | 2 +-
Src/DasherCore/MandarinAlphMgr.cpp | 17 +++++-
Src/DasherCore/MandarinAlphMgr.h | 8 +++-
Src/DasherCore/NodeCreationManager.cpp | 93 +++++++++----------------------
7 files changed, 98 insertions(+), 78 deletions(-)
---
diff --git a/Src/DasherCore/AlphabetManager.cpp b/Src/DasherCore/AlphabetManager.cpp
index ca47332..21372f5 100644
--- a/Src/DasherCore/AlphabetManager.cpp
+++ b/Src/DasherCore/AlphabetManager.cpp
@@ -27,7 +27,12 @@
#include "Event.h"
#include "EventHandler.h"
#include "NodeCreationManager.h"
-
+#include "LanguageModelling/PPMLanguageModel.h"
+#include "LanguageModelling/WordLanguageModel.h"
+#include "LanguageModelling/DictLanguageModel.h"
+#include "LanguageModelling/MixtureLanguageModel.h"
+#include "LanguageModelling/PPMPYLanguageModel.h"
+#include "LanguageModelling/CTWLanguageModel.h"
#include <vector>
#include <sstream>
@@ -45,13 +50,37 @@ static char THIS_FILE[] = __FILE__;
#endif
#endif
-CAlphabetManager::CAlphabetManager(CDasherInterfaceBase *pInterface, CNodeCreationManager *pNCManager, const CAlphInfo *pAlphabet, const CAlphabetMap *pAlphabetMap, CLanguageModel *pLanguageModel)
- : m_pLanguageModel(pLanguageModel), m_pNCManager(pNCManager), m_pAlphabet(pAlphabet), m_pAlphabetMap(pAlphabetMap) {
- m_pInterface = pInterface;
+CAlphabetManager::CAlphabetManager(CDasherInterfaceBase *pInterface, CNodeCreationManager *pNCManager, const CAlphInfo *pAlphabet, const CAlphabetMap *pAlphabetMap)
+ : m_pNCManager(pNCManager), m_pAlphabet(pAlphabet), m_pAlphabetMap(pAlphabetMap), m_pInterface(pInterface) {
+}
+void CAlphabetManager::CreateLanguageModel(CEventHandler *pEventHandler, CSettingsStore *pSettingsStore) {
+ // FIXME - return to using enum here
+ switch (m_pInterface->GetLongParameter(LP_LANGUAGE_MODEL_ID)) {
+ default:
+ // If there is a bogus value for the language model ID, we'll default
+ // to our trusty old PPM language model.
+ case 0:
+ m_pLanguageModel = new CPPMLanguageModel(pEventHandler, pSettingsStore, m_pAlphabet);
+ break;
+ case 2:
+ m_pLanguageModel = new CWordLanguageModel(pEventHandler, pSettingsStore, m_pAlphabet, m_pAlphabetMap);
+ break;
+ case 3:
+ m_pLanguageModel = new CMixtureLanguageModel(pEventHandler, pSettingsStore, m_pAlphabet, m_pAlphabetMap);
+ break;
+ case 4:
+ m_pLanguageModel = new CCTWLanguageModel(pEventHandler, pSettingsStore, m_pAlphabet);
+ break;
+ }
+
m_iLearnContext = m_pLanguageModel->CreateEmptyContext();
}
+CTrainer *CAlphabetManager::GetTrainer() {
+ return new CTrainer(m_pLanguageModel, m_pAlphabetMap);
+}
+
const CAlphInfo *CAlphabetManager::GetAlphabet() const {
return m_pAlphabet;
}
diff --git a/Src/DasherCore/AlphabetManager.h b/Src/DasherCore/AlphabetManager.h
index 7777bbd..850f465 100644
--- a/Src/DasherCore/AlphabetManager.h
+++ b/Src/DasherCore/AlphabetManager.h
@@ -25,6 +25,7 @@
#include "DasherNode.h"
#include "Parameters.h"
#include "NodeManager.h"
+#include "Trainer.h"
class CNodeCreationManager;
struct SGroupInfo;
@@ -43,8 +44,16 @@ namespace Dasher {
///
class CAlphabetManager : public CNodeManager {
public:
+ ///Create a new AlphabetManager. Note, not usable until CreateLanguageModel() called.
+ CAlphabetManager(CDasherInterfaceBase *pInterface, CNodeCreationManager *pNCManager, const CAlphInfo *pAlphabet, const CAlphabetMap *pAlphabetMap);
+ ///Creates the LM, and stores in m_pLanguageModel. Must be called after construction,
+ /// before the AlphMgr is used. Default implementation switches on LP_LANGUAGE_MODEL_ID.
+ virtual void CreateLanguageModel(CEventHandler *pEventHandler, CSettingsStore *pSets);
+
+ ///Gets a new trainer to train this LM. Caller is responsible for deallocating the
+ /// trainer later.
+ virtual CTrainer *GetTrainer();
- CAlphabetManager(CDasherInterfaceBase *pInterface, CNodeCreationManager *pNCManager, const CAlphInfo *pAlphabet, const CAlphabetMap *pAlphabetMap, CLanguageModel *pLanguageModel);
virtual ~CAlphabetManager();
protected:
@@ -131,6 +140,7 @@ namespace Dasher {
virtual CAlphNode *GetRoot(CDasherNode *pParent, unsigned int iLower, unsigned int iUpper, bool bEnteredLast, int iOffset);
const CAlphInfo *GetAlphabet() const;
+
protected:
///
/// Factory method for CAlphNode construction, so subclasses can override.
@@ -147,6 +157,8 @@ namespace Dasher {
virtual void AddExtras(CAlphNode *pParent, std::vector<unsigned int> *pCProb);
CLanguageModel *m_pLanguageModel;
+ CLanguageModel::Context m_iLearnContext;
+
CNodeCreationManager *m_pNCManager;
const CAlphInfo *m_pAlphabet;
const CAlphabetMap *m_pAlphabetMap;
@@ -157,7 +169,6 @@ namespace Dasher {
void GetProbs(std::vector<unsigned int> *pProbs, CLanguageModel::Context iContext);
void IterateChildGroups(CAlphNode *pParent, const SGroupInfo *pParentGroup, CAlphNode *buildAround);
- CLanguageModel::Context m_iLearnContext;
CDasherInterfaceBase *m_pInterface;
};
diff --git a/Src/DasherCore/ConvertingAlphMgr.cpp b/Src/DasherCore/ConvertingAlphMgr.cpp
index fb78f06..033be71 100644
--- a/Src/DasherCore/ConvertingAlphMgr.cpp
+++ b/Src/DasherCore/ConvertingAlphMgr.cpp
@@ -12,8 +12,8 @@
using namespace Dasher;
-CConvertingAlphMgr::CConvertingAlphMgr(CDasherInterfaceBase *pInterface, CNodeCreationManager *pNCManager, CConversionManager *pConvMgr, const CAlphInfo *pAlphabet, const CAlphabetMap *pAlphabetMap, CLanguageModel *pLanguageModel)
- : CAlphabetManager(pInterface, pNCManager, pAlphabet, pAlphabetMap, pLanguageModel), m_pConvMgr(pConvMgr) {
+CConvertingAlphMgr::CConvertingAlphMgr(CDasherInterfaceBase *pInterface, CNodeCreationManager *pNCManager, CConversionManager *pConvMgr, const CAlphInfo *pAlphabet, const CAlphabetMap *pAlphabetMap)
+ : CAlphabetManager(pInterface, pNCManager, pAlphabet, pAlphabetMap), m_pConvMgr(pConvMgr) {
}
CConvertingAlphMgr::~CConvertingAlphMgr() {
diff --git a/Src/DasherCore/ConvertingAlphMgr.h b/Src/DasherCore/ConvertingAlphMgr.h
index 86495b5..2146906 100644
--- a/Src/DasherCore/ConvertingAlphMgr.h
+++ b/Src/DasherCore/ConvertingAlphMgr.h
@@ -16,7 +16,7 @@
namespace Dasher {
class CConvertingAlphMgr : public CAlphabetManager {
public:
- CConvertingAlphMgr(CDasherInterfaceBase *pInterface, CNodeCreationManager *pNCManager, CConversionManager *pConvMgr, const CAlphInfo *pAlphabet, const CAlphabetMap *pAlphabetMap, CLanguageModel *pLanguageModel);
+ CConvertingAlphMgr(CDasherInterfaceBase *pInterface, CNodeCreationManager *pNCManager, CConversionManager *pConvMgr, const CAlphInfo *pAlphabet, const CAlphabetMap *pAlphabetMap);
virtual ~CConvertingAlphMgr();
protected:
void AddExtras(CAlphNode *pParent, std::vector<unsigned int> *pCProb);
diff --git a/Src/DasherCore/MandarinAlphMgr.cpp b/Src/DasherCore/MandarinAlphMgr.cpp
index 60f0550..a242058 100644
--- a/Src/DasherCore/MandarinAlphMgr.cpp
+++ b/Src/DasherCore/MandarinAlphMgr.cpp
@@ -50,8 +50,8 @@ static char THIS_FILE[] = __FILE__;
// and do not correspond to groups in the chinese alphabet.
#define LAST_PY 1288
-CMandarinAlphMgr::CMandarinAlphMgr(CDasherInterfaceBase *pInterface, CNodeCreationManager *pNCManager, const CAlphInfo *pAlphabet, const CAlphabetMap *pAlphMap, CLanguageModel *pLanguageModel)
- : CAlphabetManager(pInterface, pNCManager, pAlphabet, pAlphMap, pLanguageModel),
+CMandarinAlphMgr::CMandarinAlphMgr(CDasherInterfaceBase *pInterface, CNodeCreationManager *pNCManager, const CAlphInfo *pAlphabet, const CAlphabetMap *pAlphMap)
+ : CAlphabetManager(pInterface, pNCManager, pAlphabet, pAlphMap),
m_pCHAlphabet(pInterface->GetInfo("Chinese 简体中文 (simplified chinese, in pin yin groups, and pinyin)")),
m_pCHAlphabetMap(m_pCHAlphabet->MakeMap()),
m_pConversionsBySymbol(new set<symbol>[LAST_PY+1]) {
@@ -99,6 +99,19 @@ CMandarinAlphMgr::~CMandarinAlphMgr() {
delete[] m_pConversionsBySymbol;
}
+void CMandarinAlphMgr::CreateLanguageModel(CEventHandler *pEventHandler, CSettingsStore *pSettingsStore) {
+ //std::cout<<"CHALphabet size "<< pCHAlphabet->GetNumberTextSymbols(); [7603]
+ std::cout<<"Setting PPMPY model"<<std::endl;
+ m_pLanguageModel = new CPPMPYLanguageModel(pEventHandler, pSettingsStore, m_pCHAlphabet, m_pAlphabet);
+ //our superclass destructor will call ReleaseContext on the iLearnContext when we are destroyed,
+ // so we need to put _something_ in there (even tho we don't use it atm!)...
+ m_iLearnContext = m_pLanguageModel->CreateEmptyContext();
+}
+
+CTrainer *CMandarinAlphMgr::GetTrainer() {
+ return new CMandarinTrainer(m_pLanguageModel, m_pAlphabetMap, m_pCHAlphabetMap);
+}
+
CDasherNode *CMandarinAlphMgr::CreateSymbolNode(CAlphNode *pParent, symbol iSymbol, unsigned int iLbnd, unsigned int iHbnd) {
if (iSymbol <= LAST_PY) {
diff --git a/Src/DasherCore/MandarinAlphMgr.h b/Src/DasherCore/MandarinAlphMgr.h
index 56968ec..1db6a6f 100644
--- a/Src/DasherCore/MandarinAlphMgr.h
+++ b/Src/DasherCore/MandarinAlphMgr.h
@@ -35,8 +35,14 @@ namespace Dasher {
class CMandarinAlphMgr : public CAlphabetManager {
public:
- CMandarinAlphMgr(CDasherInterfaceBase *pInterface, CNodeCreationManager *pNCManager, const CAlphInfo *pAlphabet, const CAlphabetMap *pAlphMap, CLanguageModel *pLanguageModel);
+ CMandarinAlphMgr(CDasherInterfaceBase *pInterface, CNodeCreationManager *pNCManager, const CAlphInfo *pAlphabet, const CAlphabetMap *pAlphMap);
~CMandarinAlphMgr();
+
+ ///WZ: Mandarin Dasher Change. Sets language model to PPMPY.
+ void CreateLanguageModel(CEventHandler *pEventHandler, CSettingsStore *pSets);
+ ///ACL: returns a MandarinTrainer too.
+ CTrainer *GetTrainer();
+
/*ACL note: used to override GetRoot,
to attempt to clone the context of the previous node
in the case that the previous node was a PinyinConversionHelper node
diff --git a/Src/DasherCore/NodeCreationManager.cpp b/Src/DasherCore/NodeCreationManager.cpp
index 0d6c684..6b9f20d 100644
--- a/Src/DasherCore/NodeCreationManager.cpp
+++ b/Src/DasherCore/NodeCreationManager.cpp
@@ -1,11 +1,5 @@
#include "DasherNode.h"
#include "DasherInterfaceBase.h"
-#include "LanguageModelling/PPMLanguageModel.h"
-#include "LanguageModelling/WordLanguageModel.h"
-#include "LanguageModelling/DictLanguageModel.h"
-#include "LanguageModelling/MixtureLanguageModel.h"
-#include "LanguageModelling/PPMPYLanguageModel.h"
-#include "LanguageModelling/CTWLanguageModel.h"
#include "NodeCreationManager.h"
#include "MandarinAlphMgr.h"
#include "ConvertingAlphMgr.h"
@@ -30,71 +24,38 @@ CNodeCreationManager::CNodeCreationManager(Dasher::CDasherInterfaceBase *pInterf
// --
- // Create an appropriate language model;
- CLanguageModel *pLanguageModel;
- //WZ: Mandarin Dasher Change
- //If statement checks for the specific Super PinYin alphabet, and sets language model to PPMPY
- if((pAlphInfo->m_iConversionID==2)&&(pSettingsStore->GetStringParameter(SP_ALPHABET_ID)=="Chinese Super Pin Yin, grouped by Dictionary")){
-
- std::string CHAlphabet = "Chinese 简体中文 (simplified chinese, in pin yin groups, and pinyin)";
- const Dasher::CAlphInfo *pCHAlphInfo(pAlphIO->GetInfo(CHAlphabet));
- const CAlphabetMap *pCHAlphMap = pCHAlphInfo->MakeMap();
-
- //std::cout<<"CHALphabet size "<< pCHAlphabet->GetNumberTextSymbols(); [7603]
- pLanguageModel = new CPPMPYLanguageModel(pEventHandler, pSettingsStore, pCHAlphInfo, pAlphInfo);
- m_pTrainer = new CMandarinTrainer(pLanguageModel, pAlphMap, pCHAlphMap);
- std::cout<<"Setting PPMPY model"<<std::endl;
- }
- else{
- //End Mandarin Dasher Change
-
- // FIXME - return to using enum here
- switch (pSettingsStore->GetLongParameter(LP_LANGUAGE_MODEL_ID)) {
- case 0:
- pLanguageModel = new CPPMLanguageModel(pEventHandler, pSettingsStore, pAlphInfo);
- break;
- case 2:
- pLanguageModel = new CWordLanguageModel(pEventHandler, pSettingsStore, pAlphInfo, pAlphMap);
- break;
- case 3:
- pLanguageModel = new CMixtureLanguageModel(pEventHandler, pSettingsStore, pAlphInfo, pAlphMap);
- break;
- case 4:
- pLanguageModel = new CCTWLanguageModel(pEventHandler, pSettingsStore, pAlphInfo);
- break;
-
- default:
- // If there is a bogus value for the language model ID, we'll default
- // to our trusty old PPM language model.
- pLanguageModel = new CPPMLanguageModel(pEventHandler, pSettingsStore, pAlphInfo);
- break;
- }
- m_pTrainer = new CTrainer(pLanguageModel, pAlphMap);
- }
-
- switch(pAlphInfo->m_iConversionID) {
- default:
- //TODO: Error reporting here
- //fall through to
- case 0: // No conversion required
- m_pAlphabetManager = new CAlphabetManager(pInterface, this, pAlphInfo, pAlphMap, pLanguageModel);
- break;
+ switch (pAlphInfo->m_iConversionID) {
+ default:
+ //TODO: Error reporting here
+ //fall through to
+ case 0: // No conversion required
+ m_pAlphabetManager = new CAlphabetManager(pInterface, this, pAlphInfo, pAlphMap);
+ break;
#ifdef JAPANESE
- case 1: // Japanese
- CConversionManager *pConversionManager =
+ case 1: {
+ // Japanese (braces scope pConversionManager so 'case 2' does not jump past its initialization)
+ CConversionManager *pConversionManager =
#ifdef WIN32
- new CIMEConversionHelper;
+ new CIMEConversionHelper;
#else
- new CCannaConversionHelper(this, pAlphInfo, GetLongParameter(LP_CONVERSION_TYPE), GetLongParameter(LP_CONVERSION_ORDER));
-#endif
- //TODO ownership/deletion
- m_pAlphabetManager = new CConvertingAlphMgr(pInterface, this, pConversionManager, pAlphInfo, pLanguageModel);
- break;
+ new CCannaConversionHelper(this, pAlphInfo, GetLongParameter(LP_CONVERSION_TYPE), GetLongParameter(LP_CONVERSION_ORDER));
#endif
- case 2: //(ACL) Modify AlphabetManager for Mandarin Dasher
- m_pAlphabetManager = new CMandarinAlphMgr(pInterface, this, pAlphInfo, pAlphMap, pLanguageModel);
+ //TODO ownership/deletion
+ m_pAlphabetManager = new CConvertingAlphMgr(pInterface, this, pConversionManager, pAlphInfo, pAlphMap);
+ break;
}
-
+#endif
+ case 2:
+ //Mandarin Dasher!
+ //(ACL) Modify AlphabetManager for Mandarin Dasher
+ m_pAlphabetManager = new CMandarinAlphMgr(pInterface, this, pAlphInfo, pAlphMap);
+ break;
+ }
+ //all other configuration changes, etc., that might be necessary for a particular conversion mode,
+ // are implemented by AlphabetManager subclasses overriding the following two methods:
+ m_pAlphabetManager->CreateLanguageModel(pEventHandler, pSettingsStore);
+ m_pTrainer = m_pAlphabetManager->GetTrainer();
+
if (!pAlphInfo->GetTrainingFile().empty()) {
//1. Look for system training text...
CLockEvent oEvent("Training on System Text", true, 0);
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]