[dasher] Most LanguageModels need only #syms, not alphabet; some are not DasherComponents
- From: Patrick Welche <pwelche src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [dasher] Most LanguageModels need only #syms, not alphabet; some are not DasherComponents
- Date: Tue, 15 Mar 2011 17:12:37 +0000 (UTC)
commit 5004edb476be154856162b25f8b17c593459fc2e
Author: Alan Lawrence <acl33 inf phy cam ac uk>
Date: Sun Feb 20 22:24:27 2011 +0000
Most LanguageModels need only #syms, not alphabet; some are not DasherComponents
=> simplify CLanguageModel base class
Somewhere in here I found it necessary to remove an #include from AlphInfo.h;
this removes a cyclic #include but did have some fallout!
Src/DasherCore/Alphabet/AlphInfo.h | 1 -
Src/DasherCore/Alphabet/AlphabetMap.h | 1 +
Src/DasherCore/AlphabetManager.cpp | 4 +-
Src/DasherCore/AlphabetManager.h | 4 ++-
Src/DasherCore/ConversionManager.h | 1 +
Src/DasherCore/DasherNode.h | 1 +
.../LanguageModelling/CTWLanguageModel.cpp | 3 +-
.../LanguageModelling/CTWLanguageModel.h | 2 +-
.../LanguageModelling/DictLanguageModel.cpp | 6 ++--
.../LanguageModelling/DictLanguageModel.h | 8 ++--
Src/DasherCore/LanguageModelling/LanguageModel.cpp | 37 --------------------
Src/DasherCore/LanguageModelling/LanguageModel.h | 13 +++----
Src/DasherCore/LanguageModelling/Makefile.am | 1 -
.../LanguageModelling/MixtureLanguageModel.h | 8 ++--
.../LanguageModelling/PPMLanguageModel.cpp | 8 ++--
.../LanguageModelling/PPMLanguageModel.h | 9 +++--
.../LanguageModelling/PPMPYLanguageModel.cpp | 12 +++----
.../LanguageModelling/PPMPYLanguageModel.h | 13 +++----
.../LanguageModelling/WordLanguageModel.cpp | 6 ++--
.../LanguageModelling/WordLanguageModel.h | 8 +++--
Src/DasherCore/MandarinAlphMgr.cpp | 2 +-
Src/MacOSX/Dasher.xcodeproj/project.pbxproj | 4 --
Src/iPhone/Dasher.xcodeproj/project.pbxproj | 4 --
23 files changed, 55 insertions(+), 101 deletions(-)
---
diff --git a/Src/DasherCore/Alphabet/AlphInfo.h b/Src/DasherCore/Alphabet/AlphInfo.h
index 6cce20e..5b1a08f 100644
--- a/Src/DasherCore/Alphabet/AlphInfo.h
+++ b/Src/DasherCore/Alphabet/AlphInfo.h
@@ -29,7 +29,6 @@
#include "../DasherTypes.h"
#include "AlphabetMap.h"
#include "GroupInfo.h"
-#include "AlphIO.h"
#include <string>
#include <vector>
diff --git a/Src/DasherCore/Alphabet/AlphabetMap.h b/Src/DasherCore/Alphabet/AlphabetMap.h
index fedad4f..6bab5de 100644
--- a/Src/DasherCore/Alphabet/AlphabetMap.h
+++ b/Src/DasherCore/Alphabet/AlphabetMap.h
@@ -21,6 +21,7 @@
namespace Dasher {
class CAlphabetMap;
+ class CAlphInfo;
}
/// \ingroup Alphabet
diff --git a/Src/DasherCore/AlphabetManager.cpp b/Src/DasherCore/AlphabetManager.cpp
index 19075d6..91af931 100644
--- a/Src/DasherCore/AlphabetManager.cpp
+++ b/Src/DasherCore/AlphabetManager.cpp
@@ -61,7 +61,7 @@ void CAlphabetManager::CreateLanguageModel(CEventHandler *pEventHandler, CSettin
// If there is a bogus value for the language model ID, we'll default
// to our trusty old PPM language model.
case 0:
- m_pLanguageModel = new CPPMLanguageModel(pEventHandler, pSettingsStore, m_pAlphabet);
+ m_pLanguageModel = new CPPMLanguageModel(pEventHandler, pSettingsStore, m_pAlphabet->GetNumberTextSymbols());
break;
case 2:
m_pLanguageModel = new CWordLanguageModel(pEventHandler, pSettingsStore, m_pAlphabet, m_pAlphabetMap);
@@ -70,7 +70,7 @@ void CAlphabetManager::CreateLanguageModel(CEventHandler *pEventHandler, CSettin
m_pLanguageModel = new CMixtureLanguageModel(pEventHandler, pSettingsStore, m_pAlphabet, m_pAlphabetMap);
break;
case 4:
- m_pLanguageModel = new CCTWLanguageModel(pEventHandler, pSettingsStore, m_pAlphabet);
+ m_pLanguageModel = new CCTWLanguageModel(m_pAlphabet->GetNumberTextSymbols());
break;
}
diff --git a/Src/DasherCore/AlphabetManager.h b/Src/DasherCore/AlphabetManager.h
index d0c4695..5d386a2 100644
--- a/Src/DasherCore/AlphabetManager.h
+++ b/Src/DasherCore/AlphabetManager.h
@@ -23,9 +23,11 @@
#include "LanguageModelling/LanguageModel.h"
#include "DasherNode.h"
-#include "Parameters.h"
#include "NodeManager.h"
#include "Trainer.h"
+#include "Alphabet/AlphInfo.h"
+#include "SettingsStore.h"
+#include "EventHandler.h"
class CNodeCreationManager;
struct SGroupInfo;
diff --git a/Src/DasherCore/ConversionManager.h b/Src/DasherCore/ConversionManager.h
index 72ae66e..de526f4 100644
--- a/Src/DasherCore/ConversionManager.h
+++ b/Src/DasherCore/ConversionManager.h
@@ -26,6 +26,7 @@
#include "DasherNode.h"
#include "SCENode.h"
#include "NodeManager.h"
+#include "Alphabet/AlphInfo.h"
// TODO: Conversion manager needs to deal with offsets and contexts - Will: See Phil for an explanation.
diff --git a/Src/DasherCore/DasherNode.h b/Src/DasherCore/DasherNode.h
index 66620c6..3e809b8 100644
--- a/Src/DasherCore/DasherNode.h
+++ b/Src/DasherCore/DasherNode.h
@@ -26,6 +26,7 @@
#include "LanguageModelling/LanguageModel.h"
#include "DasherTypes.h"
#include "NodeManager.h"
+#include "Alphabet/AlphabetMap.h"
namespace Dasher {
class CDasherNode;
diff --git a/Src/DasherCore/LanguageModelling/CTWLanguageModel.cpp b/Src/DasherCore/LanguageModelling/CTWLanguageModel.cpp
index 93911cd..50053b9 100644
--- a/Src/DasherCore/LanguageModelling/CTWLanguageModel.cpp
+++ b/Src/DasherCore/LanguageModelling/CTWLanguageModel.cpp
@@ -40,8 +40,7 @@ static char THIS_FILE[] = __FILE__;
#endif
-CCTWLanguageModel::CCTWLanguageModel(Dasher::CEventHandler *pEventHandler, CSettingsStore *pSettingsStore, const CAlphInfo *pAlph)
-:CLanguageModel(pEventHandler, pSettingsStore, pAlph){
+CCTWLanguageModel::CCTWLanguageModel(int iNumSyms) : CLanguageModel(iNumSyms) {
Dasher::CHashTable HashTable; // create hashtable
MaxDepth = 6; // Maximum depth of the context tree
diff --git a/Src/DasherCore/LanguageModelling/CTWLanguageModel.h b/Src/DasherCore/LanguageModelling/CTWLanguageModel.h
index d151430..154304d 100644
--- a/Src/DasherCore/LanguageModelling/CTWLanguageModel.h
+++ b/Src/DasherCore/LanguageModelling/CTWLanguageModel.h
@@ -39,7 +39,7 @@ namespace Dasher {
// CTW language model
class CCTWLanguageModel: public CLanguageModel {
public:
- CCTWLanguageModel(Dasher::CEventHandler * pEventHandler, CSettingsStore * pSettingsStore, const CAlphInfo *pAlph);
+ CCTWLanguageModel(int iNumSyms);
virtual ~ CCTWLanguageModel();
Context CreateEmptyContext();
diff --git a/Src/DasherCore/LanguageModelling/DictLanguageModel.cpp b/Src/DasherCore/LanguageModelling/DictLanguageModel.cpp
index 34b99b8..9e7077c 100644
--- a/Src/DasherCore/LanguageModelling/DictLanguageModel.cpp
+++ b/Src/DasherCore/LanguageModelling/DictLanguageModel.cpp
@@ -91,7 +91,7 @@ CDictLanguageModel::CDictnode * CDictLanguageModel::AddSymbolToNode(CDictnode *p
/////////////////////////////////////////////////////////////////////
CDictLanguageModel::CDictLanguageModel(Dasher::CEventHandler *pEventHandler, CSettingsStore *pSettingsStore, const CAlphInfo *pAlph, const CAlphabetMap *pAlphMap)
-:CLanguageModel(pEventHandler, pSettingsStore, pAlph), m_pAlphMap(pAlphMap), NodesAllocated(0), max_order(0), m_NodeAlloc(8192), m_ContextAlloc(1024) {
+:CLanguageModel(pAlph->GetNumberTextSymbols()), CDasherComponent(pEventHandler, pSettingsStore), m_pAlphMap(pAlphMap), m_iSpaceSymbol(pAlph->GetSpaceSymbol()), NodesAllocated(0), max_order(0), m_NodeAlloc(8192), m_ContextAlloc(1024) {
m_pRoot = m_NodeAlloc.Alloc();
m_pRoot->sbl = -1;
m_rootcontext = new CDictContext(m_pRoot, 0);
@@ -530,7 +530,7 @@ void CDictLanguageModel::AddSymbol(CDictLanguageModel::CDictContext &context, sy
// Collapse the context if we have started a new word
- if(sym == m_pAlphabet->GetSpaceSymbol()) {
+ if(sym == m_iSpaceSymbol) {
CollapseContext(context);
}
@@ -562,7 +562,7 @@ void CDictLanguageModel::EnterSymbol(Context c, int Symbol) {
// collapse the context - the information required to update the
// word part of the context is stored in the string.
- if(Symbol == m_pAlphabet->GetSpaceSymbol()) {
+ if(Symbol == m_iSpaceSymbol) {
CollapseContext(context);
return;
}
diff --git a/Src/DasherCore/LanguageModelling/DictLanguageModel.h b/Src/DasherCore/LanguageModelling/DictLanguageModel.h
index d1911c5..8db59c3 100644
--- a/Src/DasherCore/LanguageModelling/DictLanguageModel.h
+++ b/Src/DasherCore/LanguageModelling/DictLanguageModel.h
@@ -11,10 +11,9 @@
#include "../../Common/NoClones.h"
#include "../../Common/Allocators/PooledAlloc.h"
-#include "LanguageModel.h"
#include "PPMLanguageModel.h"
-#include "../DasherTypes.h"
-
+#include "../Alphabet/AlphInfo.h"
+#include "../Alphabet/AlphabetMap.h"
#include <vector>
#include <map>
#include <string>
@@ -26,7 +25,7 @@
namespace Dasher {
/// \ingroup LM
/// \{
- class CDictLanguageModel:public CLanguageModel {
+ class CDictLanguageModel:public CLanguageModel, public CDasherComponent {
public:
CDictLanguageModel(Dasher::CEventHandler * pEventHandler, CSettingsStore * pSettingsStore, const CAlphInfo *pAlph, const CAlphabetMap *pAlphMap);
virtual ~CDictLanguageModel();
@@ -82,6 +81,7 @@ namespace Dasher {
};
const CAlphabetMap *m_pAlphMap;
+ const int m_iSpaceSymbol;
CDictnode *AddSymbolToNode(CDictnode * pNode, symbol sym, int *update);
diff --git a/Src/DasherCore/LanguageModelling/LanguageModel.h b/Src/DasherCore/LanguageModelling/LanguageModel.h
index e23c307..816f9f8 100644
--- a/Src/DasherCore/LanguageModelling/LanguageModel.h
+++ b/Src/DasherCore/LanguageModelling/LanguageModel.h
@@ -11,8 +11,7 @@
#include "../DasherTypes.h"
-#include "../Alphabet/AlphIO.h"
-#include "../DasherComponent.h"
+
#include <vector>
/////////////////////////////////////////////////////////////////////////////
@@ -30,17 +29,15 @@ namespace Dasher {
/// Base class for all language model components
///
-class Dasher::CLanguageModel:public Dasher::CDasherComponent
+class Dasher::CLanguageModel
{
public:
/////////////////////////////////////////////////////////////////////////////
- CLanguageModel(Dasher::CEventHandler *pEventHandler, CSettingsStore *pSettingsStore, const CAlphInfo *pAlph);
+ CLanguageModel(int iNumSyms) : m_iNumSyms(iNumSyms) {};
virtual ~CLanguageModel() {};
-
- virtual void HandleEvent(Dasher::CEvent * pEvent);
///
/// Index of registered context
@@ -148,10 +145,10 @@ public:
///Return the number of symbols over which we are making predictions, plus one
/// (to leave space for an initial 0).
int GetSize() const {
- return m_pAlphabet->GetNumberTextSymbols()+1;
+ return m_iNumSyms+1;
}
- const CAlphInfo *m_pAlphabet;
+ const int m_iNumSyms;
};
diff --git a/Src/DasherCore/LanguageModelling/Makefile.am b/Src/DasherCore/LanguageModelling/Makefile.am
index 8df6997..4ac9b80 100644
--- a/Src/DasherCore/LanguageModelling/Makefile.am
+++ b/Src/DasherCore/LanguageModelling/Makefile.am
@@ -7,7 +7,6 @@ libdasherlm_a_SOURCES = \
DictLanguageModel.h \
HashTable.cpp \
HashTable.h \
- LanguageModel.cpp \
LanguageModel.h \
MixtureLanguageModel.h \
PPMLanguageModel.cpp \
diff --git a/Src/DasherCore/LanguageModelling/MixtureLanguageModel.h b/Src/DasherCore/LanguageModelling/MixtureLanguageModel.h
index b4b18cc..671992c 100644
--- a/Src/DasherCore/LanguageModelling/MixtureLanguageModel.h
+++ b/Src/DasherCore/LanguageModelling/MixtureLanguageModel.h
@@ -23,19 +23,19 @@ namespace Dasher {
/// \ingroup LM
/// \{
- class CMixtureLanguageModel:public CLanguageModel {
+ class CMixtureLanguageModel:public CLanguageModel, public CDasherComponent {
public:
/////////////////////////////////////////////////////////////////////////////
- CMixtureLanguageModel(Dasher::CEventHandler * pEventHandler, CSettingsStore * pSettingsStore, const CAlphInfo *pAlph, const CAlphabetMap *pAlphMap):CLanguageModel(pEventHandler, pSettingsStore, pAlph) {
+ CMixtureLanguageModel(Dasher::CEventHandler * pEventHandler, CSettingsStore * pSettingsStore, const CAlphInfo *pAlph, const CAlphabetMap *pAlphMap):CLanguageModel(pAlph->GetNumberTextSymbols()), CDasherComponent(pEventHandler, pSettingsStore) {
// std::cout << m_pAlphabet << std::endl;
NextContext = 0;
- lma = new CPPMLanguageModel(m_pEventHandler, m_pSettingsStore, m_pAlphabet);
- lmb = new CDictLanguageModel(m_pEventHandler, m_pSettingsStore, m_pAlphabet, pAlphMap);
+ lma = new CPPMLanguageModel(m_pEventHandler, m_pSettingsStore, m_iNumSyms);
+ lmb = new CDictLanguageModel(m_pEventHandler, m_pSettingsStore, pAlph, pAlphMap);
};
diff --git a/Src/DasherCore/LanguageModelling/PPMLanguageModel.cpp b/Src/DasherCore/LanguageModelling/PPMLanguageModel.cpp
index a01c29b..1d6d7f6 100644
--- a/Src/DasherCore/LanguageModelling/PPMLanguageModel.cpp
+++ b/Src/DasherCore/LanguageModelling/PPMLanguageModel.cpp
@@ -30,8 +30,8 @@ static char THIS_FILE[] = __FILE__;
/////////////////////////////////////////////////////////////////////
-CAbstractPPM::CAbstractPPM(Dasher::CEventHandler *pEventHandler, CSettingsStore *pSettingsStore, const CAlphInfo *pAlph, CPPMnode *pRoot, int iMaxOrder)
-: CLanguageModel(pEventHandler, pSettingsStore, pAlph), m_pRoot(pRoot), m_iMaxOrder(iMaxOrder), bUpdateExclusion( GetLongParameter(LP_LM_UPDATE_EXCLUSION)!=0 ), m_ContextAlloc(1024) {
+CAbstractPPM::CAbstractPPM(Dasher::CEventHandler *pEventHandler, CSettingsStore *pSettingsStore, int iNumSyms, CPPMnode *pRoot, int iMaxOrder)
+: CLanguageModel(iNumSyms), CDasherComponent(pEventHandler, pSettingsStore), m_pRoot(pRoot), m_iMaxOrder(iMaxOrder), bUpdateExclusion( GetLongParameter(LP_LM_UPDATE_EXCLUSION)!=0 ), m_ContextAlloc(1024) {
m_pRootContext = m_ContextAlloc.Alloc();
m_pRootContext->head = m_pRoot;
m_pRootContext->order = 0;
@@ -444,8 +444,8 @@ CAbstractPPM::CPPMnode * CAbstractPPM::AddSymbolToNode(CPPMnode *pNode, symbol s
return pReturn;
}
-CPPMLanguageModel::CPPMLanguageModel(CEventHandler *pEvt, CSettingsStore *sets, const CAlphInfo *pAlph)
-: CAbstractPPM(pEvt, sets, pAlph, new CPPMnode(-1), sets->GetLongParameter(LP_LM_MAX_ORDER)), NodesAllocated(0), m_NodeAlloc(8192) {
+CPPMLanguageModel::CPPMLanguageModel(CEventHandler *pEvt, CSettingsStore *sets, int iNumSyms)
+: CAbstractPPM(pEvt, sets, iNumSyms, new CPPMnode(-1), sets->GetLongParameter(LP_LM_MAX_ORDER)), NodesAllocated(0), m_NodeAlloc(8192) {
}
CAbstractPPM::CPPMnode *CPPMLanguageModel::makeNode(int sym) {
diff --git a/Src/DasherCore/LanguageModelling/PPMLanguageModel.h b/Src/DasherCore/LanguageModelling/PPMLanguageModel.h
index 37b034a..10d0fcc 100644
--- a/Src/DasherCore/LanguageModelling/PPMLanguageModel.h
+++ b/Src/DasherCore/LanguageModelling/PPMLanguageModel.h
@@ -13,11 +13,12 @@
#include "../../Common/Allocators/PooledAlloc.h"
#include "LanguageModel.h"
-
+#include "../DasherComponent.h"
#include "stdlib.h"
#include <vector>
#include <fstream>
#include <set>
+#include <map>
namespace Dasher {
@@ -33,7 +34,7 @@ namespace Dasher {
/// Subclasses must implement CLanguageModel::GetProbs and a makeNode() method (perhaps
/// using a pooled allocator).
///
- class CAbstractPPM :public CLanguageModel, private NoClones {
+ class CAbstractPPM :public CLanguageModel, public CDasherComponent, private NoClones {
protected:
class ChildIterator;
class CPPMnode {
@@ -99,7 +100,7 @@ namespace Dasher {
///Makes a new node, of whatever kind (subclass of CPPMnode, perhaps with extra info)
/// is required by the subclass, for the specified symbol. (Initial count will be 1.)
virtual CPPMnode *makeNode(int sym)=0;
- CAbstractPPM(Dasher::CEventHandler * pEventHandler, CSettingsStore * pSettingsStore, const CAlphInfo *pAlph, CPPMnode *pRoot, int iMaxOrder);
+ CAbstractPPM(Dasher::CEventHandler * pEventHandler, CSettingsStore * pSettingsStore, int iNumSyms, CPPMnode *pRoot, int iMaxOrder);
void dumpSymbol(symbol sym);
void dumpString(char *str, int pos, int len);
@@ -138,7 +139,7 @@ namespace Dasher {
/// max order from LP_LM_MAX_ORDER.
class CPPMLanguageModel : public CAbstractPPM {
public:
- CPPMLanguageModel(CEventHandler *pEventHandler, CSettingsStore *pSets, const CAlphInfo *pAlph);
+ CPPMLanguageModel(CEventHandler *pEventHandler, CSettingsStore *pSets, int iNumSyms);
virtual void GetProbs(Context context, std::vector < unsigned int >&Probs, int norm, int iUniform) const;
protected:
/// Makes a standard CPPMnode, but using a pooled allocator (m_NodeAlloc) - faster!
diff --git a/Src/DasherCore/LanguageModelling/PPMPYLanguageModel.cpp b/Src/DasherCore/LanguageModelling/PPMPYLanguageModel.cpp
index cf12a66..c401278 100644
--- a/Src/DasherCore/LanguageModelling/PPMPYLanguageModel.cpp
+++ b/Src/DasherCore/LanguageModelling/PPMPYLanguageModel.cpp
@@ -33,10 +33,8 @@ static char THIS_FILE[] = __FILE__;
/////////////////////////////////////////////////////////////////////
-CPPMPYLanguageModel::CPPMPYLanguageModel(Dasher::CEventHandler *pEventHandler, CSettingsStore *pSettingsStore, const CAlphInfo *pAlph, const CAlphInfo *pPyAlphabet)
- :CAbstractPPM(pEventHandler, pSettingsStore, pAlph, new CPPMPYnode(-1), 2), NodesAllocated(0), m_NodeAlloc(8192), m_pPyAlphabet(pPyAlphabet){
-
- m_iAlphSize = GetSize();
+CPPMPYLanguageModel::CPPMPYLanguageModel(Dasher::CEventHandler *pEventHandler, CSettingsStore *pSettingsStore, int iNumCHsyms, int iNumPYsyms)
+ :CAbstractPPM(pEventHandler, pSettingsStore, iNumCHsyms, new CPPMPYnode(-1), 2), NodesAllocated(0), m_NodeAlloc(8192), m_iNumPYsyms(iNumPYsyms) {
}
/////////////////////////////////////////////////////////////////////
@@ -194,7 +192,7 @@ void CPPMPYLanguageModel::GetPartProbs(Context context, std::vector<pair<symbol,
// In Will's code, it assigned 0 to the first entry, then split evenly among the rest...seems wrong?!
int i=0;
for (std::vector<pair<symbol, unsigned int> >::iterator it = vChildren.begin(); it!=vChildren.end(); it++) {
- DASHER_ASSERT(it->first > -1 && it->first <= m_iAlphSize);
+ DASHER_ASSERT(it->first > 0 && it->first < GetSize()); //i.e., is valid CH symbol
it->second = iUniformLeft / (vChildren.size() - i);
// std::cout<<"iUniformLeft: "<<iUniformLeft<<std::endl;
iUniformLeft -= it->second;
@@ -299,7 +297,7 @@ void CPPMPYLanguageModel::GetProbs(Context context, std::vector<unsigned int> &p
*/
// DASHER_ASSERT(m_setContexts.count(ppmcontext) > 0);
- int iNumSymbols = m_pPyAlphabet->GetNumberTextSymbols()+1;
+ int iNumSymbols = m_iNumPYsyms+1;
probs.resize(iNumSymbols);
@@ -403,7 +401,7 @@ void CPPMPYLanguageModel::LearnPYSymbol(Context c, int pysym) {
if(pysym==0)
return;
- DASHER_ASSERT(pysym > 0 && pysym <= m_pPyAlphabet->GetNumberTextSymbols());
+ DASHER_ASSERT(pysym > 0 && pysym <= m_iNumPYsyms);
CPPMPYLanguageModel::CPPMContext & context = *(CPPMContext *) (c);
// std::cout<<"py learn context : "<<context.head->symbol<<std::endl;
diff --git a/Src/DasherCore/LanguageModelling/PPMPYLanguageModel.h b/Src/DasherCore/LanguageModelling/PPMPYLanguageModel.h
index 707ef23..5252811 100644
--- a/Src/DasherCore/LanguageModelling/PPMPYLanguageModel.h
+++ b/Src/DasherCore/LanguageModelling/PPMPYLanguageModel.h
@@ -40,11 +40,11 @@ namespace Dasher {
class CPPMPYLanguageModel : public CAbstractPPM {
public:
///Construct a new PPMPYLanguageModel.
- /// \param pAlph alphabet containing the actual symbols we want to write (i.e. Chinese); this
- /// is the only alphabet passed to the CAbstractPPM superclass.
- /// \param pPyAlph alphabet of pinyin phonemes; we will predict probabilities for these
+ /// \param iNumCHSyms number of symbols in the alphabet in which we actually want to write (i.e. Chinese),
+ /// i.e. from which contexts are formed; this is passed to the CAbstractPPM superclass.
+ /// \param iNumPYSyms number of pinyin phonemes, i.e. which we generate probabilities for in GetProbs
/// based (only) on the preceding _Chinese_ symbols.
- CPPMPYLanguageModel(Dasher::CEventHandler * pEventHandler, CSettingsStore * pSettingsStore, const CAlphInfo *pAlph, const CAlphInfo *pPyAlph);
+ CPPMPYLanguageModel(Dasher::CEventHandler * pEventHandler, CSettingsStore * pSettingsStore, int iNumCHsyms, int iNumPYsyms);
///Learns a pinyin symbol in the specified context, but does not move the context on.
void LearnPYSymbol(Context context, int Symbol);
@@ -52,7 +52,7 @@ namespace Dasher {
///Predicts probabilities for the next Pinyin symbol (blending as per PPM,
/// but using the pychild map rather than child CPPMPYnodes).
/// \param Probs vector to fill with predictions for pinyin symbols: will be filled
- /// with m_pPyAlphabet->GetNumberTextSymbols() numbers plus an initial 0.
+ /// with m_iNumPYsyms numbers plus an initial 0.
virtual void GetProbs(Context context, std::vector < unsigned int >&Probs, int norm, int iUniform) const;
///Predicts probabilities for the next Chinese symbol, filtered to only include symbols within a specified set.
@@ -80,8 +80,7 @@ namespace Dasher {
int NodesAllocated;
mutable CSimplePooledAlloc < CPPMPYnode > m_NodeAlloc;
- const CAlphInfo *m_pPyAlphabet;
- int m_iAlphSize;
+ const int m_iNumPYsyms;
};
/// @}
diff --git a/Src/DasherCore/LanguageModelling/WordLanguageModel.cpp b/Src/DasherCore/LanguageModelling/WordLanguageModel.cpp
index bb3eb9c..fedf503 100644
--- a/Src/DasherCore/LanguageModelling/WordLanguageModel.cpp
+++ b/Src/DasherCore/LanguageModelling/WordLanguageModel.cpp
@@ -123,7 +123,7 @@ CWordLanguageModel::CWordnode * CWordLanguageModel::AddSymbolToNode(CWordnode *p
CWordLanguageModel::CWordLanguageModel(Dasher::CEventHandler *pEventHandler, CSettingsStore *pSettingsStore,
const CAlphInfo *pAlph, const CAlphabetMap *pAlphMap)
- :CLanguageModel(pEventHandler, pSettingsStore, pAlph), m_pAlphMap(pAlphMap), NodesAllocated(0),
+ :CLanguageModel(pAlph->GetNumberTextSymbols()), CDasherComponent(pEventHandler, pSettingsStore), m_pAlphMap(pAlphMap), m_iSpaceSymbol(pAlph->GetSpaceSymbol()), NodesAllocated(0),
max_order(2), m_NodeAlloc(8192), m_ContextAlloc(1024) {
// Construct a root node for the trie
@@ -134,7 +134,7 @@ CWordLanguageModel::CWordLanguageModel(Dasher::CEventHandler *pEventHandler, CSe
// Create a spelling model
- pSpellingModel = new CPPMLanguageModel(m_pEventHandler, m_pSettingsStore, m_pAlphabet);
+ pSpellingModel = new CPPMLanguageModel(m_pEventHandler, m_pSettingsStore, m_iNumSyms);
// Construct a root context
@@ -626,7 +626,7 @@ void CWordLanguageModel::AddSymbol(CWordLanguageModel::CWordContext &context, sy
// Collapse the context (with learning) if we've just entered a space
// FIXME - we need to generalise this for more languages.
- if(sym == m_pAlphabet->GetSpaceSymbol()) {
+ if(sym == m_iSpaceSymbol) {
CollapseContext(context, bLearn);
context.m_dSpellingFactor = 1.0;
}
diff --git a/Src/DasherCore/LanguageModelling/WordLanguageModel.h b/Src/DasherCore/LanguageModelling/WordLanguageModel.h
index 09746ad..9a954a4 100644
--- a/Src/DasherCore/LanguageModelling/WordLanguageModel.h
+++ b/Src/DasherCore/LanguageModelling/WordLanguageModel.h
@@ -13,9 +13,10 @@
#include "../../Common/NoClones.h"
#include "../../Common/Allocators/PooledAlloc.h"
-#include "LanguageModel.h"
#include "PPMLanguageModel.h"
-#include "../DasherTypes.h"
+#include "../DasherComponent.h"
+#include "../Alphabet/AlphInfo.h"
+#include "../Alphabet/AlphabetMap.h"
#include <vector>
#include <map>
@@ -33,7 +34,7 @@ namespace Dasher {
///
/// Language model using words
///
- class CWordLanguageModel:public CLanguageModel {
+ class CWordLanguageModel:public CLanguageModel, public CDasherComponent {
public:
CWordLanguageModel(Dasher::CEventHandler * pEventHandler, CSettingsStore * pSettingsStore, const CAlphInfo *pAlph, const CAlphabetMap *pAlphMap);
virtual ~ CWordLanguageModel();
@@ -114,6 +115,7 @@ namespace Dasher {
int lookup_word_const(const std::string & w) const;
const CAlphabetMap *m_pAlphMap;
+ const int m_iSpaceSymbol;
CWordContext *m_rootcontext;
CWordnode *m_pRoot;
diff --git a/Src/DasherCore/MandarinAlphMgr.cpp b/Src/DasherCore/MandarinAlphMgr.cpp
index f556ad4..26a3473 100644
--- a/Src/DasherCore/MandarinAlphMgr.cpp
+++ b/Src/DasherCore/MandarinAlphMgr.cpp
@@ -108,7 +108,7 @@ CMandarinAlphMgr::~CMandarinAlphMgr() {
void CMandarinAlphMgr::CreateLanguageModel(CEventHandler *pEventHandler, CSettingsStore *pSettingsStore) {
//std::cout<<"CHALphabet size "<< pCHAlphabet->GetNumberTextSymbols(); [7603]
std::cout<<"Setting PPMPY model"<<std::endl;
- m_pLanguageModel = new CPPMPYLanguageModel(pEventHandler, pSettingsStore, m_pCHAlphabet, m_pAlphabet);
+ m_pLanguageModel = new CPPMPYLanguageModel(pEventHandler, pSettingsStore, m_pCHAlphabet->GetNumberTextSymbols(), m_pAlphabet->GetNumberTextSymbols());
//our superclass destructor will call ReleaseContext on the iLearnContext when we are destroyed,
// so we need to put _something_ in there (even tho we don't use it atm!)...
m_iLearnContext = m_pLanguageModel->CreateEmptyContext();
diff --git a/Src/MacOSX/Dasher.xcodeproj/project.pbxproj b/Src/MacOSX/Dasher.xcodeproj/project.pbxproj
index 7ac5b5b..90f762b 100755
--- a/Src/MacOSX/Dasher.xcodeproj/project.pbxproj
+++ b/Src/MacOSX/Dasher.xcodeproj/project.pbxproj
@@ -89,7 +89,6 @@
1948BEF90C226CFD001DFA32 /* DictLanguageModel.h in Headers */ = {isa = PBXBuildFile; fileRef = 1948BE540C226CFD001DFA32 /* DictLanguageModel.h */; };
1948BEFA0C226CFD001DFA32 /* HashTable.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1948BE550C226CFD001DFA32 /* HashTable.cpp */; };
1948BEFB0C226CFD001DFA32 /* HashTable.h in Headers */ = {isa = PBXBuildFile; fileRef = 1948BE560C226CFD001DFA32 /* HashTable.h */; };
- 1948BF030C226CFD001DFA32 /* LanguageModel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1948BE5E0C226CFD001DFA32 /* LanguageModel.cpp */; };
1948BF040C226CFD001DFA32 /* LanguageModel.h in Headers */ = {isa = PBXBuildFile; fileRef = 1948BE5F0C226CFD001DFA32 /* LanguageModel.h */; };
1948BF060C226CFD001DFA32 /* MixtureLanguageModel.h in Headers */ = {isa = PBXBuildFile; fileRef = 1948BE610C226CFD001DFA32 /* MixtureLanguageModel.h */; };
1948BF070C226CFD001DFA32 /* PPMLanguageModel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1948BE620C226CFD001DFA32 /* PPMLanguageModel.cpp */; };
@@ -494,7 +493,6 @@
1948BE5B0C226CFD001DFA32 /* KanjiConversionCanna.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = KanjiConversionCanna.h; sourceTree = "<group>"; };
1948BE5C0C226CFD001DFA32 /* KanjiConversionIME.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = KanjiConversionIME.cpp; sourceTree = "<group>"; };
1948BE5D0C226CFD001DFA32 /* KanjiConversionIME.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = KanjiConversionIME.h; sourceTree = "<group>"; };
- 1948BE5E0C226CFD001DFA32 /* LanguageModel.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = LanguageModel.cpp; sourceTree = "<group>"; };
1948BE5F0C226CFD001DFA32 /* LanguageModel.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = LanguageModel.h; sourceTree = "<group>"; };
1948BE610C226CFD001DFA32 /* MixtureLanguageModel.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = MixtureLanguageModel.h; sourceTree = "<group>"; };
1948BE620C226CFD001DFA32 /* PPMLanguageModel.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = PPMLanguageModel.cpp; sourceTree = "<group>"; };
@@ -1077,7 +1075,6 @@
1948BE5B0C226CFD001DFA32 /* KanjiConversionCanna.h */,
1948BE5C0C226CFD001DFA32 /* KanjiConversionIME.cpp */,
1948BE5D0C226CFD001DFA32 /* KanjiConversionIME.h */,
- 1948BE5E0C226CFD001DFA32 /* LanguageModel.cpp */,
1948BE5F0C226CFD001DFA32 /* LanguageModel.h */,
1948BE610C226CFD001DFA32 /* MixtureLanguageModel.h */,
1948BE620C226CFD001DFA32 /* PPMLanguageModel.cpp */,
@@ -1780,7 +1777,6 @@
1948BEF60C226CFD001DFA32 /* CTWLanguageModel.cpp in Sources */,
1948BEF80C226CFD001DFA32 /* DictLanguageModel.cpp in Sources */,
1948BEFA0C226CFD001DFA32 /* HashTable.cpp in Sources */,
- 1948BF030C226CFD001DFA32 /* LanguageModel.cpp in Sources */,
1948BF070C226CFD001DFA32 /* PPMLanguageModel.cpp in Sources */,
1948BF0A0C226CFD001DFA32 /* WordLanguageModel.cpp in Sources */,
1948BF0D0C226CFD001DFA32 /* MemoryLeak.cpp in Sources */,
diff --git a/Src/iPhone/Dasher.xcodeproj/project.pbxproj b/Src/iPhone/Dasher.xcodeproj/project.pbxproj
index aee49eb..5e22a5c 100755
--- a/Src/iPhone/Dasher.xcodeproj/project.pbxproj
+++ b/Src/iPhone/Dasher.xcodeproj/project.pbxproj
@@ -195,7 +195,6 @@
3344FE440F71717C00506EAA /* CTWLanguageModel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3344FDC70F71717C00506EAA /* CTWLanguageModel.cpp */; };
3344FE450F71717C00506EAA /* DictLanguageModel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3344FDC90F71717C00506EAA /* DictLanguageModel.cpp */; };
3344FE460F71717C00506EAA /* HashTable.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3344FDCB0F71717C00506EAA /* HashTable.cpp */; };
- 3344FE4A0F71717C00506EAA /* LanguageModel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3344FDD40F71717C00506EAA /* LanguageModel.cpp */; };
3344FE4C0F71717C00506EAA /* PPMLanguageModel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3344FDD80F71717C00506EAA /* PPMLanguageModel.cpp */; };
3344FE4D0F71717C00506EAA /* WordLanguageModel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3344FDDB0F71717C00506EAA /* WordLanguageModel.cpp */; };
3344FE4F0F71717C00506EAA /* MemoryLeak.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3344FDDE0F71717C00506EAA /* MemoryLeak.cpp */; };
@@ -606,7 +605,6 @@
3344FDCA0F71717C00506EAA /* DictLanguageModel.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = DictLanguageModel.h; sourceTree = "<group>"; };
3344FDCB0F71717C00506EAA /* HashTable.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = HashTable.cpp; sourceTree = "<group>"; };
3344FDCC0F71717C00506EAA /* HashTable.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = HashTable.h; sourceTree = "<group>"; };
- 3344FDD40F71717C00506EAA /* LanguageModel.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = LanguageModel.cpp; sourceTree = "<group>"; };
3344FDD50F71717C00506EAA /* LanguageModel.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = LanguageModel.h; sourceTree = "<group>"; };
3344FDD70F71717C00506EAA /* MixtureLanguageModel.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MixtureLanguageModel.h; sourceTree = "<group>"; };
3344FDD80F71717C00506EAA /* PPMLanguageModel.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = PPMLanguageModel.cpp; sourceTree = "<group>"; };
@@ -1325,7 +1323,6 @@
3344FDCA0F71717C00506EAA /* DictLanguageModel.h */,
3344FDCB0F71717C00506EAA /* HashTable.cpp */,
3344FDCC0F71717C00506EAA /* HashTable.h */,
- 3344FDD40F71717C00506EAA /* LanguageModel.cpp */,
3344FDD50F71717C00506EAA /* LanguageModel.h */,
3344FDD70F71717C00506EAA /* MixtureLanguageModel.h */,
3344FDD80F71717C00506EAA /* PPMLanguageModel.cpp */,
@@ -1590,7 +1587,6 @@
3344FE440F71717C00506EAA /* CTWLanguageModel.cpp in Sources */,
3344FE450F71717C00506EAA /* DictLanguageModel.cpp in Sources */,
3344FE460F71717C00506EAA /* HashTable.cpp in Sources */,
- 3344FE4A0F71717C00506EAA /* LanguageModel.cpp in Sources */,
3344FE4C0F71717C00506EAA /* PPMLanguageModel.cpp in Sources */,
3344FE4D0F71717C00506EAA /* WordLanguageModel.cpp in Sources */,
3344FE4F0F71717C00506EAA /* MemoryLeak.cpp in Sources */,
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]