[dasher: 5/16] Code tidies and namespace cleanups to LanguageModelling classes
- From: Patrick Welche <pwelche src gnome org>
- To: svn-commits-list gnome org
- Cc:
- Subject: [dasher: 5/16] Code tidies and namespace cleanups to LanguageModelling classes
- Date: Tue, 1 Dec 2009 16:14:33 +0000 (UTC)
commit 75a3b89f6aa47df23b7f312c92e0e198c062edb1
Author: Alan Lawrence <acl33 inf phy cam ac uk>
Date: Wed Jul 29 17:09:07 2009 +0100
Code tidies and namespace cleanups to LanguageModelling classes
*Renamed {PPMPY,Japanese}LanguageModel node/context classes (better "grep")
*Build fixes to JapaneseLanguageModel (wrt recent Alphabet code tidying)
*Made {PPM,PPMPY} node/context classes private
*PPMLanguageModel uses Dasher::symbol type; 'symbol' field renamed to sym
.../LanguageModelling/JapaneseLanguageModel.cpp | 30 +++---
.../LanguageModelling/JapaneseLanguageModel.h | 55 ++++++-----
.../LanguageModelling/PPMLanguageModel.cpp | 30 +++---
.../LanguageModelling/PPMLanguageModel.h | 62 ++++++------
.../LanguageModelling/PPMPYLanguageModel.cpp | 88 +++++++++---------
.../LanguageModelling/PPMPYLanguageModel.h | 103 ++++++++++----------
6 files changed, 184 insertions(+), 184 deletions(-)
---
diff --git a/Src/DasherCore/LanguageModelling/JapaneseLanguageModel.cpp b/Src/DasherCore/LanguageModelling/JapaneseLanguageModel.cpp
index 4c473b0..ac1ca8a 100644
--- a/Src/DasherCore/LanguageModelling/JapaneseLanguageModel.cpp
+++ b/Src/DasherCore/LanguageModelling/JapaneseLanguageModel.cpp
@@ -62,7 +62,7 @@ CJapaneseLanguageModel::~CJapaneseLanguageModel() {
// Get the probability distribution at the context
void CJapaneseLanguageModel::GetProbs(Context context, std::vector<unsigned int> &probs, int norm) const {
- CPPMContext *ppmcontext = (CPPMContext *) (context);
+ CJaPPMContext *ppmcontext = (CJaPPMContext *) (context);
int iNumSymbols = GetSize();
@@ -86,7 +86,7 @@ void CJapaneseLanguageModel::GetProbs(Context context, std::vector<unsigned int>
unsigned int iToSpend = norm;
- CPPMnode *pTemp = ppmcontext->head;
+ CJaPPMnode *pTemp = ppmcontext->head;
bool has_convert_symbol = 0; // Flag to show if a conversion symbol appears in the history
std::vector < symbol > hiragana; // Hiragana sequence to be converted
@@ -158,7 +158,7 @@ void CJapaneseLanguageModel::GetProbs(Context context, std::vector<unsigned int>
//cout << "[" << j << "]" << cand_list[j] << endl;
std::vector < symbol > new_cand;
//SetCandidateString(cand_list[j]);
- GetSymbols(&new_cand, &cand_list[j], false);
+ GetSymbols(new_cand, cand_list[j]);
/*for( int k=0; k<new_cand.size(); k++ )
cout << GetText(new_cand[k]) << "[" << new_cand[k] << "] ";
cout << endl; */
@@ -251,7 +251,7 @@ void CJapaneseLanguageModel::GetProbs(Context context, std::vector<unsigned int>
while(pTemp != 0) {
int iTotal = 0;
- CPPMnode *pSymbol = pTemp->child;
+ CJaPPMnode *pSymbol = pTemp->child;
while(pSymbol) {
int sym = pSymbol->symbol;
if(!(exclusions[sym] && doExclusion))
@@ -318,14 +318,14 @@ void CJapaneseLanguageModel::GetProbs(Context context, std::vector<unsigned int>
DASHER_ASSERT(iToSpend == 0);
}
-void CJapaneseLanguageModel::AddSymbol(CJapaneseLanguageModel::CPPMContext &context, int sym)
+void CJapaneseLanguageModel::AddSymbol(CJapaneseLanguageModel::CJaPPMContext &context, int sym)
// add symbol to the context
// creates new nodes, updates counts
// and leaves 'context' at the new context
{
DASHER_ASSERT(sym >= 0 && sym <= GetSize());
- CPPMnode *vineptr, *temp;
+ CJaPPMnode *vineptr, *temp;
int updatecnt = 1;
temp = context.head->vine;
@@ -355,9 +355,9 @@ void CJapaneseLanguageModel::AddSymbol(CJapaneseLanguageModel::CPPMContext &cont
void CJapaneseLanguageModel::EnterSymbol(Context c, int Symbol) {
DASHER_ASSERT(Symbol >= 0 && Symbol <= GetSize());
- CJapaneseLanguageModel::CPPMContext & context = *(CPPMContext *) (c);
+ CJapaneseLanguageModel::CJaPPMContext & context = *(CJaPPMContext *) (c);
- CPPMnode *find;
+ CJaPPMnode *find;
context.history.push_back(Symbol);
if(context.history.size() > 100) {
@@ -399,7 +399,7 @@ void CJapaneseLanguageModel::EnterSymbol(Context c, int Symbol) {
void CJapaneseLanguageModel::LearnSymbol(Context c, int Symbol) {
DASHER_ASSERT(Symbol >= 0 && Symbol <= GetSize());
- CJapaneseLanguageModel::CPPMContext & context = *(CPPMContext *) (c);
+ CJapaneseLanguageModel::CJaPPMContext & context = *(CJaPPMContext *) (c);
AddSymbol(context, Symbol);
}
@@ -424,14 +424,14 @@ void CJapaneseLanguageModel::dumpString(char *str, int pos, int len)
}
}
-void CJapaneseLanguageModel::dumpTrie(CJapaneseLanguageModel::CPPMnode *t, int d)
+void CJapaneseLanguageModel::dumpTrie(CJapaneseLanguageModel::CJaPPMnode *t, int d)
// diagnostic display of the PPM trie from node t and deeper
{
//TODO
/*
dchar debug[256];
int sym;
- CPPMnode *s;
+ CJaPPMnode *s;
Usprintf( debug,TEXT("%5d %7x "), d, t );
//TODO: Uncomment this when headers sort out
//DebugOutput(debug);
@@ -489,11 +489,11 @@ void CJapaneseLanguageModel::dump()
/// PPMnode definitions
////////////////////////////////////////////////////////////////////////
-CJapaneseLanguageModel::CPPMnode * CJapaneseLanguageModel::CPPMnode::find_symbol(int sym) const
+CJapaneseLanguageModel::CJaPPMnode * CJapaneseLanguageModel::CJaPPMnode::find_symbol(int sym) const
// see if symbol is a child of node
{
// printf("finding symbol %d at node %d\n",sym,node->id);
- CPPMnode *found = child;
+ CJaPPMnode *found = child;
while(found) {
if(found->symbol == sym) {
@@ -504,8 +504,8 @@ CJapaneseLanguageModel::CPPMnode * CJapaneseLanguageModel::CPPMnode::find_symbol
return 0;
}
-CJapaneseLanguageModel::CPPMnode * CJapaneseLanguageModel::AddSymbolToNode(CPPMnode *pNode, int sym, int *update) {
- CPPMnode *pReturn = pNode->find_symbol(sym);
+CJapaneseLanguageModel::CJaPPMnode * CJapaneseLanguageModel::AddSymbolToNode(CJaPPMnode *pNode, int sym, int *update) {
+ CJaPPMnode *pReturn = pNode->find_symbol(sym);
// std::cout << sym << ",";
diff --git a/Src/DasherCore/LanguageModelling/JapaneseLanguageModel.h b/Src/DasherCore/LanguageModelling/JapaneseLanguageModel.h
index 714789f..98737c3 100644
--- a/Src/DasherCore/LanguageModelling/JapaneseLanguageModel.h
+++ b/Src/DasherCore/LanguageModelling/JapaneseLanguageModel.h
@@ -14,6 +14,7 @@
#include "../../Common/Allocators/PooledAlloc.h"
#include "LanguageModel.h"
+#include "Alphabet.h"
namespace Dasher {
/// \ingroup LM
@@ -37,35 +38,35 @@ namespace Dasher {
private:
- class CPPMnode {
+ class CJaPPMnode {
public:
- CPPMnode * find_symbol(int sym) const;
- CPPMnode *child;
- CPPMnode *next;
- CPPMnode *vine;
+ CJaPPMnode * find_symbol(int sym) const;
+ CJaPPMnode *child;
+ CJaPPMnode *next;
+ CJaPPMnode *vine;
unsigned short int count;
short int symbol;
- CPPMnode(int sym);
- CPPMnode();
+ CJaPPMnode(int sym);
+ CJaPPMnode();
};
- class CPPMContext {
+ class CJaPPMContext {
public:
- CPPMContext(CPPMContext const &input) {
+ CJaPPMContext(CJaPPMContext const &input) {
head = input.head;
order = input.order;
history = input.history;
- } CPPMContext(CPPMnode * _head = 0, int _order = 0):head(_head), order(_order) {
+ } CJaPPMContext(CJaPPMnode * _head = 0, int _order = 0):head(_head), order(_order) {
};
- ~CPPMContext() {
+ ~CJaPPMContext() {
};
void dump();
- CPPMnode *head;
+ CJaPPMnode *head;
int order;
std::vector < symbol > history;
};
- CPPMnode *AddSymbolToNode(CPPMnode * pNode, int sym, int *update);
+ CJaPPMnode *AddSymbolToNode(CJaPPMnode * pNode, int sym, int *update);
//--Start:Kanji Conversion Related Addition
symbol GetStartConversionSymbol() const {
@@ -80,17 +81,17 @@ namespace Dasher {
const std::string & GetDisplayText(symbol i) const {
return SymbolAlphabet().GetAlphabetPointer()->GetDisplayText(i);
} // return string for i'th symbol
- const void GetSymbols(std::vector < symbol > *Symbols, std::string * Input, bool IsMore) const {
- SymbolAlphabet().GetAlphabetPointer()->GetSymbols(Symbols, Input, IsMore);
+ const void GetSymbols(std::vector < symbol > &Symbols, std::string &Input) const {
+ SymbolAlphabet().GetAlphabetPointer()->GetSymbols(Symbols, Input);
}
//--End:Kanji Conversion Related
- virtual void AddSymbol(CPPMContext & context, int sym);
+ virtual void AddSymbol(CJaPPMContext & context, int sym);
void dumpSymbol(int sym);
void dumpString(char *str, int pos, int len);
- void dumpTrie(CPPMnode * t, int d);
+ void dumpTrie(CJaPPMnode * t, int d);
- CPPMContext *m_pRootContext;
- CPPMnode *m_pRoot;
+ CJaPPMContext *m_pRootContext;
+ CJaPPMnode *m_pRoot;
int m_iMaxOrder;
double m_dBackOffConstat;
@@ -99,21 +100,21 @@ namespace Dasher {
bool bUpdateExclusion;
- mutable CSimplePooledAlloc < CPPMnode > m_NodeAlloc;
- CPooledAlloc < CPPMContext > m_ContextAlloc;
+ mutable CSimplePooledAlloc < CJaPPMnode > m_NodeAlloc;
+ CPooledAlloc < CJaPPMContext > m_ContextAlloc;
};
/// \}
////////////////////////////////////////////////////////////////////////
- inline Dasher::CJapaneseLanguageModel::CPPMnode::CPPMnode(int sym):symbol(sym) {
+ inline Dasher::CJapaneseLanguageModel::CJaPPMnode::CJaPPMnode(int sym):symbol(sym) {
child = next = vine = 0;
count = 1;
}
////////////////////////////////////////////////////////////////////////
- inline CJapaneseLanguageModel::CPPMnode::CPPMnode() {
+ inline CJapaneseLanguageModel::CJaPPMnode::CJaPPMnode() {
child = next = vine = 0;
count = 1;
}
@@ -121,7 +122,7 @@ namespace Dasher {
///////////////////////////////////////////////////////////////////
inline CLanguageModel::Context CJapaneseLanguageModel::CreateEmptyContext() {
- CPPMContext *pCont = m_ContextAlloc.Alloc();
+ CJaPPMContext *pCont = m_ContextAlloc.Alloc();
*pCont = *m_pRootContext;
return (Context) pCont;
}
@@ -129,8 +130,8 @@ namespace Dasher {
///////////////////////////////////////////////////////////////////
inline CLanguageModel::Context CJapaneseLanguageModel::CloneContext(Context Copy) {
- CPPMContext *pCont = m_ContextAlloc.Alloc();
- CPPMContext *pCopy = (CPPMContext *) Copy;
+ CJaPPMContext *pCont = m_ContextAlloc.Alloc();
+ CJaPPMContext *pCopy = (CJaPPMContext *) Copy;
*pCont = *pCopy;
return (Context) pCont;
}
@@ -138,7 +139,7 @@ namespace Dasher {
///////////////////////////////////////////////////////////////////
inline void CJapaneseLanguageModel::ReleaseContext(Context release) {
- m_ContextAlloc.Free((CPPMContext *) release);
+ m_ContextAlloc.Free((CJaPPMContext *) release);
}
///////////////////////////////////////////////////////////////////
diff --git a/Src/DasherCore/LanguageModelling/PPMLanguageModel.cpp b/Src/DasherCore/LanguageModelling/PPMLanguageModel.cpp
index 8c64974..0dbe02c 100644
--- a/Src/DasherCore/LanguageModelling/PPMLanguageModel.cpp
+++ b/Src/DasherCore/LanguageModelling/PPMLanguageModel.cpp
@@ -32,7 +32,7 @@ static char THIS_FILE[] = __FILE__;
CPPMLanguageModel::CPPMLanguageModel(Dasher::CEventHandler *pEventHandler, CSettingsStore *pSettingsStore, const CSymbolAlphabet &SymbolAlphabet)
:CLanguageModel(pEventHandler, pSettingsStore, SymbolAlphabet), m_iMaxOrder(4), NodesAllocated(0), m_NodeAlloc(8192), m_ContextAlloc(1024) {
m_pRoot = m_NodeAlloc.Alloc();
- m_pRoot->symbol = -1;
+ m_pRoot->sym = -1;
m_pRootContext = m_ContextAlloc.Alloc();
m_pRootContext->head = m_pRoot;
@@ -95,7 +95,7 @@ void CPPMLanguageModel::GetProbs(Context context, std::vector<unsigned int> &pro
CPPMnode *pSymbol = pTemp->child;
while(pSymbol) {
- int sym = pSymbol->symbol;
+ symbol sym = pSymbol->sym;
if(!(exclusions[sym] && doExclusion))
iTotal += pSymbol->count;
pSymbol = pSymbol->next;
@@ -105,12 +105,12 @@ void CPPMLanguageModel::GetProbs(Context context, std::vector<unsigned int> &pro
unsigned int size_of_slice = iToSpend;
pSymbol = pTemp->child;
while(pSymbol) {
- if(!(exclusions[pSymbol->symbol] && doExclusion)) {
- exclusions[pSymbol->symbol] = 1;
+ if(!(exclusions[pSymbol->sym] && doExclusion)) {
+ exclusions[pSymbol->sym] = 1;
unsigned int p = static_cast < myint > (size_of_slice) * (100 * pSymbol->count - beta) / (100 * iTotal + alpha);
- probs[pSymbol->symbol] += p;
+ probs[pSymbol->sym] += p;
iToSpend -= p;
}
// Usprintf(debug,TEXT("sym %u counts %d p %u tospend %u \n"),sym,s->count,p,tospend);
@@ -160,7 +160,7 @@ void CPPMLanguageModel::GetProbs(Context context, std::vector<unsigned int> &pro
DASHER_ASSERT(iToSpend == 0);
}
-void CPPMLanguageModel::AddSymbol(CPPMLanguageModel::CPPMContext &context, int sym)
+void CPPMLanguageModel::AddSymbol(CPPMLanguageModel::CPPMContext &context, symbol sym)
// add symbol to the context
// creates new nodes, updates counts
// and leaves 'context' at the new context
@@ -251,7 +251,7 @@ void CPPMLanguageModel::LearnSymbol(Context c, int Symbol) {
AddSymbol(context, Symbol);
}
-void CPPMLanguageModel::dumpSymbol(int sym) {
+void CPPMLanguageModel::dumpSymbol(symbol sym) {
if((sym <= 32) || (sym >= 127))
printf("<%d>", sym);
else
@@ -286,7 +286,7 @@ void CPPMLanguageModel::dumpTrie(CPPMLanguageModel::CPPMnode *t, int d)
if (t < 0) // pointer to input
printf( " <" );
else {
- Usprintf(debug,TEXT( " %3d %5d %7x %7x %7x <"), t->symbol,t->count, t->vine, t->child, t->next );
+ Usprintf(debug,TEXT( " %3d %5d %7x %7x %7x <"), t->sym,t->count, t->vine, t->child, t->next );
//TODO: Uncomment this when headers sort out
//DebugOutput(debug);
}
@@ -298,7 +298,7 @@ void CPPMLanguageModel::dumpTrie(CPPMLanguageModel::CPPMnode *t, int d)
if (t != 0) {
s = t->child;
while (s != 0) {
- sym =s->symbol;
+ sym =s->sym;
dumpTrieStr [d] = sym;
dumpTrie( s, d+1 );
@@ -337,14 +337,14 @@ void CPPMLanguageModel::dump()
/// PPMnode definitions
////////////////////////////////////////////////////////////////////////
-CPPMLanguageModel::CPPMnode * CPPMLanguageModel::CPPMnode::find_symbol(int sym) const
+CPPMLanguageModel::CPPMnode * CPPMLanguageModel::CPPMnode::find_symbol(symbol sym) const
// see if symbol is a child of node
{
// printf("finding symbol %d at node %d\n",sym,node->id);
CPPMnode *found = child;
while(found) {
- if(found->symbol == sym) {
+ if(found->sym == sym) {
return found;
}
found = found->next;
@@ -352,7 +352,7 @@ CPPMLanguageModel::CPPMnode * CPPMLanguageModel::CPPMnode::find_symbol(int sym)
return 0;
}
-CPPMLanguageModel::CPPMnode * CPPMLanguageModel::AddSymbolToNode(CPPMnode *pNode, int sym, int *update) {
+CPPMLanguageModel::CPPMnode * CPPMLanguageModel::AddSymbolToNode(CPPMnode *pNode, symbol sym, int *update) {
CPPMnode *pReturn = pNode->find_symbol(sym);
// std::cout << sym << ",";
@@ -371,7 +371,7 @@ CPPMLanguageModel::CPPMnode * CPPMLanguageModel::AddSymbolToNode(CPPMnode *pNode
// std::cout << "Creating new node" << std::endl;
pReturn = m_NodeAlloc.Alloc(); // count is initialized to 1
- pReturn->symbol = sym;
+ pReturn->sym = sym;
pReturn->next = pNode->child;
pNode->child = pReturn;
@@ -415,7 +415,7 @@ bool CPPMLanguageModel::RecursiveWrite(CPPMnode *pNode, std::map<CPPMnode *, int
sBR.m_iNext = GetIndex(pNode->next, pmapIdx, pNextIdx);
sBR.m_iVine = GetIndex(pNode->vine, pmapIdx, pNextIdx);
sBR.m_iCount = pNode->count;
- sBR.m_iSymbol = pNode->symbol;
+ sBR.m_iSymbol = pNode->sym;
pOutputFile->write(reinterpret_cast<char*>(&sBR), sizeof(BinaryRecord));
@@ -465,7 +465,7 @@ bool CPPMLanguageModel::ReadFromFile(std::string strFilename) {
pCurrent->next = GetAddress(sBR.m_iNext, &oMap);
pCurrent->vine = GetAddress(sBR.m_iVine, &oMap);
pCurrent->count = sBR.m_iCount;
- pCurrent->symbol = sBR.m_iSymbol;
+ pCurrent->sym = sBR.m_iSymbol;
if(!bStarted) {
m_pRoot = pCurrent;
diff --git a/Src/DasherCore/LanguageModelling/PPMLanguageModel.h b/Src/DasherCore/LanguageModelling/PPMLanguageModel.h
index c149b21..d8a72ba 100644
--- a/Src/DasherCore/LanguageModelling/PPMLanguageModel.h
+++ b/Src/DasherCore/LanguageModelling/PPMLanguageModel.h
@@ -29,6 +29,33 @@ namespace Dasher {
///
class CPPMLanguageModel:public CLanguageModel, private NoClones {
+ private:
+ class CPPMnode {
+ public:
+ CPPMnode * find_symbol(symbol sym)const;
+ CPPMnode *child;
+ CPPMnode *next;
+ CPPMnode *vine;
+ unsigned short int count;
+ symbol sym;
+ CPPMnode(symbol sym);
+ CPPMnode();
+ };
+
+ class CPPMContext {
+ public:
+ CPPMContext(CPPMContext const &input) {
+ head = input.head;
+ order = input.order;
+ } CPPMContext(CPPMnode * _head = 0, int _order = 0):head(_head), order(_order) {
+ };
+ ~CPPMContext() {
+ };
+ void dump();
+ CPPMnode *head;
+ int order;
+ };
+
public:
CPPMLanguageModel(Dasher::CEventHandler * pEventHandler, CSettingsStore * pSettingsStore, const CSymbolAlphabet & alph);
@@ -45,43 +72,16 @@ namespace Dasher {
void dump();
- class CPPMnode {
- public:
- CPPMnode * find_symbol(int sym)const;
- CPPMnode *child;
- CPPMnode *next;
- CPPMnode *vine;
- unsigned short int count;
- short int symbol;
- CPPMnode(int sym);
- CPPMnode();
- };
-
-
virtual bool WriteToFile(std::string strFilename);
virtual bool ReadFromFile(std::string strFilename);
bool RecursiveWrite(CPPMnode *pNode, std::map<CPPMnode *, int> *pmapIdx, int *pNextIdx, std::ofstream *pOutputFile);
int GetIndex(CPPMnode *pAddr, std::map<CPPMnode *, int> *pmapIdx, int *pNextIdx);
CPPMnode *GetAddress(int iIndex, std::map<int, CPPMnode*> *pMap);
- class CPPMContext {
- public:
- CPPMContext(CPPMContext const &input) {
- head = input.head;
- order = input.order;
- } CPPMContext(CPPMnode * _head = 0, int _order = 0):head(_head), order(_order) {
- };
- ~CPPMContext() {
- };
- void dump();
- CPPMnode *head;
- int order;
- };
-
- CPPMnode *AddSymbolToNode(CPPMnode * pNode, int sym, int *update);
+ CPPMnode *AddSymbolToNode(CPPMnode * pNode, symbol sym, int *update);
- virtual void AddSymbol(CPPMContext & context, int sym);
- void dumpSymbol(int sym);
+ virtual void AddSymbol(CPPMContext & context, symbol sym);
+ void dumpSymbol(symbol sym);
void dumpString(char *str, int pos, int len);
void dumpTrie(CPPMnode * t, int d);
@@ -103,7 +103,7 @@ namespace Dasher {
/// @}
- inline Dasher::CPPMLanguageModel::CPPMnode::CPPMnode(int sym):symbol(sym) {
+ inline Dasher::CPPMLanguageModel::CPPMnode::CPPMnode(symbol _sym):sym(_sym) {
child = next = vine = 0;
count = 1;
}
diff --git a/Src/DasherCore/LanguageModelling/PPMPYLanguageModel.cpp b/Src/DasherCore/LanguageModelling/PPMPYLanguageModel.cpp
index 509275c..64a84c5 100644
--- a/Src/DasherCore/LanguageModelling/PPMPYLanguageModel.cpp
+++ b/Src/DasherCore/LanguageModelling/PPMPYLanguageModel.cpp
@@ -68,7 +68,7 @@ CPPMPYLanguageModel::~CPPMPYLanguageModel() {
// Get the probability distribution at the context
void CPPMPYLanguageModel::GetProbs(Context context, std::vector<unsigned int> &probs, int norm, int iUniform) const {
- const CPPMContext *ppmcontext = (const CPPMContext *)(context);
+ const CPPMPYContext *ppmcontext = (const CPPMPYContext *)(context);
// DASHER_ASSERT(m_setContexts.count(ppmcontext) > 0);
@@ -103,12 +103,12 @@ void CPPMPYLanguageModel::GetProbs(Context context, std::vector<unsigned int> &p
int alpha = GetLongParameter( LP_LM_ALPHA );
int beta = GetLongParameter( LP_LM_BETA );
- CPPMnode *pTemp = ppmcontext->head;
+ CPPMPYnode *pTemp = ppmcontext->head;
while(pTemp != 0) {
int iTotal = 0;
- CPPMnode *pSymbol;
+ CPPMPYnode *pSymbol;
for(i =0; i<DIVISION; i++){
// std::cout<<"I "<<i<<std::endl;
pSymbol = pTemp->child[i];
@@ -200,7 +200,7 @@ void CPPMPYLanguageModel::GetPartProbs(Context context, SCENode ** pStart, int i
// std::cout<<"Norms is "<<norm<<std::endl;
// std::cout<<"iUniform is "<<iUniform<<std::endl;
- const CPPMContext *ppmcontext = (const CPPMContext *)(context);
+ const CPPMPYContext *ppmcontext = (const CPPMPYContext *)(context);
// DASHER_ASSERT(m_setContexts.count(ppmcontext) > 0);
@@ -243,9 +243,9 @@ void CPPMPYLanguageModel::GetPartProbs(Context context, SCENode ** pStart, int i
int alpha = GetLongParameter( LP_LM_ALPHA );
int beta = GetLongParameter( LP_LM_BETA );
- CPPMnode *pTemp = ppmcontext->head;
- CPPMnode *pFound;
- std::vector<CPPMnode *> vNodeStore;
+ CPPMPYnode *pTemp = ppmcontext->head;
+ CPPMPYnode *pFound;
+ std::vector<CPPMPYnode *> vNodeStore;
//new code
while(pTemp!=0){
@@ -355,12 +355,12 @@ void CPPMPYLanguageModel::GetPartProbs(Context context, SCENode ** pStart, int i
void CPPMPYLanguageModel::GetPYProbs(Context context, std::vector<unsigned int> &probs, int norm, int iUniform) {
- const CPPMContext *ppmcontext = (const CPPMContext *)(context);
+ const CPPMPYContext *ppmcontext = (const CPPMPYContext *)(context);
// std::cout<<"PPMCONTEXT symbol: "<<ppmcontext->head->symbol<<std::endl;
/*
- CPPMnode * pNode = m_pRoot->child;
+ CPPMPYnode * pNode = m_pRoot->child;
while(pNode){
std::cout<<"Next Symbol: "<<pNode->symbol<<" ";
@@ -399,12 +399,12 @@ void CPPMPYLanguageModel::GetPYProbs(Context context, std::vector<unsigned int>
int alpha = GetLongParameter( LP_LM_ALPHA );
int beta = GetLongParameter( LP_LM_BETA );
- CPPMnode *pTemp = ppmcontext->head;
+ CPPMPYnode *pTemp = ppmcontext->head;
while(pTemp != 0) {
int iTotal = 0;
- CPPMnode *pSymbol;
+ CPPMPYnode *pSymbol;
for(i=0; i<DIVISION; i++){
pSymbol = pTemp->pychild[i];
while(pSymbol) {
@@ -479,7 +479,7 @@ void CPPMPYLanguageModel::GetPYProbs(Context context, std::vector<unsigned int>
}
-void CPPMPYLanguageModel::AddSymbol(CPPMPYLanguageModel::CPPMContext &context, int sym)
+void CPPMPYLanguageModel::AddSymbol(CPPMPYLanguageModel::CPPMPYContext &context, int sym)
// add symbol to the context
// creates new nodes, updates counts
// and leaves 'context' at the new context
@@ -491,7 +491,7 @@ void CPPMPYLanguageModel::AddSymbol(CPPMPYLanguageModel::CPPMContext &context, i
DASHER_ASSERT(sym >= 0 && sym < GetSize());
- CPPMnode *vineptr, *temp;
+ CPPMPYnode *vineptr, *temp;
int updatecnt = 1;
temp = context.head->vine;
@@ -515,7 +515,7 @@ void CPPMPYLanguageModel::AddSymbol(CPPMPYLanguageModel::CPPMContext &context, i
}
}
-void CPPMPYLanguageModel::AddPYSymbol(CPPMPYLanguageModel::CPPMContext &context, int pysym)
+void CPPMPYLanguageModel::AddPYSymbol(CPPMPYLanguageModel::CPPMPYContext &context, int pysym)
// add symbol to the context
// creates new nodes, updates counts
// and leaves 'context' at the new context
@@ -527,11 +527,11 @@ void CPPMPYLanguageModel::AddPYSymbol(CPPMPYLanguageModel::CPPMContext &context,
DASHER_ASSERT(pysym >= 0 && pysym < m_pyAlphabet.GetSize());
- CPPMnode *vineptr, *temp, *pytail;
+ CPPMPYnode *vineptr, *temp, *pytail;
int updatecnt = 1;
- //update of vine pointers similar to old PPMnodes
+ //update of vine pointers similar to old PPMPYnodes
temp = context.head->vine;
pytail = AddPYSymbolToNode(context.head, pysym, &updatecnt);
vineptr = pytail;
@@ -566,9 +566,9 @@ void CPPMPYLanguageModel::EnterSymbol(Context c, int Symbol) {
DASHER_ASSERT(Symbol >= 0 && Symbol < GetSize());
- CPPMPYLanguageModel::CPPMContext & context = *(CPPMContext *) (c);
+ CPPMPYLanguageModel::CPPMPYContext & context = *(CPPMPYContext *) (c);
- CPPMnode *find;
+ CPPMPYnode *find;
while(context.head) {
@@ -610,7 +610,7 @@ void CPPMPYLanguageModel::LearnSymbol(Context c, int Symbol) {
DASHER_ASSERT(Symbol >= 0 && Symbol < GetSize());
- CPPMPYLanguageModel::CPPMContext & context = *(CPPMContext *) (c);
+ CPPMPYLanguageModel::CPPMPYContext & context = *(CPPMPYContext *) (c);
AddSymbol(context, Symbol);
}
@@ -620,10 +620,10 @@ void CPPMPYLanguageModel::LearnPYSymbol(Context c, int Symbol) {
DASHER_ASSERT(Symbol >= 0 && Symbol < m_pyAlphabet.GetSize());
- CPPMPYLanguageModel::CPPMContext & context = *(CPPMContext *) (c);
+ CPPMPYLanguageModel::CPPMPYContext & context = *(CPPMPYContext *) (c);
// std::cout<<"py learn context : "<<context.head->symbol<<std::endl;
- /* CPPMnode * pNode = m_pRoot->child;
+ /* CPPMPYnode * pNode = m_pRoot->child;
while(pNode){
std::cout<<"Next Symbol: "<<pNode->symbol<<" ";
@@ -655,14 +655,14 @@ void CPPMPYLanguageModel::dumpString(char *str, int pos, int len)
}
}
-void CPPMPYLanguageModel::dumpTrie(CPPMPYLanguageModel::CPPMnode *t, int d)
+void CPPMPYLanguageModel::dumpTrie(CPPMPYLanguageModel::CPPMPYnode *t, int d)
// diagnostic display of the PPM trie from node t and deeper
{
//TODO
/*
dchar debug[256];
int sym;
- CPPMnode *s;
+ CPPMPYnode *s;
Usprintf( debug,TEXT("%5d %7x "), d, t );
//TODO: Uncomment this when headers sort out
//DebugOutput(debug);
@@ -717,16 +717,16 @@ void CPPMPYLanguageModel::dump()
}
////////////////////////////////////////////////////////////////////////
-/// PPMnode definitions
+/// PPMPYnode definitions
////////////////////////////////////////////////////////////////////////
-CPPMPYLanguageModel::CPPMnode * CPPMPYLanguageModel::CPPMnode::find_symbol(int sym) const
+CPPMPYLanguageModel::CPPMPYnode * CPPMPYLanguageModel::CPPMPYnode::find_symbol(int sym) const
// see if symbol is a child of node
{
// printf("finding symbol %d at node %d\n",sym,node->id);
//Potentially replace with large scale find algorithm, necessary?
- CPPMnode * found = NULL;
+ CPPMPYnode * found = NULL;
bool bFound = 0;
for (int i=0; i<DIVISION-1; i++){
@@ -770,11 +770,11 @@ CPPMPYLanguageModel::CPPMnode * CPPMPYLanguageModel::CPPMnode::find_symbol(int s
}
// New find pysymbol function, to find the py symbol in nodes attached to character node
-CPPMPYLanguageModel::CPPMnode * CPPMPYLanguageModel::CPPMnode::find_pysymbol(int pysym) const
+CPPMPYLanguageModel::CPPMPYnode * CPPMPYLanguageModel::CPPMPYnode::find_pysymbol(int pysym) const
// see if pysymbol is a child of node
{
- CPPMnode * found = NULL;
+ CPPMPYnode * found = NULL;
bool bFound = 0;
for (int i=0; i<DIVISION-1; i++){
@@ -797,9 +797,9 @@ CPPMPYLanguageModel::CPPMnode * CPPMPYLanguageModel::CPPMnode::find_pysymbol(int
return 0;
}
-CPPMPYLanguageModel::CPPMnode * CPPMPYLanguageModel::AddSymbolToNode(CPPMnode *pNode, int sym, int *update) {
+CPPMPYLanguageModel::CPPMPYnode * CPPMPYLanguageModel::AddSymbolToNode(CPPMPYnode *pNode, int sym, int *update) {
// std::cout<<"Addnode sym "<<sym<<std::endl;
- CPPMnode *pReturn = pNode->find_symbol(sym);
+ CPPMPYnode *pReturn = pNode->find_symbol(sym);
// std::cout << sym << ",";
@@ -842,8 +842,8 @@ CPPMPYLanguageModel::CPPMnode * CPPMPYLanguageModel::AddSymbolToNode(CPPMnode *p
}
-CPPMPYLanguageModel::CPPMnode * CPPMPYLanguageModel::AddPYSymbolToNode(CPPMnode *pNode, int pysym, int *update) {
- CPPMnode *pReturn = pNode->find_pysymbol(pysym);
+CPPMPYLanguageModel::CPPMPYnode * CPPMPYLanguageModel::AddPYSymbolToNode(CPPMPYnode *pNode, int pysym, int *update) {
+ CPPMPYnode *pReturn = pNode->find_pysymbol(pysym);
// std::cout << sym << ",";
@@ -900,7 +900,7 @@ bool CPPMPYLanguageModel::WriteToFile(std::string strFilename) {
std::cout<<"WRITE TO FILE USED?"<<std::endl;
- std::map<CPPMnode *, int> mapIdx;
+ std::map<CPPMPYnode *, int> mapIdx;
int iNextIdx(1); // Index of 0 means NULL;
std::ofstream oOutputFile(strFilename.c_str());
@@ -913,7 +913,7 @@ bool CPPMPYLanguageModel::WriteToFile(std::string strFilename) {
};
//Mandarin - PY not enabled for these read-write functions
-bool CPPMPYLanguageModel::RecursiveWrite(CPPMnode *pNode, std::map<CPPMnode *, int> *pmapIdx, int *pNextIdx, std::ofstream *pOutputFile) {
+bool CPPMPYLanguageModel::RecursiveWrite(CPPMPYnode *pNode, std::map<CPPMPYnode *, int> *pmapIdx, int *pNextIdx, std::ofstream *pOutputFile) {
// Dump node here
@@ -929,7 +929,7 @@ bool CPPMPYLanguageModel::RecursiveWrite(CPPMnode *pNode, std::map<CPPMnode *, i
pOutputFile->write(reinterpret_cast<char*>(&sBR), sizeof(BinaryRecord));
- CPPMnode *pCurrentChild(pNode->child[0]);
+ CPPMPYnode *pCurrentChild(pNode->child[0]);
while(pCurrentChild != NULL) {
RecursiveWrite(pCurrentChild, pmapIdx, pNextIdx, pOutputFile);
@@ -939,17 +939,17 @@ bool CPPMPYLanguageModel::RecursiveWrite(CPPMnode *pNode, std::map<CPPMnode *, i
return true;
};
-int CPPMPYLanguageModel::GetIndex(CPPMnode *pAddr, std::map<CPPMnode *, int> *pmapIdx, int *pNextIdx) {
+int CPPMPYLanguageModel::GetIndex(CPPMPYnode *pAddr, std::map<CPPMPYnode *, int> *pmapIdx, int *pNextIdx) {
std::cout<<"GetIndex gets called?"<<std::endl;
int iIndex;
if(pAddr == NULL)
iIndex = 0;
else {
- std::map<CPPMnode *, int>::iterator it(pmapIdx->find(pAddr));
+ std::map<CPPMPYnode *, int>::iterator it(pmapIdx->find(pAddr));
if(it == pmapIdx->end()) {
iIndex = *pNextIdx;
- pmapIdx->insert(std::pair<CPPMnode *, int>(pAddr, iIndex));
+ pmapIdx->insert(std::pair<CPPMPYnode *, int>(pAddr, iIndex));
++(*pNextIdx);
}
else {
@@ -964,14 +964,14 @@ int CPPMPYLanguageModel::GetIndex(CPPMnode *pAddr, std::map<CPPMnode *, int> *pm
bool CPPMPYLanguageModel::ReadFromFile(std::string strFilename) {
std::ifstream oInputFile(strFilename.c_str());
- std::map<int, CPPMnode*> oMap;
+ std::map<int, CPPMPYnode*> oMap;
BinaryRecord sBR;
bool bStarted(false);
while(!oInputFile.eof()) {
oInputFile.read(reinterpret_cast<char *>(&sBR), sizeof(BinaryRecord));
- CPPMnode *pCurrent(GetAddress(sBR.m_iIndex, &oMap));
+ CPPMPYnode *pCurrent(GetAddress(sBR.m_iIndex, &oMap));
//Note future changes here:
pCurrent->child[0] = GetAddress(sBR.m_iChild, &oMap);
pCurrent->next = GetAddress(sBR.m_iNext, &oMap);
@@ -990,15 +990,15 @@ bool CPPMPYLanguageModel::ReadFromFile(std::string strFilename) {
return false;
};
-CPPMPYLanguageModel::CPPMnode *CPPMPYLanguageModel::GetAddress(int iIndex, std::map<int, CPPMnode*> *pMap) {
+CPPMPYLanguageModel::CPPMPYnode *CPPMPYLanguageModel::GetAddress(int iIndex, std::map<int, CPPMPYnode*> *pMap) {
std::cout<<"Get Address gets called?"<<std::endl;
- std::map<int, CPPMnode*>::iterator it(pMap->find(iIndex));
+ std::map<int, CPPMPYnode*>::iterator it(pMap->find(iIndex));
if(it == pMap->end()) {
- CPPMnode *pNewNode;
+ CPPMPYnode *pNewNode;
pNewNode = m_NodeAlloc.Alloc();
- pMap->insert(std::pair<int, CPPMnode*>(iIndex, pNewNode));
+ pMap->insert(std::pair<int, CPPMPYnode*>(iIndex, pNewNode));
return pNewNode;
}
else {
diff --git a/Src/DasherCore/LanguageModelling/PPMPYLanguageModel.h b/Src/DasherCore/LanguageModelling/PPMPYLanguageModel.h
index 9e4174a..94f466f 100644
--- a/Src/DasherCore/LanguageModelling/PPMPYLanguageModel.h
+++ b/Src/DasherCore/LanguageModelling/PPMPYLanguageModel.h
@@ -43,6 +43,37 @@ namespace Dasher {
///
class CPPMPYLanguageModel:public CLanguageModel, private NoClones {
+ private:
+ class CPPMPYnode {
+ public:
+ CPPMPYnode * find_symbol(int sym)const;
+ CPPMPYnode * find_pysymbol(int pysym)const;
+ //Each PPM node store DIVISION number of addresses for children, so that each node branches out DIVISION times (as compared to binary); this is aimed to give better run-time speed
+ CPPMPYnode * child[DIVISION];
+ CPPMPYnode *next;
+ CPPMPYnode *vine;
+ //Similarly (as last comment) for Pin Yin
+ CPPMPYnode * pychild[DIVISION];
+ unsigned short int count;
+ short int symbol;
+ CPPMPYnode(int sym);
+ CPPMPYnode();
+ };
+
+ class CPPMPYContext {
+ public:
+ CPPMPYContext(CPPMPYContext const &input) {
+ head = input.head;
+ order = input.order;
+ } CPPMPYContext(CPPMPYnode * _head = 0, int _order = 0):head(_head), order(_order) {
+ };
+ ~CPPMPYContext() {
+ };
+ void dump();
+ CPPMPYnode *head;
+ int order;
+ };
+
public:
CPPMPYLanguageModel(Dasher::CEventHandler * pEventHandler, CSettingsStore * pSettingsStore, const CSymbolAlphabet & alph, const CSymbolAlphabet & pyalph);
@@ -63,56 +94,24 @@ namespace Dasher {
void dump();
- class CPPMnode {
- public:
- CPPMnode * find_symbol(int sym)const;
- CPPMnode * find_pysymbol(int pysym)const;
- //Each PPM node store DIVISION number of addresses for children, so that each node branches out DIVISION times (as compared to binary); this is aimed to give better run-time speed
- CPPMnode * child[DIVISION];
- CPPMnode *next;
- CPPMnode *vine;
- //Similarly (as last comment) for Pin Yin
- CPPMnode * pychild[DIVISION];
- unsigned short int count;
- short int symbol;
- CPPMnode(int sym);
- CPPMnode();
- };
-
-
virtual bool WriteToFile(std::string strFilename);
virtual bool ReadFromFile(std::string strFilename);
- bool RecursiveWrite(CPPMnode *pNode, std::map<CPPMnode *, int> *pmapIdx, int *pNextIdx, std::ofstream *pOutputFile);
- int GetIndex(CPPMnode *pAddr, std::map<CPPMnode *, int> *pmapIdx, int *pNextIdx);
- CPPMnode *GetAddress(int iIndex, std::map<int, CPPMnode*> *pMap);
-
- class CPPMContext {
- public:
- CPPMContext(CPPMContext const &input) {
- head = input.head;
- order = input.order;
- } CPPMContext(CPPMnode * _head = 0, int _order = 0):head(_head), order(_order) {
- };
- ~CPPMContext() {
- };
- void dump();
- CPPMnode *head;
- int order;
- };
-
+ bool RecursiveWrite(CPPMPYnode *pNode, std::map<CPPMPYnode *, int> *pmapIdx, int *pNextIdx, std::ofstream *pOutputFile);
+ int GetIndex(CPPMPYnode *pAddr, std::map<CPPMPYnode *, int> *pmapIdx, int *pNextIdx);
+ CPPMPYnode *GetAddress(int iIndex, std::map<int, CPPMPYnode*> *pMap);
- CPPMnode *AddSymbolToNode(CPPMnode * pNode, int sym, int *update);
- CPPMnode *AddPYSymbolToNode(CPPMnode * pNode, int pysym, int *update);
+ CPPMPYnode *AddSymbolToNode(CPPMPYnode * pNode, int sym, int *update);
+ CPPMPYnode *AddPYSymbolToNode(CPPMPYnode * pNode, int pysym, int *update);
- virtual void AddSymbol(CPPMContext & context, int sym);
- void AddPYSymbol(CPPMContext & context, int pysym);
+ virtual void AddSymbol(CPPMPYContext & context, int sym);
+ void AddPYSymbol(CPPMPYContext & context, int pysym);
void dumpSymbol(int sym);
void dumpString(char *str, int pos, int len);
- void dumpTrie(CPPMnode * t, int d);
+ void dumpTrie(CPPMPYnode * t, int d);
- CPPMContext *m_pRootContext;
- CPPMnode *m_pRoot;
+ CPPMPYContext *m_pRootContext;
+ CPPMPYnode *m_pRoot;
int m_iMaxOrder;
double m_dBackOffConstat;
@@ -123,10 +122,10 @@ namespace Dasher {
- mutable CSimplePooledAlloc < CPPMnode > m_NodeAlloc;
- CPooledAlloc < CPPMContext > m_ContextAlloc;
+ mutable CSimplePooledAlloc < CPPMPYnode > m_NodeAlloc;
+ CPooledAlloc < CPPMPYContext > m_ContextAlloc;
- std::set<const CPPMContext *> m_setContexts;
+ std::set<const CPPMPYContext *> m_setContexts;
private:
@@ -138,7 +137,7 @@ namespace Dasher {
/// @}
- inline Dasher::CPPMPYLanguageModel::CPPMnode::CPPMnode(int sym):symbol(sym) {
+ inline Dasher::CPPMPYLanguageModel::CPPMPYnode::CPPMPYnode(int sym):symbol(sym) {
// child.clear();
// pychild.clear();
@@ -152,7 +151,7 @@ namespace Dasher {
}
}
- inline CPPMPYLanguageModel::CPPMnode::CPPMnode() {
+ inline CPPMPYLanguageModel::CPPMPYnode::CPPMPYnode() {
// child.clear();
// pychild.clear();
@@ -169,7 +168,7 @@ namespace Dasher {
}
inline CLanguageModel::Context CPPMPYLanguageModel::CreateEmptyContext() {
- CPPMContext *pCont = m_ContextAlloc.Alloc();
+ CPPMPYContext *pCont = m_ContextAlloc.Alloc();
*pCont = *m_pRootContext;
// m_setContexts.insert(pCont);
@@ -178,8 +177,8 @@ namespace Dasher {
}
inline CLanguageModel::Context CPPMPYLanguageModel::CloneContext(Context Copy) {
- CPPMContext *pCont = m_ContextAlloc.Alloc();
- CPPMContext *pCopy = (CPPMContext *) Copy;
+ CPPMPYContext *pCont = m_ContextAlloc.Alloc();
+ CPPMPYContext *pCopy = (CPPMPYContext *) Copy;
*pCont = *pCopy;
// m_setContexts.insert(pCont);
@@ -189,9 +188,9 @@ namespace Dasher {
inline void CPPMPYLanguageModel::ReleaseContext(Context release) {
- // m_setContexts.erase(m_setContexts.find((CPPMContext *) release));
+ // m_setContexts.erase(m_setContexts.find((CPPMPYContext *) release));
- m_ContextAlloc.Free((CPPMContext *) release);
+ m_ContextAlloc.Free((CPPMPYContext *) release);
}
} // end namespace Dasher
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]