[dasher] These files came with "Refactor Mandarin changes to CAlphabetManager

From: Patrick Welche <pwelche src gnome org>
To: svn-commits-list gnome org
Cc:
Subject: [dasher] These files came with "Refactor Mandarin changes to CAlphabetManager
Date: Sat, 15 Aug 2009 14:23:23 +0000 (UTC)
commit 7adcda81a10665dc310d02933218b01ed1567cf9
Author: Alan Lawrence <acl33 inf phy cam ac uk>
Date:   Mon Aug 10 10:19:35 2009 +0200

    These files came with "Refactor Mandarin changes to CAlphabetManager
    into CMandarinAlphMgr subclass".

 Src/DasherCore/ConversionHelper.cpp |  398 +++++++++++++++++++++++++++++++++++
 Src/DasherCore/MandarinAlphMgr.cpp  |   91 ++++++++
 Src/DasherCore/MandarinAlphMgr.h    |   57 +++++
 3 files changed, 546 insertions(+), 0 deletions(-)
---
diff --git a/Src/DasherCore/ConversionHelper.cpp b/Src/DasherCore/ConversionHelper.cpp
new file mode 100644
index 0000000..03bc161
--- /dev/null
+++ b/Src/DasherCore/ConversionHelper.cpp
@@ -0,0 +1,398 @@
+// ConversionHelper.cpp
+//
+// Copyright (c) 2007 The Dasher Team
+//
+// This file is part of Dasher.
+//
+// Dasher is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// Dasher is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Dasher; if not, write to the Free Software 
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif 
+
+#include "ConversionHelper.h"
+#include "Event.h"
+#include "EventHandler.h"
+#include "NodeCreationManager.h"
+#include "DasherNode.h"
+
+#include <iostream>
+#include <cstring>
+#include <string>
+#include <vector>
+#include <stdlib.h>
+
+//Note the new implementation in Mandarin Dasher may not be compatible with the previous implementation of Japanese Dasher
+//Need to reconcile (a small project)
+
+using namespace Dasher;
+
+CConversionHelper::CConversionHelper(CNodeCreationManager *pNCManager, CAlphabet *pAlphabet)
+  : CConversionManager(pNCManager, pAlphabet),
+    m_pLanguageModel(pNCManager->GetLanguageModel()) {
+  // Colour table, two rows of three (colour names as annotated by the original author):
+  //   row 0: light blue, very light green, light yellow
+  //   row 1: light purple, brownish, red
+  static const int aiPalette[2][3] = { {66, 64, 62}, {78, 81, 60} };
+  for(int iRow = 0; iRow < 2; ++iRow)
+    for(int iCol = 0; iCol < 3; ++iCol)
+      colourStore[iRow][iCol] = aiPalette[iRow][iCol];
+  m_iLearnContext = m_pLanguageModel->CreateEmptyContext(); // context that SetFlag() learns into
+}
+
+// Creates the root of a conversion subtree through CConversionManager::GetRoot(), then
+// attaches the language model and a context. NOTE(review): pParent is also handed to the base.
+CDasherNode *CConversionHelper::GetRoot(CDasherNode *pParent, int iLower, int iUpper, void *pUserData) {
+  CDasherNode *pNewNode = CConversionManager::GetRoot(pParent, iLower, iUpper, pUserData);
+  SConversionData *pNodeUserData = static_cast<SConversionData *>(pNewNode->m_pUserData);
+  pNodeUserData->pLanguageModel = m_pLanguageModel;
+
+  // The parent's user data is only read as alphabet data when its manager reports ID 0.
+  // A NULL pParent is now skipped here as well, as CMandarinAlphMgr::GetRoot() already does.
+  CLanguageModel::Context iParentContext = 0;
+  if(pParent && pParent->m_pNodeManager->GetID() == 0)
+    iParentContext = static_cast<CAlphabetManager::SAlphabetData *>(pParent->m_pUserData)->iContext;
+
+  // Clone the parent's context when it has a non-zero one; otherwise start from an empty one.
+  pNodeUserData->iContext = iParentContext ? m_pLanguageModel->CloneContext(iParentContext)
+                                           : m_pLanguageModel->CreateEmptyContext();
+  return pNewNode;
+}
+
+// TODO: This function needs to be significantly tidied up
+// TODO: get rid of pSizes
+
+// Delegates the sizing of the sibling list that starts at *pNode to AssignSizes().
+//
+// pNode      - address of the pointer to the first sibling; handed on to AssignSizes().
+// context    - language model context; handed on to AssignSizes().
+// iNChildren - value on entry is ignored; it is overwritten with a fresh sibling count.
+//
+// Note that because iNChildren is passed by value, the recount is not visible
+// to the caller; it only determines the count given to AssignSizes().
+//
+// AssignSizes() additionally receives the LP_NORMALIZATION and LP_UNIFORM long
+// parameters read from m_pNCManager.
+void CConversionHelper::AssignChildSizes(SCENode **pNode, CLanguageModel::Context context, int iNChildren) {
+  // Design notes carried over from the original implementation:
+  //
+  // * Normalisation is done additively rather than multiplicatively, so it
+  //   is not quite what was originally planned (probably not much of a
+  //   problem).
+  // * More serious: the ordering is lost when the tree is created, because
+  //   nodes beginning with the same character are merged. This needs to be
+  //   thought out, but the probabilities should probably be computed when
+  //   the candidate tree is constructed rather than the Dasher tree.
+  //   (Aside: is there any real point in having two separate trees? Surely
+  //   Dasher nodes could just be created right away.)
+  // * The algorithm should allow for the conversion engine returning
+  //   probabilities itself; these should be used in preference to values
+  //   inferred from the ordering.
+  // * Maybe the choices should be presented in lexicographic order rather
+  //   than in the order returned (not sure - it needs thinking through).
+  //
+  // On the general idea: this is very close to being a fully fledged
+  // language model, so that idea seems right, but something more flexible
+  // may be needed. Possible sources of probabilities:
+  //
+  //   1. Probabilities provided directly with the translation? May be hard
+  //      to represent in the lattice itself.
+  //   2. A full n-gram language model - in general, assign probabilities
+  //      to paths through the lattice.
+  //   3. Ordered results but no probabilities - use a power law rule or
+  //      the like.
+  //
+  // It is tempting to assume (1) and (2) can be implemented together, with
+  // a second call to the library at node creation time, and (3) can be the
+  // fallback if that does not work.
+  //
+  // Things still to be thought out:
+  //   - How to deal with contexts: backtrace at time of call, or stored in
+  //     the node?
+  //   - Sharing of language model infrastructure?
+  //
+  // Disabled legacy code, kept for reference until it is reimplemented:
+  //
+  //   Debug trace:
+  //     std::cout << "b" << std::endl;
+  //
+  //   Testing for CalculateScore, stage 1:
+  //     int test;
+  //     test = CalculateScore(pNode, 1);
+  //     std::cout<<"current character"<<pCurrentSCENode->pszConversion<<std::endl;
+  //     std::cout<<"the score for the second candidate is"<<test<<std::endl;
+  //
+  //   Assigning scores and calculating node size
+  //   (Ph: feel free to edit this part to make it more structured):
+  //     int iSize[pCurrentSCEChild->IsHeadAndCandNum];
+  //     int score[pCurrentSCEChild->IsHeadAndCandNum];
+  //     int total =0;
+  //     int max = 0;
+  //     int CandNum = pCurrentSCEChild -> IsHeadAndCandNum;
+  //
+  //   CHANGE:
+  //     int iRemaining(m_pNCManager->GetLongParameter(LP_NORMALIZATION));
+  //
+  //   Lookup scores for each of the children (TODO: Reimplement):
+  //     for(int i(0); i < pCurrentSCEChild->IsHeadAndCandNum; ++i){
+  //       score[i] = CalculateScore(pNode, i);
+  //       total += score[i];
+  //       if(i!=0)
+  //         if (score[i]>score[i-1])
+  //           max = score[i];
+  //     }
+  //
+  //   Sibling-count trace:
+  //     std::cout<<"iNChildren: "<<iNChildren<<std::endl;
+
+  // Count the siblings by walking the chain from *pNode until GetNext()
+  // returns a null pointer.
+  iNChildren = 0;
+  for(SCENode *pSibling = *pNode; pSibling; pSibling = pSibling->GetNext())
+    ++iNChildren;
+
+  // Hand the list, the context and the recounted length to AssignSizes()
+  // (per the original note: use the scores to calculate the size of the nodes).
+  AssignSizes(pNode, context,
+              m_pNCManager->GetLongParameter(LP_NORMALIZATION),
+              m_pNCManager->GetLongParameter(LP_UNIFORM),
+              iNChildren);
+}
+
+void CConversionHelper::PopulateChildren( CDasherNode *pNode ) {
+  DASHER_ASSERT(m_pNCManager);
+  // Builds pNode's children: one node per SCENode candidate, or else a single root obtained from m_pNCManager.
+  SConversionData * pCurrentDataNode (static_cast<SConversionData *>(pNode->m_pUserData));
+  CDasherNode *pNewNode;
+
+  // Do the conversion and build the tree (lattice) if it hasn't been
+  // done already.
+  // NOTE: the only guard is the bisRoot flag, so BuildTree() runs on every call made for a root node.
+  
+
+  if(pCurrentDataNode->bisRoot) {
+    BuildTree(pNode);
+  }
+
+  SCENode *pCurrentSCEChild;
+  // The candidates for this node are the children of its lattice node, when it has one.
+  if(pCurrentDataNode->pSCENode){
+    
+    //    RecursiveDumpTree(pCurrentDataNode->pSCENode, 1);
+    pCurrentSCEChild = pCurrentDataNode->pSCENode->GetChild();
+
+  }
+  else {
+    //    if(m_pRoot && !pCurrentDataNode->bType)
+    //  pCurrentSCEChild = m_pRoot[0];
+    //else
+      pCurrentSCEChild = 0;
+  }
+  
+  if(pCurrentSCEChild) {
+    //    std::cout<<"Populating character nodes!"<<std::endl;
+    //    std::cout << "Current SCE Child: " << pCurrentSCEChild << std::endl;
+
+    // TODO: Reimplement (in subclass) -----
+    
+//     if(m_iHZCount>1)
+//       if(!m_bPhrasesProcessed[pCurrentSCEChild->AcCharCount-1])
+//    	if(pCurrentSCEChild->AcCharCount<m_iHZCount)
+// 	  ProcessPhrase(pCurrentSCEChild->AcCharCount-1);
+
+    // -----
+
+    //int *iSize;
+    
+    //    iSize = new int[pCurrentSCEChild->IsHeadAndCandNum];
+
+    
+
+    // NOTE(review): AssignChildSizes() recounts the siblings itself, so the count passed as the last argument is ignored.
+    AssignChildSizes(&pCurrentSCEChild, pCurrentDataNode->iContext, pCurrentSCEChild->IsHeadAndCandNum);
+
+    int iIdx(0);
+    int iCum(0);
+  
+    //    int parentClr = pNode->Colour();
+    // TODO: Fixme
+    int parentClr = 0;
+
+    // Finally loop through and create the children
+
+    do {
+      //      std::cout << "Current scec: " << pCurrentSCEChild << std::endl;
+      // Children are laid out back to back: each range starts where the previous one ended (iCum).
+      int iLbnd(iCum);
+      int iHbnd(iCum + pCurrentSCEChild->NodeSize);
+		//m_pNCManager->GetLongParameter(LP_NORMALIZATION));//
+
+      iCum = iHbnd;
+      
+      // TODO: Parameters here are placeholders - need to figure out
+      // what's right
+      
+
+      CDasherNode::SDisplayInfo *pDisplayInfo = new CDasherNode::SDisplayInfo;
+      pDisplayInfo->iColour = AssignColour(parentClr, pCurrentSCEChild, iIdx);
+      pDisplayInfo->bShove = true;
+      pDisplayInfo->bVisible = true;
+      
+      //  std::cout << "#" << pCurrentSCEChild->pszConversion << "#" << std::endl;
+
+      pDisplayInfo->strDisplayText = pCurrentSCEChild->pszConversion;
+       
+      pNewNode = new CDasherNode(pNode, iLbnd, iHbnd, pDisplayInfo);
+      
+      // TODO: Reimplement ----
+
+      // FIXME - handle context properly
+      //      pNewNode->SetContext(m_pLanguageModel->CreateEmptyContext());
+      // -----
+      
+      pNewNode->m_pNodeManager = this;
+      pNewNode->m_pNodeManager->Ref();
+
+      SConversionData *pNodeUserData = new SConversionData;
+      pNodeUserData->bisRoot = false;
+      pNodeUserData->pSCENode = pCurrentSCEChild;
+      pNodeUserData->pLanguageModel = pCurrentDataNode->pLanguageModel;
+      pNodeUserData->iOffset = pCurrentDataNode->iOffset + 1;
+      // NOTE(review): iContext is only assigned below when a language model is present - confirm SConversionData initialises it otherwise.
+      if(pCurrentDataNode->pLanguageModel) {
+	CLanguageModel::Context iContext;
+	iContext = pCurrentDataNode->pLanguageModel->CloneContext(pCurrentDataNode->iContext);
+	
+	if(pCurrentSCEChild ->Symbol !=-1)
+	  pNodeUserData->pLanguageModel->EnterSymbol(iContext, pCurrentSCEChild->Symbol); // TODO: Don't use symbols?
+      
+      
+	pNodeUserData->iContext = iContext;
+      }
+      
+      pNewNode->m_pUserData = pNodeUserData;
+      
+      pNode->Children().push_back(pNewNode);
+
+      pCurrentSCEChild = pCurrentSCEChild->GetNext();
+      ++iIdx;
+    }while(pCurrentSCEChild);
+
+  }
+
+  else {//End of conversion -> default to alphabet
+   
+      //Phil//
+      // TODO: Placeholder algorithm here
+      // TODO: Add an 'end of conversion' node?
+      int iLbnd(0);
+      int iHbnd(m_pNCManager->GetLongParameter(LP_NORMALIZATION)); 
+      
+      CAlphabetManager::SRootData oRootData;
+      oRootData.szContext = NULL;
+      oRootData.iOffset = pCurrentDataNode->iOffset;
+      // NOTE(review): the first argument 0 presumably selects the alphabet manager (cf. the GetID()==0 test in GetRoot) - confirm.
+      pNewNode = m_pNCManager->GetRoot(0, pNode, iLbnd, iHbnd, &oRootData);
+      pNewNode->SetFlag(NF_SEEN, false);
+      
+      pNode->Children().push_back(pNewNode);
+      //    pNode->SetHasAllChildren(false);
+      //}
+    /* What does the following code do?
+    else {
+
+      std::cout<<"DOES IT EVER COME TO HERE?"<<std::endl;
+      int iLbnd(0);
+      int iHbnd(m_pNCManager->GetLongParameter(LP_NORMALIZATION)); 
+
+      CDasherNode::SDisplayInfo *pDisplayInfo = new CDasherNode::SDisplayInfo;
+      pDisplayInfo->iColour = AssignColour(0, pCurrentSCEChild, 0);
+      pDisplayInfo->bShove = true;
+      pDisplayInfo->bVisible = true;
+      pDisplayInfo->strDisplayText = "";
+       
+      pNewNode = new CDasherNode(pNode, iLbnd, iHbnd, pDisplayInfo);
+      
+      // TODO: Reimplement ----
+
+      // FIXME - handle context properly
+      //      pNewNode->SetContext(m_pLanguageModel->CreateEmptyContext());
+      // -----
+      
+      pNewNode->m_pNodeManager = this;
+      pNewNode->m_pNodeManager->Ref();
+
+      SConversionData *pNodeUserData = new SConversionData;
+      pNodeUserData->bType = true;
+      pNodeUserData->pSCENode = NULL;
+      pNodeUserData->pLanguageModel = pCurrentDataNode->pLanguageModel;
+      pNodeUserData->iOffset = pCurrentDataNode->iOffset + 1;
+     
+      pNewNode->m_pUserData = pNodeUserData;
+
+      pNewNode->SetFlag(NF_SEEN, false);
+      
+      pNode->Children().push_back(pNewNode);
+    }
+    */
+  }
+}
+
+// Runs the conversion for a conversion root and stores the resulting SCENode
+// tree in the root's user data (pSCENode).
+void CConversionHelper::BuildTree(CDasherNode *pRoot) {
+  // Get the pinyin (roman) string to translate from the 'Display Text' in the
+  // alphabet file (refer to alphabet.spyDict.xml).
+  // NOTE(review): m_pUserData is read as SAlphabetData here and as SConversionData
+  // below (CMandarinAlphMgr::CreateSymbolNode writes iSymbol through the same cast);
+  // this presumably relies on the two structs agreeing on iSymbol - confirm.
+  CAlphabetManager::SAlphabetData *pRootAlphabetData = static_cast<CAlphabetManager::SAlphabetData *>(pRoot->m_pUserData);
+  std::string strCurrentString = m_pAlphabet->GetDisplayText(pRootAlphabetData->iSymbol);
+
+  // Start from NULL so that a Convert() that returns without writing its output
+  // cannot leave an indeterminate pointer to be stored below. The returned
+  // success flag is still not acted on, as before.
+  SCENode *pStartTemp = NULL;
+  Convert(strCurrentString, &pStartTemp);
+
+  SConversionData *pRootConversionData = static_cast<CConversionHelper::SConversionData *>(pRoot->m_pUserData);
+  if(!(pRootConversionData->bisRoot)) {
+    std::cout<<"ERROR IN BUILD TREE"<<std::endl;
+    return;
+  }
+  // Store the conversion tree (SCENode tree) in the user data of the conversion root.
+  pRootConversionData->pSCENode = pStartTemp;
+}
+
+// Handles flag changes on a conversion node. Only NF_COMMITTED being set is
+// acted on: the node's symbol (if any) is passed to LearnSymbol() on the node's
+// language model with m_iLearnContext. All other flags are ignored.
+void CConversionHelper::SetFlag(CDasherNode *pNode, int iFlag, bool bValue) {
+  if(iFlag != NF_COMMITTED || !bValue)
+    return;
+
+  SConversionData *pData = static_cast<SConversionData *>(pNode->m_pUserData);
+  // Nodes without a lattice entry carry no symbol to learn.
+  SCENode *pSCENode = pData->pSCENode;
+  if(!pSCENode)
+    return;
+
+  // PopulateChildren() allows for nodes without a language model, so check
+  // for one here rather than dereferencing NULL. A Symbol of -1 is skipped.
+  CLanguageModel *pLan = pData->pLanguageModel;
+  const symbol s = pSCENode->Symbol;
+  if(pLan && s != -1)
+    pLan->LearnSymbol(m_iLearnContext, s);
+}
diff --git a/Src/DasherCore/MandarinAlphMgr.cpp b/Src/DasherCore/MandarinAlphMgr.cpp
new file mode 100644
index 0000000..4f5af1d
--- /dev/null
+++ b/Src/DasherCore/MandarinAlphMgr.cpp
@@ -0,0 +1,91 @@
+// MandarinAlphMgr.cpp
+//
+// Copyright (c) 2009 The Dasher Team
+//
+// This file is part of Dasher.
+//
+// Dasher is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// Dasher is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Dasher; if not, write to the Free Software 
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+#include "../Common/Common.h"
+
+#include "MandarinAlphMgr.h"
+#include "ConversionManager.h"
+#include "DasherInterfaceBase.h"
+#include "DasherNode.h"
+#include "Event.h"
+#include "EventHandler.h"
+#include "NodeCreationManager.h"
+
+
+#include <vector>
+#include <sstream>
+#include <iostream>
+
+using namespace Dasher;
+
+// Track memory leaks on Windows to the line that new'd the memory
+#ifdef _WIN32
+#ifdef _DEBUG_MEMLEAKS
+#define DEBUG_NEW new( _NORMAL_BLOCK, THIS_FILE, __LINE__ )
+#define new DEBUG_NEW
+#undef THIS_FILE
+static char THIS_FILE[] = __FILE__;
+#endif
+#endif
+
+CMandarinAlphMgr::CMandarinAlphMgr(CDasherInterfaceBase *pInterface, CNodeCreationManager *pNCManager,
+                                   CLanguageModel *pLanguageModel, CLanguageModel::Context iLearnContext)
+  : CAlphabetManager(pInterface, pNCManager, pLanguageModel, iLearnContext) {} // nothing beyond base construction
+
+// Builds the root via CAlphabetManager, then replaces its language model context
+// (Mandarin-specific override).
+CDasherNode *CMandarinAlphMgr::GetRoot(CDasherNode *pParent, int iLower, int iUpper, void *pUserData) {
+  CDasherNode *pRoot = CAlphabetManager::GetRoot(pParent, iLower, iUpper, pUserData);
+  SAlphabetData *pRootData = static_cast<SAlphabetData *>(pRoot->m_pUserData);
+  if(!pParent) {
+    // No parent to inherit from: start with an empty context.
+    pRootData->iContext = m_pLanguageModel->CreateEmptyContext();
+    return pRoot;
+  }
+  // The parent's user data is read as conversion data and its context is cloned.
+  CConversionManager::SConversionData *pParentData = static_cast<CConversionManager::SConversionData *>(pParent->m_pUserData);
+  pRootData->iContext = m_pLanguageModel->CloneContext(pParentData->iContext);
+  return pRoot;
+}
+
+// Symbols up to 1288 get a root from m_pNCManager->GetRoot(2, ...) instead of a regular
+// symbol node; everything else is delegated to CAlphabetManager.
+CDasherNode *CMandarinAlphMgr::CreateSymbolNode(CDasherNode *pParent, symbol iSymbol, unsigned int iLbnd, unsigned int iHbnd, symbol iExistingSymbol, CDasherNode *pExistingChild) {
+  // Per the original note, this switch lets punctuation nodes in Mandarin be treated the same
+  // way as English (display and populate next round) instead of invoking a conversion node.
+  if (iSymbol > 1288)
+    return CAlphabetManager::CreateSymbolNode(pParent, iSymbol, iLbnd, iHbnd, iExistingSymbol, pExistingChild);
+
+  SAlphabetData *pParentData = static_cast<SAlphabetData *>(pParent->m_pUserData);
+  CDasherNode *pConversionRoot = m_pNCManager->GetRoot(2, pParent, iLbnd, iHbnd, &(pParentData->iOffset));
+  static_cast<SAlphabetData *>(pConversionRoot->m_pUserData)->iSymbol = iSymbol;
+  return pConversionRoot;
+}
+
+// Context carry-over: returns a clone of the parent's context; iSymbol is not entered into it.
+CLanguageModel::Context CMandarinAlphMgr::CreateSymbolContext(SAlphabetData *pParentData, symbol iSymbol) {
+  CLanguageModel::Context iCarried = m_pLanguageModel->CloneContext(pParentData->iContext);
+  return iCarried;
+}
+
+void CMandarinAlphMgr::SetFlag(CDasherNode *pNode, int iFlag, bool bValue) {
+  if (iFlag == NF_COMMITTED) return; // learn-as-you-write is disabled for Mandarin Dasher
+  CAlphabetManager::SetFlag(pNode, iFlag, bValue); // every other flag goes to the base class
+}
\ No newline at end of file
diff --git a/Src/DasherCore/MandarinAlphMgr.h b/Src/DasherCore/MandarinAlphMgr.h
new file mode 100644
index 0000000..f247c00
--- /dev/null
+++ b/Src/DasherCore/MandarinAlphMgr.h
@@ -0,0 +1,57 @@
+// MandarinAlphMgr.h
+//
+// Copyright (c) 2009 The Dasher Team
+//
+// This file is part of Dasher.
+//
+// Dasher is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// Dasher is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Dasher; if not, write to the Free Software 
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+#ifndef __mandarinalphmgr_h__
+#define __mandarinalphmgr_h__
+
+#include "AlphabetManager.h"
+
+namespace Dasher {
+
+  class CDasherInterfaceBase;
+
+  /// \ingroup Model
+  /// @{
+
+  /// Overrides methods of CAlphabetManager for the changes needed by Mandarin Dasher.
+  ///
+  class CMandarinAlphMgr : public CAlphabetManager {
+  public:
+    CMandarinAlphMgr(CDasherInterfaceBase *pInterface, CNodeCreationManager *pNCManager,
+                     CLanguageModel *pLanguageModel, CLanguageModel::Context iLearnContext);
+
+    /// Get a new root node owned by this manager.
+    virtual CDasherNode *GetRoot(CDasherNode *pParent, int iLower, int iUpper, void *pUserData);
+
+    /// Set a flag on a node (overrides the CAlphabetManager handling).
+    virtual void SetFlag(CDasherNode *pNode, int iFlag, bool bValue);
+
+  protected:
+    /// Create the child node for one symbol.
+    virtual CDasherNode *CreateSymbolNode(CDasherNode *pParent, symbol iSymbol, unsigned int iLbnd, unsigned int iHbnd, symbol iExistingSymbol, CDasherNode *pExistingChild);
+    /// Create the language model context for a symbol's node.
+    virtual CLanguageModel::Context CreateSymbolContext(SAlphabetData *pParentData, symbol iSymbol);
+  };
+  /// @}
+
+}
+
+
+#endif
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]