[dasher: 14/43] Progress indication when training



commit b5fa4f0071a365821cb0ba5e482e05434ee291a1
Author: Alan Lawrence <acl33 inf phy cam ac uk>
Date:   Mon Dec 21 19:53:46 2009 +0000

    Progress indication when training
    
    SymbolStream calls virtual bytesRead() at start of each block;
     CTrainer takes a ProgressIndicator, intercepts calls from SymbolStream (via
     a ProgressStream subclass) and passes on cumulative bytes; NCManager
     implements it (as ProgressNotifier) to convert to a percentage & call
     SetLockStatus.
    
    Includes making CDasherInterfaceBase::GetFileSize public and fixing iPhone impl
      (appears [struct/] stat is dropped in iOS4, so use fopen+fseek instead.
         Does this give us a portable way to get file size???)

 Src/DasherCore/Alphabet/AlphabetMap.cpp      |   12 ++++----
 Src/DasherCore/Alphabet/AlphabetMap.h        |    7 ++++-
 Src/DasherCore/DasherInterfaceBase.h         |   13 ++++----
 Src/DasherCore/NodeCreationManager.cpp       |   40 +++++++++++++++++++++----
 Src/DasherCore/Trainer.cpp                   |   21 +++++++++++--
 Src/DasherCore/Trainer.h                     |   13 +++++++-
 Src/Gtk2/DasherControl.h                     |    2 +-
 Src/MacOSX/COSXDasherControl.h               |    2 +-
 Src/Win32/Dasher.h                           |    2 +-
 Src/iPhone/Classes/CDasherInterfaceBridge.h  |    2 +-
 Src/iPhone/Classes/CDasherInterfaceBridge.mm |   13 ++++----
 11 files changed, 90 insertions(+), 37 deletions(-)
---
diff --git a/Src/DasherCore/Alphabet/AlphabetMap.cpp b/Src/DasherCore/Alphabet/AlphabetMap.cpp
index c30f0f5..8adf97b 100644
--- a/Src/DasherCore/Alphabet/AlphabetMap.cpp
+++ b/Src/DasherCore/Alphabet/AlphabetMap.cpp
@@ -76,7 +76,7 @@ int utf8_length::operator[](const unsigned char i) const
 ////////////////////////////////////////////////////////////////////////////
 
 CAlphabetMap::SymbolStream::SymbolStream(std::istream &_in)
-: in(_in), pos(0), len(0) {
+: pos(0), len(0), in(_in) {
   readMore();
 }
 
@@ -95,16 +95,16 @@ void CAlphabetMap::SymbolStream::readMore() {
 
 inline int CAlphabetMap::SymbolStream::findNext() {
   for (;;) {
-    if (pos + m_utf8_count_array.max_length > len && len==1024) {
-      //may need more bytes for next char; and input not yet exhausted.
+    if (pos + m_utf8_count_array.max_length > len) {
+      //may need more bytes for next char
       if (pos) {
         //shift remaining bytes to beginning
         len-=pos; //len of them
-        //memcpy isn't safe for overlapping regions of memory...
-        DASHER_ASSERT(len<pos); //...but they really shouldn't overlap!
-        memcpy(buf, &buf[pos], len);
+        memmove(buf, &buf[pos], len);
+        bytesRead(pos);
         pos=0;
       }
+      //and look for more
       readMore();
     }
     //if still don't have any chars after attempting to read more...EOF!
diff --git a/Src/DasherCore/Alphabet/AlphabetMap.h b/Src/DasherCore/Alphabet/AlphabetMap.h
index 9939673..4ba04bc 100644
--- a/Src/DasherCore/Alphabet/AlphabetMap.h
+++ b/Src/DasherCore/Alphabet/AlphabetMap.h
@@ -100,6 +100,11 @@ public:
     /// the stream position. (Always constructs a string, which next() avoids for 
     /// single-octet chars, so may be slower.)
     std::string peekBack();
+  protected:
+    ///Called periodically to indicate some number of bytes have been read.
+    /// Default implementation does nothing; subclasses may override for e.g. logging.
+    /// \param num number of octets read _since_ the previous call.
+    virtual void bytesRead(off_t num) {};
   private:
     ///Finds beginning of next unicode character, at position 'pos' or later,
     /// filling buffer and skipping invalid characters as necessary.
@@ -109,7 +114,7 @@ public:
     inline int findNext();
     void readMore();
     char buf[1024];
-    int pos, len;
+    off_t pos, len;
     std::istream &in;
   };
   
diff --git a/Src/DasherCore/DasherInterfaceBase.h b/Src/DasherCore/DasherInterfaceBase.h
index 5e7df51..0d4de6b 100644
--- a/Src/DasherCore/DasherInterfaceBase.h
+++ b/Src/DasherCore/DasherInterfaceBase.h
@@ -461,6 +461,12 @@ public:
   /// public so e.g. iPhone can flush the buffer when app is backgrounded.
   void WriteTrainFileFull();
 
+  ///
+  /// Obtain the size in bytes of a file - the way to do this is
+  /// dependent on the OS (TODO: Check this - any posix on Windows?)
+  ///
+  virtual int GetFileSize(const std::string &strFileName) = 0;
+
 protected:
 
   /// @name Startup
@@ -579,13 +585,6 @@ protected:
   virtual void CreateSettingsStore() = 0;
 
   ///
-  /// Obtain the size in bytes of a file - the way to do this is
-  /// dependent on the OS (TODO: Check this - any posix on Windows?)
-  ///
-
-  virtual int GetFileSize(const std::string &strFileName) = 0;
-
-  ///
   /// Start the callback timer
   ///
 
diff --git a/Src/DasherCore/NodeCreationManager.cpp b/Src/DasherCore/NodeCreationManager.cpp
index 47e20bd..838ec4d 100644
--- a/Src/DasherCore/NodeCreationManager.cpp
+++ b/Src/DasherCore/NodeCreationManager.cpp
@@ -8,6 +8,30 @@
 
 using namespace Dasher;
 
+class ProgressNotifier : public CTrainer::ProgressIndicator {
+public:
+  ProgressNotifier(CDasherInterfaceBase *pInterface, CTrainer *pTrainer)
+  : m_pInterface(pInterface), m_pTrainer(pTrainer) { }
+  void bytesRead(off_t n) {
+    int iNewPercent = ((m_iStart + n)*100)/m_iStop;
+    if (iNewPercent != m_iPercent) {
+      m_pInterface->SetLockStatus(m_strDisplay, m_iPercent = iNewPercent);
+    }
+  }
+  void run(const string &strDisplay, string strFile) {
+    m_pInterface->SetLockStatus(m_strDisplay=strDisplay, m_iPercent=0);
+    m_iStart = 0;
+    m_iStop = m_pInterface->GetFileSize(strFile);
+    m_pTrainer->LoadFile(strFile,this);
+  }
+private:
+  CDasherInterfaceBase *m_pInterface;
+  CTrainer *m_pTrainer;
+  off_t m_iStart, m_iStop;
+  int m_iPercent;
+  string m_strDisplay;
+};
+
 CNodeCreationManager::CNodeCreationManager(Dasher::CDasherInterfaceBase *pInterface,
                                            Dasher::CEventHandler *pEventHandler, 
                                            CSettingsStore *pSettingsStore,
@@ -56,13 +80,14 @@ CNodeCreationManager::CNodeCreationManager(Dasher::CDasherInterfaceBase *pInterf
   m_pTrainer = m_pAlphabetManager->GetTrainer();
     
   if (!pAlphInfo->GetTrainingFile().empty()) {
+    ProgressNotifier pn(pInterface, m_pTrainer);
+
     //1. Look for system training text...
-    pInterface->SetLockStatus("Training on System Text", 0);
-    m_pTrainer->LoadFile(GetStringParameter(SP_SYSTEM_LOC) + pAlphInfo->GetTrainingFile());
-    //Now add in any user-provided individual training text...
-    pInterface->SetLockStatus("Training on User Text", 0);
-    m_pTrainer->LoadFile(GetStringParameter(SP_USER_LOC) + pAlphInfo->GetTrainingFile());
-    pInterface->SetLockStatus("",-1);
+    pn.run("Training on System Text", GetStringParameter(SP_SYSTEM_LOC) + pAlphInfo->GetTrainingFile());
+    //2. Now add in any user-provided individual training text...
+    pn.run("Training on User Text", GetStringParameter(SP_USER_LOC) + pAlphInfo->GetTrainingFile());
+    //3. Finished, so unlock.
+    m_pInterface->SetLockStatus("", -1);
   }
 #ifdef DEBUG
   else {
@@ -145,5 +170,6 @@ void CNodeCreationManager::AddExtras(CDasherNode *pParent) {
 
 void 
 CNodeCreationManager::ImportTrainingText(const std::string &strPath) {
-	m_pTrainer->LoadFile(strPath);
+  ProgressNotifier pn(m_pInterface, m_pTrainer);
+	pn.run("Training on New Text", strPath);
 }
diff --git a/Src/DasherCore/Trainer.cpp b/Src/DasherCore/Trainer.cpp
index 0e65390..c8c267d 100644
--- a/Src/DasherCore/Trainer.cpp
+++ b/Src/DasherCore/Trainer.cpp
@@ -78,8 +78,20 @@ bool CTrainer::readEscape(CLanguageModel::Context &sContext, CAlphabetMap::Symbo
   return true;  
 }
 
+class ProgressStream : public CAlphabetMap::SymbolStream {
+public:
+  ProgressStream(std::istream &_in, CTrainer::ProgressIndicator *pProg, off_t iStart=0) : SymbolStream(_in), m_iLastPos(iStart), m_pProg(pProg) {
+  }
+  void bytesRead(off_t num) {
+    if (m_pProg) m_pProg->bytesRead(m_iLastPos += num);
+  }
+  off_t m_iLastPos;
+private:
+  CTrainer::ProgressIndicator *m_pProg;
+};
+
 void 
-Dasher::CTrainer::LoadFile(const std::string &strFileName) {
+Dasher::CTrainer::LoadFile(const std::string &strFileName, ProgressIndicator *pProg) {
   if(strFileName == "")
     return;
   
@@ -97,6 +109,7 @@ Dasher::CTrainer::LoadFile(const std::string &strFileName) {
   if(!strcmp(szTestBuffer, "<?xml")) {
     //Invoke AbstractXMLParser method
     m_bInSegment = false;
+    m_iLastBytes=0;
     ParseFile(strFileName);
   } else {
     std::ifstream in(strFileName.c_str(), std::ios::binary);
@@ -104,7 +117,7 @@ Dasher::CTrainer::LoadFile(const std::string &strFileName) {
       std::cerr << "Unable to open file \"" << strFileName << "\" for reading" << std::endl;
       return;
     }
-    CAlphabetMap::SymbolStream syms(in);
+    ProgressStream syms(in,pProg);
     Train(syms);
   
     in.close();
@@ -121,9 +134,9 @@ void CTrainer::XmlStartHandler(const XML_Char *szName, const XML_Char **pAtts) {
 void CTrainer::XmlEndHandler(const XML_Char *szName) {
   if(!strcmp(szName, "segment")) {
     std::istringstream in(m_strCurrentText);
-    CAlphabetMap::SymbolStream syms(in);
+    ProgressStream syms(in, m_pProg, m_iLastBytes);
     Train(syms);
-    
+    m_iLastBytes = syms.m_iLastPos; //count that segment, ready for next
     m_bInSegment = false;
   }
 }
diff --git a/Src/DasherCore/Trainer.h b/Src/DasherCore/Trainer.h
index a48a894..ca201e7 100644
--- a/Src/DasherCore/Trainer.h
+++ b/Src/DasherCore/Trainer.h
@@ -13,7 +13,12 @@ namespace Dasher {
   public:
     CTrainer(CLanguageModel *pLanguageModel, const CAlphInfo *pInfo, const CAlphabetMap *pAlphabet);
 
-    void LoadFile(const std::string &strFileName);
+    class ProgressIndicator {
+    public:
+      virtual void bytesRead(off_t)=0;
+    };
+    
+    void LoadFile(const std::string &strFileName, ProgressIndicator *pProg=NULL);
   
   protected:
     ///Override AbstractXMLParser methods to extract text in <segment>...</segment> pairs
@@ -22,7 +27,6 @@ namespace Dasher {
     void XmlCData(const XML_Char *szS, int iLen);
 
     virtual void Train(CAlphabetMap::SymbolStream &syms);
-    CLanguageModel *m_pLanguageModel;
     
     ///Try to read a context-switch escape sequence from the symbolstream.
     /// \param sContext context to be reinitialized if a context-switch command is found
@@ -32,6 +36,7 @@ namespace Dasher {
     bool readEscape(CLanguageModel::Context &sContext, CAlphabetMap::SymbolStream &syms);
     
     const CAlphabetMap *m_pAlphabet;
+    CLanguageModel *m_pLanguageModel;
     const CAlphInfo *m_pInfo;
     // symbol number in alphabet of the context-switch character (maybe 0 if not in alphabet!)
     int m_iCtxEsc;
@@ -39,6 +44,10 @@ namespace Dasher {
     //For dealing with XML CData:    
     bool m_bInSegment;
     std::string m_strCurrentText;
+    ///Number of bytes read up to and including end of _previous_ segment in XML.
+    off_t m_iLastBytes;
+    ///Store ProgressIndicator only when parsing XML
+    ProgressIndicator *m_pProg;
   };
 	
   /// Trains a PPMPYLanguageModel (dual alphabet), as for e.g. MandarinDasher.
diff --git a/Src/Gtk2/DasherControl.h b/Src/Gtk2/DasherControl.h
index 3524d60..2187e03 100644
--- a/Src/Gtk2/DasherControl.h
+++ b/Src/Gtk2/DasherControl.h
@@ -135,6 +135,7 @@ public:
   virtual void Stop();
 
   virtual void WriteTrainFile(const std::string &filename, const std::string &strNewText);
+  virtual int GetFileSize(const std::string &strFileName);
 
   virtual void ClearAllContext();
   virtual std::string GetAllContext();
@@ -167,7 +168,6 @@ private:
   virtual void CreateModules();
   virtual void SetupUI();
   virtual void CreateSettingsStore();
-  virtual int GetFileSize(const std::string &strFileName);
   virtual void StartTimer();
   virtual void ShutdownTimer();
 
diff --git a/Src/MacOSX/COSXDasherControl.h b/Src/MacOSX/COSXDasherControl.h
index 5d39e21..077b19b 100644
--- a/Src/MacOSX/COSXDasherControl.h
+++ b/Src/MacOSX/COSXDasherControl.h
@@ -49,6 +49,7 @@ public:
   std::string GetAllContext();
   void ClearAllContext();
   std::string GetContext(unsigned int iOffset, unsigned int iLength);
+  virtual int GetFileSize(const std::string &strFileName);
 private:
   virtual void ScanAlphabetFiles(std::vector<std::string> &vFileList);
   virtual void ScanColourFiles(std::vector<std::string> &vFileList);
@@ -56,7 +57,6 @@ private:
   virtual void CreateModules();
   virtual void SetupUI();
   virtual void CreateSettingsStore();
-  virtual int GetFileSize(const std::string &strFileName);
   virtual void StartTimer();
   virtual void ShutdownTimer();
   virtual bool SupportsSpeech();
diff --git a/Src/Win32/Dasher.h b/Src/Win32/Dasher.h
index 1b128aa..1ff11a9 100644
--- a/Src/Win32/Dasher.h
+++ b/Src/Win32/Dasher.h
@@ -66,6 +66,7 @@ public:
   bool SupportsClipboard() {return true;};
   void CopyToClipboard(const std::string &text);
   
+  virtual int GetFileSize(const std::string &strFileName);
 private:
 
   virtual void ScanAlphabetFiles(std::vector<std::string> &vFileList);
@@ -77,7 +78,6 @@ private:
   virtual void ShutdownTimer();
   void CreateSettingsStore();
 
-  virtual int GetFileSize(const std::string &strFileName);
   void ScanDirectory(const Tstring &strMask, std::vector<std::string> &vFileList);
   bool                    GetWindowSize(int* pTop, int* pLeft, int* pBottom, int* pRight);
   void                    Log();                        // Does the logging
diff --git a/Src/iPhone/Classes/CDasherInterfaceBridge.h b/Src/iPhone/Classes/CDasherInterfaceBridge.h
index 1cc52d2..f30d398 100644
--- a/Src/iPhone/Classes/CDasherInterfaceBridge.h
+++ b/Src/iPhone/Classes/CDasherInterfaceBridge.h
@@ -44,6 +44,7 @@ public:
   
   void SetTiltAxes(Vec3 main, float off, Vec3 slow, float off2);
   virtual void WriteTrainFile(const std::string &filename,const std::string &strNewText);
+  virtual int GetFileSize(const std::string &strFileName);
   bool SupportsClipboard() {return true;}
   void CopyToClipboard(const std::string &strText);
   bool SupportsSpeech();
@@ -61,7 +62,6 @@ private:
   virtual void CreateModules();
   virtual void SetupUI();
   virtual void CreateSettingsStore();
-  virtual int GetFileSize(const std::string &strFileName);
   virtual void StartTimer();
   virtual void ShutdownTimer();
   
diff --git a/Src/iPhone/Classes/CDasherInterfaceBridge.mm b/Src/iPhone/Classes/CDasherInterfaceBridge.mm
index 9537645..e44636e 100644
--- a/Src/iPhone/Classes/CDasherInterfaceBridge.mm
+++ b/Src/iPhone/Classes/CDasherInterfaceBridge.mm
@@ -239,12 +239,13 @@ unsigned int CDasherInterfaceBridge::ctrlDelete(bool bForwards, CControlManager:
 }
 
 int CDasherInterfaceBridge::GetFileSize(const std::string &strFileName) {
-  struct stat sStatInfo;
-  
-  if(!stat(strFileName.c_str(), &sStatInfo))
-    return sStatInfo.st_size;
-  else
-    return 0;
+  int ret=0;
+  if (FILE *file = fopen(strFileName.c_str(), "r")) { //returns non-null for success
+    if (!fseek(file, 0, SEEK_END)) //returns non-null as error code
+      ret = ftell(file);
+    fclose(file);
+  }
+  return ret;
 }
 
 void CDasherInterfaceBridge::WriteTrainFile(const std::string &filename,const std::string &strNewText) {



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]