[dasher: 178/217] Added UTF file loading and saving



commit 28caec404fa3765a45fcda8471582874a4c5c020
Author: Ada Majorek <amajorek google com>
Date:   Wed Jan 20 23:31:38 2016 -0800

    Added UTF file loading and saving
    
    fixes https://github.com/ipomoena/dasher/issues/76
    and most of https://github.com/ipomoena/dasher/issues/14
    The only piece left is the UI for changing the default encoding.

 Src/Common/AppSettingsData.h   |    1 +
 Src/Common/AppSettingsHeader.h |    2 +-
 Src/DasherCore/DasherTypes.h   |    1 +
 Src/Win32/Widgets/Edit.cpp     |  131 ++++++++++++++++++++++++----------------
 Src/Win32/Widgets/Edit.h       |    7 --
 5 files changed, 83 insertions(+), 59 deletions(-)
---
diff --git a/Src/Common/AppSettingsData.h b/Src/Common/AppSettingsData.h
index caeb311..a0b9692 100644
--- a/Src/Common/AppSettingsData.h
+++ b/Src/Common/AppSettingsData.h
@@ -36,6 +36,7 @@ Dasher::Settings::bp_table app_boolparamtable[] = {
 };
 
 Dasher::Settings::lp_table app_longparamtable[] = {
+  {APP_LP_FILE_ENCODING, "FileEncodingFormat", Persistence::PERSISTENT, -1, "FileEncodingFormat"},
   {APP_LP_EDIT_FONT_SIZE, "EditFontSize", Persistence::PERSISTENT, 0, "EditFontSize"},
   {APP_LP_EDIT_HEIGHT, "EditHeight", Persistence::PERSISTENT, 75, "The height of the edit window"},
   {APP_LP_EDIT_WIDTH, "EditWidth", Persistence::PERSISTENT, 200, "EditWidth"},
diff --git a/Src/Common/AppSettingsHeader.h b/Src/Common/AppSettingsHeader.h
index efcda71..bdab7f1 100644
--- a/Src/Common/AppSettingsHeader.h
+++ b/Src/Common/AppSettingsHeader.h
@@ -12,7 +12,7 @@ enum {
 };
 
 enum { 
-  APP_LP_EDIT_FONT_SIZE = END_OF_APP_BPS, // TODO Extract font size from APP_SP_EDIT_FONT as linux
+  APP_LP_FILE_ENCODING = END_OF_APP_BPS, APP_LP_EDIT_FONT_SIZE, // TODO Extract font size from APP_SP_EDIT_FONT as linux
   APP_LP_EDIT_HEIGHT,
   APP_LP_EDIT_WIDTH, 
   APP_LP_SCREEN_WIDTH, APP_LP_SCREEN_HEIGHT, APP_LP_SCREEN_WIDTH_H, APP_LP_SCREEN_HEIGHT_H, 
diff --git a/Src/DasherCore/DasherTypes.h b/Src/DasherCore/DasherTypes.h
index 12cb977..1b5126e 100644
--- a/Src/DasherCore/DasherTypes.h
+++ b/Src/DasherCore/DasherTypes.h
@@ -56,6 +56,7 @@ namespace Dasher {
   namespace Opts {
     // Numbers should be applied to elements of the following two enumerations as these preferences may be stored to file. Constancy between
     // versions is a good idea. It should *not* be assumed that the numbers map onto anything useful. Different codepages may be appropriate on different systems for different character sets.
+    enum FileEncodingFormats { UserDefault = -1, UTF8 = 65001, UTF16LE = 1200, UTF16BE = 1201 }; 
     enum AlphabetTypes { MyNone = 0, Arabic = 1256, Baltic = 1257, CentralEurope = 1250, ChineseSimplified = 936, ChineseTraditional = 950, Cyrillic = 1251, Greek = 1253, Hebrew = 1255, Japanese = 932, Korean = 949, Thai = 874, Turkish = 1254, VietNam = 1258, Western = 1252 };
     enum ScreenOrientations { AlphabetDefault = -2, LeftToRight = 0, RightToLeft = 1, TopToBottom = 2, BottomToTop = 3 };
     enum FontSize { Normal = 1, Big = 2, VBig = 4 };
diff --git a/Src/Win32/Widgets/Edit.cpp b/Src/Win32/Widgets/Edit.cpp
index 7d321d6..d9d92d6 100644
--- a/Src/Win32/Widgets/Edit.cpp
+++ b/Src/Win32/Widgets/Edit.cpp
@@ -40,9 +40,7 @@ using namespace WinUTF8;
 CEdit::CEdit(CAppSettings *pAppSettings) {
   m_FontSize = 0;
   m_FontName = "";
-  FileHandle = INVALID_HANDLE_VALUE;
   m_FilenameGUI = 0;
-  threadid = 0;
   
   // TODO: Check that this is all working okay (it quite probably
   // isn't). In the long term need specialised editor classes.
@@ -70,10 +68,7 @@ HWND CEdit::Create(HWND hParent, bool bNewWithDate) {
 
 CEdit::~CEdit() {
   DeleteObject(m_Font);
-
   delete m_FilenameGUI;
-  if(FileHandle != INVALID_HANDLE_VALUE)
-    CloseHandle(FileHandle);
 }
 
 void CEdit::Move(int x, int y, int Width, int Height) {
@@ -81,25 +76,47 @@ void CEdit::Move(int x, int y, int Width, int Height) {
 }
 
 bool CEdit::Save() {
-  if(FileHandle == INVALID_HANDLE_VALUE) {
-    if(m_filename == TEXT(""))
-      return false;
-    FileHandle = CreateFile(m_filename.c_str(), GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ, (LPSECURITY_ATTRIBUTES) NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, (HANDLE) NULL);
-
-    if(FileHandle == INVALID_HANDLE_VALUE)
-      return false;
-  }
+  if (m_filename == TEXT(""))
+    return false;
 
-  // Truncate File to 0 bytes.
-  SetFilePointer(FileHandle, NULL, NULL, FILE_BEGIN);
-  SetEndOfFile(FileHandle);
+  HANDLE FileHandle = CreateFile(m_filename.c_str(), GENERIC_WRITE, 0, 
+    (LPSECURITY_ATTRIBUTES)NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, (HANDLE)NULL);
+  if (FileHandle == INVALID_HANDLE_VALUE)
+    return false;
 
   CString wideText;
   GetWindowText(wideText);
-  CStringA mbcsText(wideText);
+
   DWORD NumberOfBytesWritten;   // Used by WriteFile
-  WriteFile(FileHandle, mbcsText, mbcsText.GetLength(), &NumberOfBytesWritten, NULL);
-  // The file handle is not closed here. We keep a write-lock on the file to stop other programs confusing us.
+  switch (m_pAppSettings->GetLongParameter(APP_LP_FILE_ENCODING))
+  {
+  case Opts::UTF8: {
+    WriteFile(FileHandle, "\xEF\xBB\xBF", 3, &NumberOfBytesWritten, NULL);
+    string utf8Text = wstring_to_UTF8string(wideText);
+    WriteFile(FileHandle, utf8Text.c_str(), utf8Text.size(), &NumberOfBytesWritten, NULL);
+    break;
+  }
+  case Opts::UTF16LE: {
+    // TODO I am assuming this machine is LE. Do any windows (perhaps CE) machines run on BE?
+    WriteFile(FileHandle, "\xFF\xFE", 2, &NumberOfBytesWritten, NULL);
+    WriteFile(FileHandle, wideText.GetBuffer(), wideText.GetLength() * 2, &NumberOfBytesWritten, NULL);
+    break;
+  }
+  case Opts::UTF16BE: {
+    // TODO I am again assuming this machine is LE.
+    WriteFile(FileHandle, "\xFE\xFF", 2, &NumberOfBytesWritten, NULL);
+    for (unsigned int i = 0; i < wideText.GetLength(); i++) {
+      wideText.SetAt(i, _byteswap_ushort(wideText[i]));
+    }
+    WriteFile(FileHandle, wideText.GetBuffer(), wideText.GetLength() * 2, &NumberOfBytesWritten, NULL);
+    break;
+  }
+  default:
+    CStringA mbcsText(wideText); // converts wide string to current locale
+    WriteFile(FileHandle, mbcsText, mbcsText.GetLength(), &NumberOfBytesWritten, NULL);
+    break;
+  }
+  CloseHandle(FileHandle);
 
   m_FilenameGUI->SetDirty(false);
   m_dirty = false;
@@ -157,9 +174,6 @@ void CEdit::TNew(const Tstring &filename) {
     m_filename = m_FilenameGUI->New();
   else
     m_filename = filename;
-  if(FileHandle != INVALID_HANDLE_VALUE)
-    CloseHandle(FileHandle);
-  FileHandle = INVALID_HANDLE_VALUE;
   Clear();
 }
 
@@ -169,34 +183,61 @@ bool CEdit::TOpen(const Tstring &filename) {
   // Best thing is probably to trust any BOMs at the beginning of file, but otherwise
   // to believe menu. Unicode files don't necessarily have BOMs, especially from Unix.
 
-  HANDLE TmpHandle = CreateFile(filename.c_str(), GENERIC_READ | GENERIC_WRITE,
+  HANDLE FileHandle = CreateFile(filename.c_str(), GENERIC_READ,
                                 FILE_SHARE_READ, (LPSECURITY_ATTRIBUTES) NULL,
                                 OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL,
                                 (HANDLE) NULL);
 
-  if(TmpHandle == INVALID_HANDLE_VALUE)
+  if(FileHandle == INVALID_HANDLE_VALUE)
     return false;
 
-  if(FileHandle != INVALID_HANDLE_VALUE)
-    CloseHandle(FileHandle);
-  FileHandle = TmpHandle;
   m_filename = filename;
-
   SetFilePointer(FileHandle, NULL, NULL, FILE_BEGIN);
-
   DWORD filesize = GetFileSize(FileHandle, NULL);
-  unsigned long amountread;
-
-  char *filebuffer = new char[filesize];
-
-  // Just read in whole file as char* and cast later.
-
+  unsigned long amountread = 0;
+  CStringA filestr;
+  char* filebuffer = filestr.GetBufferSetLength(filesize+2);
   ReadFile(FileHandle, filebuffer, filesize, &amountread, NULL);
+  filebuffer[amountread] = 0;
+  filebuffer[amountread+1] = 0;
+  long encoding = m_pAppSettings->GetLongParameter(APP_LP_FILE_ENCODING);
+  bool removeBOM = false;
+  if (amountread >= 3 && strncmp(filebuffer, "\xEF\xBB\xBF", 3) == 0) {
+    encoding = Opts::UTF8;
+    removeBOM = true;
+  }
+  if (amountread >= 2 && strncmp(filebuffer, "\xFF\xFE", 2) == 0) {
+    encoding = Opts::UTF16LE;
+    removeBOM = true;
+  }
+  if (amountread >= 2 && strncmp(filebuffer, "\xFE\xFF", 2) == 0) {
+    encoding = Opts::UTF16BE;
+    removeBOM = true;
+  }
 
-  string text;
-  text = text + filebuffer;
-  Tstring inserttext;
-  UTF8string_to_wstring(text, inserttext);
+  wstring inserttext;
+  switch (encoding) {
+  case Opts::UTF8: {
+    UTF8string_to_wstring(filebuffer + (removeBOM ? 3 : 0), inserttext);
+    break;
+  }
+  case Opts::UTF16LE: {
+    inserttext = reinterpret_cast<wchar_t*>(filebuffer+ (removeBOM ? 2 : 0));
+    break;
+  }
+  case Opts::UTF16BE: {
+    wchar_t* widePtr = reinterpret_cast<wchar_t*>(filebuffer + (removeBOM ? 2 : 0));
+    for (unsigned int i = 0; widePtr[i]; i++) {
+      widePtr[i] = _byteswap_ushort(widePtr[i]);
+    }
+    inserttext = widePtr;
+    break;
+  }
+  default:
+    CString wideFromMBCS(filestr); // converts mbcs to wide string
+    inserttext = wideFromMBCS;
+    break;
+  }
   InsertText(inserttext);
 
   m_FilenameGUI->SetFilename(m_filename);
@@ -206,18 +247,6 @@ bool CEdit::TOpen(const Tstring &filename) {
 }
 
 bool CEdit::TSaveAs(const Tstring &filename) {
-  HANDLE TmpHandle = CreateFile(filename.c_str(), GENERIC_READ | GENERIC_WRITE,
-                                FILE_SHARE_READ, (LPSECURITY_ATTRIBUTES) NULL,
-                                CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL,
-                                (HANDLE) NULL);
-
-  if(TmpHandle == INVALID_HANDLE_VALUE)
-    return false;
-
-  if(FileHandle != INVALID_HANDLE_VALUE)
-    CloseHandle(FileHandle);
-  FileHandle = TmpHandle;
-
   m_filename = filename;
   if(Save()) {
     m_FilenameGUI->SetFilename(m_filename);
diff --git a/Src/Win32/Widgets/Edit.h b/Src/Win32/Widgets/Edit.h
index b5daf93..6b85786 100644
--- a/Src/Win32/Widgets/Edit.h
+++ b/Src/Win32/Widgets/Edit.h
@@ -81,9 +81,6 @@ class CEdit : public ATL::CWindowImpl<CEdit> {
     return m_hWnd;
   } 
   
-  // As EN_UPDATE message go to parent, need this. void UserSave(HANDLE FileHandle);
-  void UserOpen(HANDLE FileHandle);
-  
   int Move(bool bForwards, Dasher::CControlManager::EditDistance iDist);
   int Delete(bool bForwards, Dasher::CControlManager::EditDistance iDist);
   std::string GetTextAroundCursor(Dasher::CControlManager::EditDistance iDist);
@@ -121,7 +118,6 @@ class CEdit : public ATL::CWindowImpl<CEdit> {
 
  protected:
   bool m_dirty;
-  LRESULT WndProc(HWND Window, UINT message, WPARAM wParam, LPARAM lParam);
   
  private:  
   Dasher::CDasherInterfaceBase *m_pDasherInterface;
@@ -132,8 +128,6 @@ class CEdit : public ATL::CWindowImpl<CEdit> {
   HWND m_hTarget;
   bool m_bForwardKeyboard;
   
-  HANDLE FileHandle;            // Keeping a lock on files makes File I/O safer,
-  // especially for the append mode!
   CFilenameGUI *m_FilenameGUI;
   Tstring m_filename;
   HWND textwindow;
@@ -149,7 +143,6 @@ class CEdit : public ATL::CWindowImpl<CEdit> {
   std::string m_Output;         // UTF-8 to go to training file
   UINT CodePage;                // for font and possible for finding the encoding
   
-  DWORD threadid;
   HWND targetwindow;
   bool textentry;
 #ifdef _UNICODE


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]