[dasher: 178/217] Added UTF file loading and saving
- From: Patrick Welche <pwelche src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [dasher: 178/217] Added UTF file loading and saving
- Date: Sat, 27 Feb 2016 12:15:17 +0000 (UTC)
commit 28caec404fa3765a45fcda8471582874a4c5c020
Author: Ada Majorek <amajorek google com>
Date: Wed Jan 20 23:31:38 2016 -0800
Added UTF file loading and saving
fixes https://github.com/ipomoena/dasher/issues/76
and most of https://github.com/ipomoena/dasher/issues/14
The only piece left is the UI for changing the default encoding.
Src/Common/AppSettingsData.h | 1 +
Src/Common/AppSettingsHeader.h | 2 +-
Src/DasherCore/DasherTypes.h | 1 +
Src/Win32/Widgets/Edit.cpp | 131 ++++++++++++++++++++++++----------------
Src/Win32/Widgets/Edit.h | 7 --
5 files changed, 83 insertions(+), 59 deletions(-)
---
diff --git a/Src/Common/AppSettingsData.h b/Src/Common/AppSettingsData.h
index caeb311..a0b9692 100644
--- a/Src/Common/AppSettingsData.h
+++ b/Src/Common/AppSettingsData.h
@@ -36,6 +36,7 @@ Dasher::Settings::bp_table app_boolparamtable[] = {
};
Dasher::Settings::lp_table app_longparamtable[] = {
+ {APP_LP_FILE_ENCODING, "FileEncodingFormat", Persistence::PERSISTENT, -1, "FileEncodingFormat"},
{APP_LP_EDIT_FONT_SIZE, "EditFontSize", Persistence::PERSISTENT, 0, "EditFontSize"},
{APP_LP_EDIT_HEIGHT, "EditHeight", Persistence::PERSISTENT, 75, "The height of the edit window"},
{APP_LP_EDIT_WIDTH, "EditWidth", Persistence::PERSISTENT, 200, "EditWidth"},
diff --git a/Src/Common/AppSettingsHeader.h b/Src/Common/AppSettingsHeader.h
index efcda71..bdab7f1 100644
--- a/Src/Common/AppSettingsHeader.h
+++ b/Src/Common/AppSettingsHeader.h
@@ -12,7 +12,7 @@ enum {
};
enum {
- APP_LP_EDIT_FONT_SIZE = END_OF_APP_BPS, // TODO Extract font size from APP_SP_EDIT_FONT as linux
+ APP_LP_FILE_ENCODING = END_OF_APP_BPS, APP_LP_EDIT_FONT_SIZE, // TODO Extract font size from APP_SP_EDIT_FONT as linux
APP_LP_EDIT_HEIGHT,
APP_LP_EDIT_WIDTH,
APP_LP_SCREEN_WIDTH, APP_LP_SCREEN_HEIGHT, APP_LP_SCREEN_WIDTH_H, APP_LP_SCREEN_HEIGHT_H,
diff --git a/Src/DasherCore/DasherTypes.h b/Src/DasherCore/DasherTypes.h
index 12cb977..1b5126e 100644
--- a/Src/DasherCore/DasherTypes.h
+++ b/Src/DasherCore/DasherTypes.h
@@ -56,6 +56,7 @@ namespace Dasher {
namespace Opts {
// Numbers should be applied to elements of the following two enumerations as these preferences may be stored to file. Constancy between
// versions is a good idea. It should *not* be assumed that the numbers map onto anything useful. Different codepages may be appropriate on different systems for different character sets.
+ enum FileEncodingFormats { UserDefault = -1, UTF8 = 65001, UTF16LE = 1200, UTF16BE = 1201 };
enum AlphabetTypes { MyNone = 0, Arabic = 1256, Baltic = 1257, CentralEurope = 1250, ChineseSimplified = 936, ChineseTraditional = 950, Cyrillic = 1251, Greek = 1253, Hebrew = 1255, Japanese = 932, Korean = 949, Thai = 874, Turkish = 1254, VietNam = 1258, Western = 1252 };
enum ScreenOrientations { AlphabetDefault = -2, LeftToRight = 0, RightToLeft = 1, TopToBottom = 2, BottomToTop = 3 };
enum FontSize { Normal = 1, Big = 2, VBig = 4 };
diff --git a/Src/Win32/Widgets/Edit.cpp b/Src/Win32/Widgets/Edit.cpp
index 7d321d6..d9d92d6 100644
--- a/Src/Win32/Widgets/Edit.cpp
+++ b/Src/Win32/Widgets/Edit.cpp
@@ -40,9 +40,7 @@ using namespace WinUTF8;
CEdit::CEdit(CAppSettings *pAppSettings) {
m_FontSize = 0;
m_FontName = "";
- FileHandle = INVALID_HANDLE_VALUE;
m_FilenameGUI = 0;
- threadid = 0;
// TODO: Check that this is all working okay (it quite probably
// isn't). In the long term need specialised editor classes.
@@ -70,10 +68,7 @@ HWND CEdit::Create(HWND hParent, bool bNewWithDate) {
CEdit::~CEdit() {
DeleteObject(m_Font);
-
delete m_FilenameGUI;
- if(FileHandle != INVALID_HANDLE_VALUE)
- CloseHandle(FileHandle);
}
void CEdit::Move(int x, int y, int Width, int Height) {
@@ -81,25 +76,47 @@ void CEdit::Move(int x, int y, int Width, int Height) {
}
bool CEdit::Save() {
- if(FileHandle == INVALID_HANDLE_VALUE) {
- if(m_filename == TEXT(""))
- return false;
- FileHandle = CreateFile(m_filename.c_str(), GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ, (LPSECURITY_ATTRIBUTES) NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, (HANDLE) NULL);
-
- if(FileHandle == INVALID_HANDLE_VALUE)
- return false;
- }
+ if (m_filename == TEXT(""))
+ return false;
- // Truncate File to 0 bytes.
- SetFilePointer(FileHandle, NULL, NULL, FILE_BEGIN);
- SetEndOfFile(FileHandle);
+ HANDLE FileHandle = CreateFile(m_filename.c_str(), GENERIC_WRITE, 0,
+ (LPSECURITY_ATTRIBUTES)NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, (HANDLE)NULL);
+ if (FileHandle == INVALID_HANDLE_VALUE)
+ return false;
CString wideText;
GetWindowText(wideText);
- CStringA mbcsText(wideText);
+
DWORD NumberOfBytesWritten; // Used by WriteFile
- WriteFile(FileHandle, mbcsText, mbcsText.GetLength(), &NumberOfBytesWritten, NULL);
- // The file handle is not closed here. We keep a write-lock on the file to stop other programs confusing us.
+ switch (m_pAppSettings->GetLongParameter(APP_LP_FILE_ENCODING))
+ {
+ case Opts::UTF8: {
+ WriteFile(FileHandle, "\xEF\xBB\xBF", 3, &NumberOfBytesWritten, NULL);
+ string utf8Text = wstring_to_UTF8string(wideText);
+ WriteFile(FileHandle, utf8Text.c_str(), utf8Text.size(), &NumberOfBytesWritten, NULL);
+ break;
+ }
+ case Opts::UTF16LE: {
+ // TODO I am assuming this machine is LE. Do any windows (perhaps CE) machines run on BE?
+ WriteFile(FileHandle, "\xFF\xFE", 2, &NumberOfBytesWritten, NULL);
+ WriteFile(FileHandle, wideText.GetBuffer(), wideText.GetLength() * 2, &NumberOfBytesWritten, NULL);
+ break;
+ }
+ case Opts::UTF16BE: {
+ // TODO I am again assuming this machine is LE.
+ WriteFile(FileHandle, "\xFE\xFF", 2, &NumberOfBytesWritten, NULL);
+ for (unsigned int i = 0; i < wideText.GetLength(); i++) {
+ wideText.SetAt(i, _byteswap_ushort(wideText[i]));
+ }
+ WriteFile(FileHandle, wideText.GetBuffer(), wideText.GetLength() * 2, &NumberOfBytesWritten, NULL);
+ break;
+ }
+ default:
+ CStringA mbcsText(wideText); // converts wide string to current locale
+ WriteFile(FileHandle, mbcsText, mbcsText.GetLength(), &NumberOfBytesWritten, NULL);
+ break;
+ }
+ CloseHandle(FileHandle);
m_FilenameGUI->SetDirty(false);
m_dirty = false;
@@ -157,9 +174,6 @@ void CEdit::TNew(const Tstring &filename) {
m_filename = m_FilenameGUI->New();
else
m_filename = filename;
- if(FileHandle != INVALID_HANDLE_VALUE)
- CloseHandle(FileHandle);
- FileHandle = INVALID_HANDLE_VALUE;
Clear();
}
@@ -169,34 +183,61 @@ bool CEdit::TOpen(const Tstring &filename) {
// Best thing is probably to trust any BOMs at the beginning of file, but otherwise
// to believe menu. Unicode files don't necessarily have BOMs, especially from Unix.
- HANDLE TmpHandle = CreateFile(filename.c_str(), GENERIC_READ | GENERIC_WRITE,
+ HANDLE FileHandle = CreateFile(filename.c_str(), GENERIC_READ,
FILE_SHARE_READ, (LPSECURITY_ATTRIBUTES) NULL,
OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL,
(HANDLE) NULL);
- if(TmpHandle == INVALID_HANDLE_VALUE)
+ if(FileHandle == INVALID_HANDLE_VALUE)
return false;
- if(FileHandle != INVALID_HANDLE_VALUE)
- CloseHandle(FileHandle);
- FileHandle = TmpHandle;
m_filename = filename;
-
SetFilePointer(FileHandle, NULL, NULL, FILE_BEGIN);
-
DWORD filesize = GetFileSize(FileHandle, NULL);
- unsigned long amountread;
-
- char *filebuffer = new char[filesize];
-
- // Just read in whole file as char* and cast later.
-
+ unsigned long amountread = 0;
+ CStringA filestr;
+ char* filebuffer = filestr.GetBufferSetLength(filesize+2);
ReadFile(FileHandle, filebuffer, filesize, &amountread, NULL);
+ filebuffer[amountread] = 0;
+ filebuffer[amountread+1] = 0;
+ long encoding = m_pAppSettings->GetLongParameter(APP_LP_FILE_ENCODING);
+ bool removeBOM = false;
+ if (amountread >= 3 && strncmp(filebuffer, "\xEF\xBB\xBF", 3) == 0) {
+ encoding = Opts::UTF8;
+ removeBOM = true;
+ }
+ if (amountread >= 2 && strncmp(filebuffer, "\xFF\xFE", 2) == 0) {
+ encoding = Opts::UTF16LE;
+ removeBOM = true;
+ }
+ if (amountread >= 2 && strncmp(filebuffer, "\xFE\xFF", 2) == 0) {
+ encoding = Opts::UTF16BE;
+ removeBOM = true;
+ }
- string text;
- text = text + filebuffer;
- Tstring inserttext;
- UTF8string_to_wstring(text, inserttext);
+ wstring inserttext;
+ switch (encoding) {
+ case Opts::UTF8: {
+ UTF8string_to_wstring(filebuffer + (removeBOM ? 3 : 0), inserttext);
+ break;
+ }
+ case Opts::UTF16LE: {
+ inserttext = reinterpret_cast<wchar_t*>(filebuffer+ (removeBOM ? 2 : 0));
+ break;
+ }
+ case Opts::UTF16BE: {
+ wchar_t* widePtr = reinterpret_cast<wchar_t*>(filebuffer + (removeBOM ? 2 : 0));
+ for (unsigned int i = 0; widePtr[i]; i++) {
+ widePtr[i] = _byteswap_ushort(widePtr[i]);
+ }
+ inserttext = widePtr;
+ break;
+ }
+ default:
+ CString wideFromMBCS(filestr); // converts mbcs to wide string
+ inserttext = wideFromMBCS;
+ break;
+ }
InsertText(inserttext);
m_FilenameGUI->SetFilename(m_filename);
@@ -206,18 +247,6 @@ bool CEdit::TOpen(const Tstring &filename) {
}
bool CEdit::TSaveAs(const Tstring &filename) {
- HANDLE TmpHandle = CreateFile(filename.c_str(), GENERIC_READ | GENERIC_WRITE,
- FILE_SHARE_READ, (LPSECURITY_ATTRIBUTES) NULL,
- CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL,
- (HANDLE) NULL);
-
- if(TmpHandle == INVALID_HANDLE_VALUE)
- return false;
-
- if(FileHandle != INVALID_HANDLE_VALUE)
- CloseHandle(FileHandle);
- FileHandle = TmpHandle;
-
m_filename = filename;
if(Save()) {
m_FilenameGUI->SetFilename(m_filename);
diff --git a/Src/Win32/Widgets/Edit.h b/Src/Win32/Widgets/Edit.h
index b5daf93..6b85786 100644
--- a/Src/Win32/Widgets/Edit.h
+++ b/Src/Win32/Widgets/Edit.h
@@ -81,9 +81,6 @@ class CEdit : public ATL::CWindowImpl<CEdit> {
return m_hWnd;
}
- // As EN_UPDATE message go to parent, need this. void UserSave(HANDLE FileHandle);
- void UserOpen(HANDLE FileHandle);
-
int Move(bool bForwards, Dasher::CControlManager::EditDistance iDist);
int Delete(bool bForwards, Dasher::CControlManager::EditDistance iDist);
std::string GetTextAroundCursor(Dasher::CControlManager::EditDistance iDist);
@@ -121,7 +118,6 @@ class CEdit : public ATL::CWindowImpl<CEdit> {
protected:
bool m_dirty;
- LRESULT WndProc(HWND Window, UINT message, WPARAM wParam, LPARAM lParam);
private:
Dasher::CDasherInterfaceBase *m_pDasherInterface;
@@ -132,8 +128,6 @@ class CEdit : public ATL::CWindowImpl<CEdit> {
HWND m_hTarget;
bool m_bForwardKeyboard;
- HANDLE FileHandle; // Keeping a lock on files makes File I/O safer,
- // especially for the append mode!
CFilenameGUI *m_FilenameGUI;
Tstring m_filename;
HWND textwindow;
@@ -149,7 +143,6 @@ class CEdit : public ATL::CWindowImpl<CEdit> {
std::string m_Output; // UTF-8 to go to training file
UINT CodePage; // for font and possible for finding the encoding
- DWORD threadid;
HWND targetwindow;
bool textentry;
#ifdef _UNICODE
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]