[ocrfeeder] Fix unicode issues when exporting files
- From: Joaquim Manuel Pereira Rocha <jrocha src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [ocrfeeder] Fix unicode issues when exporting files
- Date: Sat, 4 Jun 2016 21:22:30 +0000 (UTC)
commit b3e7ed735f241acd2b7d7aa97be7b26bffdbc9ed
Author: Joaquim Rocha <me joaquimrocha com>
Date: Sat Jun 4 23:12:39 2016 +0200
Fix unicode issues when exporting files
These changes make sure that unicode is used in DataBox, and that the
document generators use it correctly when exporting the data to files.
src/ocrfeeder/feeder/documentGeneration.py | 10 +++++-----
src/ocrfeeder/studio/dataHolder.py | 8 ++++----
src/ocrfeeder/util/lib.py | 6 ++++++
3 files changed, 15 insertions(+), 9 deletions(-)
---
diff --git a/src/ocrfeeder/feeder/documentGeneration.py b/src/ocrfeeder/feeder/documentGeneration.py
index 51037af..152e9c3 100644
--- a/src/ocrfeeder/feeder/documentGeneration.py
+++ b/src/ocrfeeder/feeder/documentGeneration.py
@@ -213,12 +213,12 @@ class HtmlGenerator(DocumentGenerator):
os.mkdir(images_folder)
if pages:
file = open(os.path.join(self.name, 'index.html'), 'w')
- file.write(pages[0])
+ file.write(pages[0].encode('utf-8'))
file.close()
if len(pages) > 1:
for i in xrange(1, len(pages)):
file = open(os.path.join(self.name, 'page%s.html' % (i + 1)), 'w')
- file.write(pages[i])
+ file.write(pages[i].encode('utf-8'))
file.close()
if self.styles:
file = open(os.path.join(self.name, 'style.css'), 'w')
@@ -248,7 +248,7 @@ class OdtGenerator(DocumentGenerator):
self.document.automaticstyles.addElement(frame_style_rotated)
def addText(self, data_box):
- text = data_box.getText().decode('utf-8')
+ text = data_box.getText()
frame_style = Style(name='FrameStyle', family = 'graphic')
debug('Angle: %s' % data_box.text_data.angle)
angle = data_box.text_data.angle
@@ -355,10 +355,10 @@ class OdtGenerator(DocumentGenerator):
class PlaintextGenerator(DocumentGenerator):
def __init__(self, name):
self.name = name
- self.text = ''
+ self.text = u''
def addText(self, newText):
- self.text += unicode(newText, 'utf-8')
+ self.text += newText
def addPage(self, page):
self.addText(page.getTextFromBoxes())
diff --git a/src/ocrfeeder/studio/dataHolder.py b/src/ocrfeeder/studio/dataHolder.py
index 2821960..204f1a3 100644
--- a/src/ocrfeeder/studio/dataHolder.py
+++ b/src/ocrfeeder/studio/dataHolder.py
@@ -72,7 +72,7 @@ class DataBox(GObject.GObject):
(GObject.TYPE_INT,))
}
- def __init__(self, x = 0, y = 0, width = 0, height = 0, image = None, type = TEXT_TYPE, text = ''):
+ def __init__(self, x = 0, y = 0, width = 0, height = 0, image = None, type = TEXT_TYPE, text = u''):
super(DataBox, self).__init__()
self.x = int(x)
self.y = int(y)
@@ -81,7 +81,7 @@ class DataBox(GObject.GObject):
self.image = image
self.setType(type)
self.text_data = TextData()
- self.text = text
+ self.text = self.setText(text)
def configTextData(self, face = 'Sans', size = 12, justification = ALIGN_LEFT, line_space = 1,
letter_space = 1):
self.text_data = TextData(face, size, justification, line_space, letter_space)
@@ -132,7 +132,7 @@ class DataBox(GObject.GObject):
self.text_data.weight = font_weight
def setText(self, text):
- self.text = text
+ self.text = lib.ensureUnicode(text)
def getText(self):
return self.text
@@ -233,7 +233,7 @@ class PageData:
return {'PageData': dictionary}
def getTextFromBoxes(self, data_boxes=None):
- text = ''
+ text = u''
if data_boxes is None:
data_boxes = self.data_boxes
number_of_boxes = len(data_boxes)
diff --git a/src/ocrfeeder/util/lib.py b/src/ocrfeeder/util/lib.py
index 73b03b9..db60c75 100644
--- a/src/ocrfeeder/util/lib.py
+++ b/src/ocrfeeder/util/lib.py
@@ -191,3 +191,9 @@ def makeRadioButton(label, from_widget=None):
button.set_use_underline(True)
return button
+
+def ensureUnicode(text):
+ if isinstance(text, unicode):
+ return text
+
+ return unicode(text, 'utf-8')
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]