[dots] Set inputTextEncoding as UTF-8 for odt and pdf documents
- From: Fernando Herrera de las Heras <fherrera src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [dots] Set inputTextEncoding as UTF-8 for odt and pdf documents
- Date: Thu, 10 Jun 2010 15:31:02 +0000 (UTC)
commit 73826834c5b28d0fb1e4911b70b91d3a1054b3e0
Author: Fernando Herrera <fherrera onirica com>
Date: Mon Jun 7 21:20:14 2010 +0200
Set inputTextEncoding as UTF-8 for odt and pdf documents
dots/odtdocument.py | 1 +
dots/pdfdocument.py | 2 ++
2 files changed, 3 insertions(+), 0 deletions(-)
---
diff --git a/dots/odtdocument.py b/dots/odtdocument.py
index 8e0a2ad..c1fdd52 100644
--- a/dots/odtdocument.py
+++ b/dots/odtdocument.py
@@ -24,6 +24,7 @@ from odf.odf2xhtml import ODF2XHTML
class OdtDocument(Document):
def translate(self, config):
+ config['outputFormat']['inputTextEncoding'] = "UTF8"
self.translator = Translator(config)
odhandler = ODF2XHTML (False, False)
odhandler.elements[(TEXTNS, u"changed-region")] = (odhandler.s_ignorexml,None)
diff --git a/dots/pdfdocument.py b/dots/pdfdocument.py
index a572119..c030647 100644
--- a/dots/pdfdocument.py
+++ b/dots/pdfdocument.py
@@ -22,6 +22,8 @@ import poppler
class PdfDocument(Document):
def translate(self, config):
+ # FIXME: Check if poppler gives us always UTF-8 strings
+ config['outputFormat']['inputTextEncoding'] = "UTF8"
self.translator = Translator(config)
uri = "file://" + self.input_file
document = poppler.document_new_from_file (uri, None)
[Date Prev][Date Next] [Thread Prev][Thread Next]
[Thread Index] [Date Index] [Author Index]