[dots] Set inputTextEncoding as UTF-8 for odt and pdf documents



commit 73826834c5b28d0fb1e4911b70b91d3a1054b3e0
Author: Fernando Herrera <fherrera onirica com>
Date:   Mon Jun 7 21:20:14 2010 +0200

    Set inputTextEncoding as UTF-8 for odt and pdf documents

 dots/odtdocument.py |    1 +
 dots/pdfdocument.py |    2 ++
 2 files changed, 3 insertions(+), 0 deletions(-)
---
diff --git a/dots/odtdocument.py b/dots/odtdocument.py
index 8e0a2ad..c1fdd52 100644
--- a/dots/odtdocument.py
+++ b/dots/odtdocument.py
@@ -24,6 +24,7 @@ from odf.odf2xhtml import ODF2XHTML
 class OdtDocument(Document):
 
     def translate(self, config):
+	config['outputFormat']['inputTextEncoding'] = "UTF8"
 	self.translator = Translator(config)
 	odhandler = ODF2XHTML (False, False)
 	odhandler.elements[(TEXTNS, u"changed-region")] = (odhandler.s_ignorexml,None)
diff --git a/dots/pdfdocument.py b/dots/pdfdocument.py
index a572119..c030647 100644
--- a/dots/pdfdocument.py
+++ b/dots/pdfdocument.py
@@ -22,6 +22,8 @@ import poppler
 
 class PdfDocument(Document):
     def translate(self, config):
+	# FIXME: Check whether poppler always gives us UTF-8 strings
+	config['outputFormat']['inputTextEncoding'] = "UTF8"
 	self.translator = Translator(config)
 	uri = "file://" + self.input_file
 	document = poppler.document_new_from_file (uri, None)



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]