[dots] Split pdf/odt/xml logic
- From: Fernando Herrera de las Heras <fherrera src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [dots] Split pdf/odt/xml logic
- Date: Thu, 10 Jun 2010 15:30:52 +0000 (UTC)
commit 86e4b60946d7622f0b0ea675241b6cb159b6a943
Author: Fernando Herrera <fherrera onirica com>
Date: Mon Jun 7 15:49:55 2010 +0200
Split pdf/odt/xml logic
dots/Makefile.am | 4 +++
dots/document.py | 68 ----------------------------------------------
dots/document_builder.py | 57 ++++++++++++++++++++++++++++++++++++++
dots/import_assistant.py | 4 +-
dots/odtdocument.py | 33 ++++++++++++++++++++++
dots/pdfdocument.py | 46 +++++++++++++++++++++++++++++++
dots/xmldocument.py | 29 +++++++++++++++++++
7 files changed, 171 insertions(+), 70 deletions(-)
---
diff --git a/dots/Makefile.am b/dots/Makefile.am
index a02b83f..5a196ee 100644
--- a/dots/Makefile.am
+++ b/dots/Makefile.am
@@ -9,6 +9,10 @@ dots_PYTHON = \
host_settings.py \
config_builder.py \
document.py \
+ pdfdocument.py \
+ odtdocument.py \
+ xmldocument.py \
+ document_builder.py \
translator.py
DISTCLEANFILES = host_settings.py
diff --git a/dots/document.py b/dots/document.py
index 70097bd..dec2b11 100644
--- a/dots/document.py
+++ b/dots/document.py
@@ -16,7 +16,6 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
-import mimetypes
from translator import Translator
class Document():
@@ -28,72 +27,5 @@ class Document():
def set_output_file (self, output_file):
self.output_file = output_file
- def translate(self, config):
- self.translator = Translator(config)
- mime_type, encoding = mimetypes.guess_type (self.input_file)
- if mime_type == "application/pdf":
- self.translate_pdf ()
- elif mime_type == "application/vnd.oasis.opendocument.text":
- self.translate_odt ()
- else:
- self.translate_xml ()
-
- def translate_pdf(self):
- import poppler
- uri = "file://" + self.input_file
- document = poppler.document_new_from_file (uri, None)
- npages = document.get_n_pages()
- for p in range(0,npages-1):
- print p
- page = document.get_page(p)
- w,h = page.get_size()
- r = poppler.Rectangle ()
- r.x1 = 0
- r.x2 = w
- r.y1 = 0
- r.y2 = h
- # Currently we are getting the layout from the pdf here
- # we should collapse it
- text = page.get_text(poppler.SELECTION_LINE,r)
- print text
- return
-
- def translate_odt(self):
- from odf.odf2xhtml import TEXTNS
- from odf.odf2xhtml import ODF2XHTML
- odhandler = ODF2XHTML (False, False)
- odhandler.elements[(TEXTNS, u"changed-region")] = (odhandler.s_ignorexml,None)
- result = odhandler.odf2xhtml(self.input_file).encode('UTF-8','xmlcharrefreplace')
- self.braille_text = self.translator.translate_string (result)
-
- def translate_xml(self):
- self.braille_text = self.translator.translate_file (self.input_file)
-
def get_braille_text(self):
return self.braille_text
-
-
-if __name__ == "__main__":
- import sys
- from config_builder import ConfigBuilder
-
- if sys.argv[1] is None:
- print sys.argv[0] + " [file]"
- d = Document(sys.argv[1])
- config_builder = ConfigBuilder()
-
- config_builder['xml']['semanticFiles'] = '*'
- config_builder['xml']['semanticFiles'] += ',nemeth.sem'
- config_builder['xml']['internetAccess'] = 'yes'
- config_builder['translation']['literaryTextTable'] = "Es-Es-g1.utb"
- config_builder['outputFormat']['cellsPerLine'] = 40
- config_builder['outputFormat']['braillePages'] = 'yes'
- config_builder['outputFormat']['formatFor'] = 'textDevice'
- config_builder['outputFormat']['LinesPerPage'] = 25
- config_builder['outputFormat']['braillePageNumberAt'] = 'bottom'
-
- d.translate (config_builder)
- print d.get_braille_text()
-
-
-
diff --git a/dots/document_builder.py b/dots/document_builder.py
new file mode 100644
index 0000000..bc4e966
--- /dev/null
+++ b/dots/document_builder.py
@@ -0,0 +1,57 @@
+# Dots - A braille translation program.
+#
+# Copyright (C) 2010 Consorcio Fernando de los Rios
+# Author: Fernando Herrera <fherrera onirica com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import mimetypes
+from document import Document
+from odtdocument import OdtDocument
+from pdfdocument import PdfDocument
+from xmldocument import XmlDocument
+
+def document_new(filename):
+ mime_type, encoding = mimetypes.guess_type (filename)
+ if mime_type == "application/pdf":
+ doc = PdfDocument(filename)
+ elif mime_type == "application/vnd.oasis.opendocument.text":
+ doc = OdtDocument(filename)
+ else:
+ doc = XmlDocument(filename)
+ return doc
+
+
+if __name__ == "__main__":
+ import sys
+ from config_builder import ConfigBuilder
+
+ if sys.argv[1] is None:
+ print sys.argv[0] + " [file]"
+ d = document_new (sys.argv[1])
+ config_builder = ConfigBuilder()
+
+ config_builder['xml']['semanticFiles'] = '*'
+ config_builder['xml']['semanticFiles'] += ',nemeth.sem'
+ config_builder['xml']['internetAccess'] = 'yes'
+ config_builder['translation']['literaryTextTable'] = "Es-Es-g1.utb"
+ config_builder['outputFormat']['cellsPerLine'] = 40
+ config_builder['outputFormat']['braillePages'] = 'yes'
+ config_builder['outputFormat']['formatFor'] = 'textDevice'
+ config_builder['outputFormat']['LinesPerPage'] = 25
+ config_builder['outputFormat']['braillePageNumberAt'] = 'bottom'
+
+ d.translate (config_builder)
+ print d.get_braille_text ()
+
diff --git a/dots/import_assistant.py b/dots/import_assistant.py
index bd5a373..e5de893 100644
--- a/dots/import_assistant.py
+++ b/dots/import_assistant.py
@@ -20,8 +20,8 @@
import gtk, glib
import os, tempfile
from config_builder import ConfigBuilder
+from document_builder import document_new
import host_settings
-from document import Document
TABLES_DIR = host_settings.tablesdir
@@ -118,7 +118,7 @@ class ImportAssistant(object):
def _onAssistantApply(self, assistant):
filename = self.main_xml.get_object('doc_file_choose_button').get_filename()
- self.document = Document (filename)
+ self.document = document_new (filename)
if self.main_app:
diff --git a/dots/odtdocument.py b/dots/odtdocument.py
new file mode 100644
index 0000000..3d0f71f
--- /dev/null
+++ b/dots/odtdocument.py
@@ -0,0 +1,33 @@
+# Dots - A braille translation program.
+#
+# Copyright (C) 2010 Consorcio Fernando de los Rios
+# Author: Fernando Herrera <fherrera onirica com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import mimetypes
+from document import Document
+from translator import Translator
+from odf.odf2xhtml import TEXTNS
+from odf.odf2xhtml import ODF2XHTML
+
+class OdtDocument(Document):
+
+ def translate(self, config):
+ self.translator = Translator(config)
+ odhandler = ODF2XHTML (False, False)
+ odhandler.elements[(TEXTNS, u"changed-region")] = (odhandler.s_ignorexml,None)
+ result = odhandler.odf2xhtml(self.input_file).encode('UTF-8','xmlcharrefreplace')
+ self.braille_text = self.translator.translate_string (result)
+
diff --git a/dots/pdfdocument.py b/dots/pdfdocument.py
new file mode 100644
index 0000000..68f39db
--- /dev/null
+++ b/dots/pdfdocument.py
@@ -0,0 +1,46 @@
+# Dots - A braille translation program.
+#
+# Copyright (C) 2010 Consorcio Fernando de los Rios
+# Author: Fernando Herrera <fherrera onirica com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import mimetypes
+from document import Document
+from translator import Translator
+import poppler
+
+class PdfDocument(Document):
+ def translate(self, config):
+ self.translator = Translator(config)
+ uri = "file://" + self.input_file
+ document = poppler.document_new_from_file (uri, None)
+ npages = document.get_n_pages()
+ text = ""
+ for p in range(0,npages):
+ page = document.get_page(p)
+ w,h = page.get_size()
+ r = poppler.Rectangle ()
+ r.x1 = 0
+ r.x2 = w
+ r.y1 = 0
+ r.y2 = h
+ # Currently we are getting the layout from the pdf here
+ # we should collapse it
+ text += page.get_text(poppler.SELECTION_LINE,r)
+ self.braille_text = self.translator.translate_string (text)
+ return
+
+
+
diff --git a/dots/xmldocument.py b/dots/xmldocument.py
new file mode 100644
index 0000000..39061b1
--- /dev/null
+++ b/dots/xmldocument.py
@@ -0,0 +1,29 @@
+# Dots - A braille translation program.
+#
+# Copyright (C) 2010 Consorcio Fernando de los Rios
+# Author: Fernando Herrera <fherrera onirica com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import mimetypes
+from document import Document
+from translator import Translator
+
+class XmlDocument(Document):
+
+ def translate(self, config):
+ self.translator = Translator(config)
+ self.braille_text = self.translator.translate_file (self.input_file)
+
+
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]