[dots] Add support for MS Word files (old doc format, not XML docx).
- From: Fernando Herrera de las Heras <fherrera src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [dots] Add support for MS Word files (old doc format, not XML docx).
- Date: Thu, 2 Dec 2010 01:52:37 +0000 (UTC)
commit b620bebe9ed7fa2304b86f8af5f1d07445cb6960
Author: Fernando Herrera <fherrera onirica com>
Date: Thu Dec 2 02:47:03 2010 +0100
Add support for MS Word files (old doc format, not XML docx).
Use antiword to convert doc files to DocBook and then translate them.
Use a dict to map mimetypes to the Document subclasses that translate them.
Use try/except (with pass) when loading most of the Document subclasses, so if
we don't have the proper libraries or binaries we can still start the program
without support for that kind of file.
Do not hardcode the filechooser filter; build it from our successfully loaded
Document subclasses.
dots/Makefile.am | 1 +
dots/app_window.py | 20 ++++++---------
dots/docdocument.py | 54 ++++++++++++++++++++++++++++++++++++++++
dots/document_builder.py | 62 ++++++++++++++++++++++++++++++++++++++++------
4 files changed, 117 insertions(+), 20 deletions(-)
---
diff --git a/dots/Makefile.am b/dots/Makefile.am
index 78e514d..7ccb280 100644
--- a/dots/Makefile.am
+++ b/dots/Makefile.am
@@ -8,6 +8,7 @@ dots_PYTHON = \
host_settings.py \
config_builder.py \
document.py \
+ docdocument.py \
pdfdocument_gi.py \
pdfdocument_pypoppler.py \
odtdocument.py \
diff --git a/dots/app_window.py b/dots/app_window.py
index dad80ed..0799b9a 100644
--- a/dots/app_window.py
+++ b/dots/app_window.py
@@ -28,6 +28,8 @@ import gtkunixprint
import pango
from dots_project import DotsProject
from document_builder import document_new
+from document_builder import get_supported_mime_types
+from document_builder import get_supported_extensiones_patterns
from config_builder import ConfigBuilder
from table_editor import TableEditor
from translator import Translator
@@ -223,18 +225,10 @@ class AppWindow(object):
filter = gtk.FileFilter()
filter.set_name(_("Documents"))
- filter.add_mime_type("application/pdf")
- filter.add_mime_type("application/vnd.oasis.opendocument.text")
- filter.add_mime_type("application/xhtml+xml")
- filter.add_mime_type("application/xml")
- filter.add_mime_type("text/plain")
- filter.add_mime_type("text/html")
- filter.add_pattern("*.pdf")
- filter.add_pattern("*.odt")
- filter.add_pattern("*.txt")
- filter.add_pattern("*.html")
- filter.add_pattern("*.xhtml")
- filter.add_pattern("*.xml")
+ for m in get_supported_mime_types():
+ filter.add_mime_type(m)
+ for p in get_supported_extensiones_patterns():
+ filter.add_pattern(p)
chooser.add_filter(filter)
chooser.set_filter(filter)
response = chooser.run()
@@ -242,6 +236,8 @@ class AppWindow(object):
chooser.destroy()
if response == gtk.RESPONSE_OK:
self.document = document_new (filename)
+ # TODO: Check for None here, although we should not get
+ # non-supported mime-types from the filechooser
self.addDocument (self.document)
self.translate (self.document, self.config_builder)
diff --git a/dots/docdocument.py b/dots/docdocument.py
new file mode 100644
index 0000000..744751b
--- /dev/null
+++ b/dots/docdocument.py
@@ -0,0 +1,54 @@
+# Dots - A braille translation program.
+#
+# Copyright (C) 2010 Consorcio Fernando de los Rios
+# Author: Fernando Herrera <fherrera onirica com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import os
+import subprocess
+from document import Document
+from translator import Translator
+
+def get_antiword():
+ for path in os.environ["PATH"].split(os.pathsep):
+ f = os.path.join(path, "antiword")
+ if os.path.exists(f) and os.access(f, os.X_OK):
+ return f
+ return None
+
+antiword = get_antiword()
+if antiword is None:
+ raise NameError('Antiword not found')
+
+class DocDocument(Document):
+
+ def _get_text(seff, file):
+ text = subprocess.check_output([antiword, "-x", "db", file])
+ return text
+
+ def translate(self, config):
+ config['outputFormat']['inputTextEncoding'] = "UTF8"
+ self.translator = Translator(config)
+ result = self._get_text (self.input_file)
+ self.braille_text = self.translator.translate_string (result)
+
+if __name__ == "__main__":
+ import sys
+
+ if len(sys.argv) > 1:
+ document = OdtDocument(sys.argv[1])
+ print document._get_text(sys.argv[1])
+
+
diff --git a/dots/document_builder.py b/dots/document_builder.py
index fc2d0dc..78777d9 100644
--- a/dots/document_builder.py
+++ b/dots/document_builder.py
@@ -16,31 +16,77 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
+supported_mime_types = {}
+supported_extensions_patterns = []
+
import mimetypes
from document import Document
-from odtdocument import OdtDocument
+
from xmldocument import XmlDocument
+supported_mime_types["application/xhtml+xml"] = XmlDocument
+supported_mime_types["application/xml"] = XmlDocument
+supported_mime_types["text/html"] = XmlDocument
+supported_extensions_patterns.append("*.xhtml")
+supported_extensions_patterns.append("*.xml")
+supported_extensions_patterns.append("*.html")
+
from textdocument import TextDocument
+supported_mime_types["text/plain"] = TextDocument
+supported_extensions_patterns.append("*.text")
+try:
+ from odtdocument import OdtDocument
+ supported_mime_types["application/vnd.oasis.opendocument.text"] = OdtDocument
+ supported_extensions_patterns.append("*.odt")
+except:
+ pass
+
try:
from pdfdocument_gi import PdfDocument
+ supported_mime_types["application/pdf"] = PdfDocument
+ supported_extensions_patterns.append("*.pdf")
+except:
+ try:
+ from pdfdocument_pypoppler import PdfDocument
+ supported_mime_types["application/pdf"] = PdfDocument
+ supported_extensions_patterns.append("*.pdf")
+ except:
+ pass
+ pass
+
+try:
+ from docdocument import DocDocument
+ supported_mime_types["application/msword"] = DocDocument
+ supported_extensions_patterns.append("*.doc")
+ supported_extensions_patterns.append("*.dot")
except:
- from pdfdocument_pypoppler import PdfDocument
pass
+
+
def document_new(filename):
+ print supported_mime_types
if filename is None:
doc = TextDocument(None)
return doc
mime_type, encoding = mimetypes.guess_type (filename)
- if mime_type == "application/pdf":
- doc = PdfDocument(filename)
- elif mime_type == "application/vnd.oasis.opendocument.text":
- doc = OdtDocument(filename)
- else:
- doc = XmlDocument(filename)
+ print mime_type
+ if mime_type not in supported_mime_types:
+ # Try text/* mimetypes
+ if mime_type.startswith("text/"):
+ doc = TextDocument(filename)
+ return doc
+ return None
+
+ doc = supported_mime_types[mime_type] (filename)
return doc
+def get_supported_mime_types():
+ return supported_mime_types.keys()
+
+def get_supported_extensiones_patterns():
+ return supported_extensions_patterns
+
if __name__ == "__main__":
import sys
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]