[dots] Add support for MS Word files (old doc format, not XML docx).



commit b620bebe9ed7fa2304b86f8af5f1d07445cb6960
Author: Fernando Herrera <fherrera onirica com>
Date:   Thu Dec 2 02:47:03 2010 +0100

    Add support for MS Word files (old doc format, not XML docx).
    
    Use antiword to convert doc files to DocBook and then translate them.
    
    Use a dict to map mimetypes to the Document subclasses that translate them.
    
    Use try/except for loading most of the Document subclasses, so if we don't
    have the proper libraries or binaries we can still start the program without
    support for that kind of file.
    
    Do not hardcode the filechooser filter; build it from our successfully
    loaded Document subclasses.

 dots/Makefile.am         |    1 +
 dots/app_window.py       |   20 ++++++---------
 dots/docdocument.py      |   54 ++++++++++++++++++++++++++++++++++++++++
 dots/document_builder.py |   62 ++++++++++++++++++++++++++++++++++++++++------
 4 files changed, 117 insertions(+), 20 deletions(-)
---
diff --git a/dots/Makefile.am b/dots/Makefile.am
index 78e514d..7ccb280 100644
--- a/dots/Makefile.am
+++ b/dots/Makefile.am
@@ -8,6 +8,7 @@ dots_PYTHON = \
 	host_settings.py \
 	config_builder.py \
 	document.py \
+	docdocument.py \
 	pdfdocument_gi.py \
 	pdfdocument_pypoppler.py \
 	odtdocument.py \
diff --git a/dots/app_window.py b/dots/app_window.py
index dad80ed..0799b9a 100644
--- a/dots/app_window.py
+++ b/dots/app_window.py
@@ -28,6 +28,8 @@ import gtkunixprint
 import pango
 from dots_project import DotsProject
 from document_builder import document_new
+from document_builder import get_supported_mime_types
+from document_builder import get_supported_extensiones_patterns
 from config_builder import ConfigBuilder
 from table_editor import TableEditor
 from translator import Translator
@@ -223,18 +225,10 @@ class AppWindow(object):
 
 	filter = gtk.FileFilter()
 	filter.set_name(_("Documents"))
-	filter.add_mime_type("application/pdf")
-	filter.add_mime_type("application/vnd.oasis.opendocument.text")
-	filter.add_mime_type("application/xhtml+xml")
-	filter.add_mime_type("application/xml")
-	filter.add_mime_type("text/plain")
-	filter.add_mime_type("text/html")
-	filter.add_pattern("*.pdf")
-	filter.add_pattern("*.odt")
-	filter.add_pattern("*.txt")
-	filter.add_pattern("*.html")
-	filter.add_pattern("*.xhtml")
-	filter.add_pattern("*.xml")
+	for m in get_supported_mime_types():
+		filter.add_mime_type(m)
+	for p in get_supported_extensiones_patterns():
+		filter.add_pattern(p)
 	chooser.add_filter(filter)
 	chooser.set_filter(filter)
  	response = chooser.run()
@@ -242,6 +236,8 @@ class AppWindow(object):
 	chooser.destroy()
 	if response == gtk.RESPONSE_OK:
 		self.document = document_new (filename)
+		# TODO: Check for None here, although we should not get
+		#       non-supported mime-types from the filechooser
 		self.addDocument (self.document)
 		self.translate (self.document, self.config_builder)
 
diff --git a/dots/docdocument.py b/dots/docdocument.py
new file mode 100644
index 0000000..744751b
--- /dev/null
+++ b/dots/docdocument.py
@@ -0,0 +1,54 @@
+# Dots - A braille translation program.
+#
+# Copyright (C) 2010 Consorcio Fernando de los Rios
+#		Author: Fernando Herrera <fherrera onirica com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+import os
+import subprocess
+from document import Document
+from translator import Translator
+
+def get_antiword():
+	for path in os.environ["PATH"].split(os.pathsep):
+            f = os.path.join(path, "antiword")
+            if os.path.exists(f) and os.access(f, os.X_OK):
+                return f
+	return None
+
+antiword = get_antiword()
+if antiword is None:
+	raise NameError('Antiword not found')
+
+class DocDocument(Document):
+
+    def _get_text(seff, file):
+	text = subprocess.check_output([antiword, "-x", "db", file])
+	return text
+
+    def translate(self, config):
+	config['outputFormat']['inputTextEncoding'] = "UTF8"
+	self.translator = Translator(config)
+	result = self._get_text (self.input_file)
+	self.braille_text = self.translator.translate_string (result)
+
+if __name__ == "__main__":
+        import sys
+
+        if len(sys.argv) > 1:
+                document = OdtDocument(sys.argv[1])
+                print document._get_text(sys.argv[1])
+
+
diff --git a/dots/document_builder.py b/dots/document_builder.py
index fc2d0dc..78777d9 100644
--- a/dots/document_builder.py
+++ b/dots/document_builder.py
@@ -16,31 +16,77 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
+supported_mime_types = {}
+supported_extensions_patterns = []
+
 import mimetypes
 from document import Document
-from odtdocument import OdtDocument
+
 from xmldocument import XmlDocument
+supported_mime_types["application/xhtml+xml"] = XmlDocument
+supported_mime_types["application/xml"] = XmlDocument
+supported_mime_types["text/html"] = XmlDocument
+supported_extensions_patterns.append("*.xhtml")
+supported_extensions_patterns.append("*.xml")
+supported_extensions_patterns.append("*.html")
+
 from textdocument import TextDocument
+supported_mime_types["text/plain"] = TextDocument
+supported_extensions_patterns.append("*.text")
+try:
+	from odtdocument import OdtDocument
+	supported_mime_types["application/vnd.oasis.opendocument.text"] = OdtDocument
+	supported_extensions_patterns.append("*.odt")
+except:
+	pass
+
 try:
 	from pdfdocument_gi import PdfDocument
+	supported_mime_types["application/pdf"] = PdfDocument
+	supported_extensions_patterns.append("*.pdf")
+except:
+	try:
+		from pdfdocument_pypoppler import PdfDocument
+		supported_mime_types["application/pdf"] = PdfDocument
+		supported_extensions_patterns.append("*.pdf")
+	except:
+		pass
+	pass
+
+try:
+	from docdocument import DocDocument
+	supported_mime_types["application/msword"] = DocDocument
+	supported_extensions_patterns.append("*.doc")
+	supported_extensions_patterns.append("*.dot")
 except:
-	from pdfdocument_pypoppler import PdfDocument
 	pass
 
+
+
 def document_new(filename):
+	print supported_mime_types
 	if filename is None:
 		doc = TextDocument(None)
 		return doc
 
 	mime_type, encoding = mimetypes.guess_type (filename)
-	if mime_type == "application/pdf":
-		doc = PdfDocument(filename)
-	elif mime_type == "application/vnd.oasis.opendocument.text":
-		doc = OdtDocument(filename)
-	else:
-		doc = XmlDocument(filename)
+	print mime_type
+	if mime_type not in supported_mime_types:
+		# Try text/* mimetypes
+		if mime_type.startswith("text/"):
+			doc = TextDocument(filename)
+			return doc
+		return None
+
+	doc = supported_mime_types[mime_type] (filename)
 	return doc
 
+def get_supported_mime_types():
+	return supported_mime_types.keys()
+
+def get_supported_extensiones_patterns():
+	return supported_extensions_patterns
+
 
 if __name__ == "__main__":
 	import sys



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]