[dots/remove-assistant: 36/38] Use an alternative pygi poppler-based backend for pdf



commit 32360f4a4f57acab36cd69e240b5f771e7532650
Author: Fernando Herrera <fherrera onirica com>
Date:   Thu Jul 15 03:04:34 2010 +0200

    Use an alternative pygi poppler-based backend for pdf

 TODO                                              |    4 ++-
 bin/dotstableeditor                               |    1 +
 dots/Makefile.am                                  |    3 +-
 dots/app_window.py                                |    1 +
 dots/document_builder.py                          |    6 +++-
 dots/{pdfdocument.py => pdfdocument_gi.py}        |   41 ++++++++++++++-------
 dots/{pdfdocument.py => pdfdocument_pypoppler.py} |   30 ++++++++++++---
 7 files changed, 63 insertions(+), 23 deletions(-)
---
diff --git a/TODO b/TODO
index fbdf21c..4f38bd1 100644
--- a/TODO
+++ b/TODO
@@ -1,5 +1,7 @@
 - Put cursor at beginning of textviews when they are exposed.
-- Nice error handling when importing odt documents.
+- Nice error handling:
+	when importing odt documents.
+	when pdf has no text
 - Edit the text
 - .desktop file
 - basic packaging
diff --git a/bin/dotstableeditor b/bin/dotstableeditor
index 9164deb..77eb4c0 100755
--- a/bin/dotstableeditor
+++ b/bin/dotstableeditor
@@ -28,6 +28,7 @@
 import os
 from sys import argv
 from dots import host_settings
+import pygtk
 import gtk
 import re
 
diff --git a/dots/Makefile.am b/dots/Makefile.am
index e0c9f1b..643d96c 100644
--- a/dots/Makefile.am
+++ b/dots/Makefile.am
@@ -8,7 +8,8 @@ dots_PYTHON = \
 	host_settings.py \
 	config_builder.py \
 	document.py \
-	pdfdocument.py \
+	pdfdocument_gi.py \
+	pdfdocument_pypoppler.py \
 	odtdocument.py \
 	xmldocument.py \
 	document_builder.py \
diff --git a/dots/app_window.py b/dots/app_window.py
index d389200..b9bc17b 100644
--- a/dots/app_window.py
+++ b/dots/app_window.py
@@ -17,6 +17,7 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
+import pygtk
 import gtk, glib
 import os, tempfile
 from config_builder import ConfigBuilder
diff --git a/dots/document_builder.py b/dots/document_builder.py
index bc4e966..6fb3b32 100644
--- a/dots/document_builder.py
+++ b/dots/document_builder.py
@@ -19,8 +19,12 @@
 import mimetypes
 from document import Document
 from odtdocument import OdtDocument
-from pdfdocument import PdfDocument
 from xmldocument import XmlDocument
+try:
+	from pdfdocument_gi import PdfDocument
+except:
+	from pdfdocument_pypoppler import PdfDocument
+	pass
 
 def document_new(filename):
 	mime_type, encoding = mimetypes.guess_type (filename)
diff --git a/dots/pdfdocument.py b/dots/pdfdocument_gi.py
similarity index 74%
copy from dots/pdfdocument.py
copy to dots/pdfdocument_gi.py
index c030647..7c6d63f 100644
--- a/dots/pdfdocument.py
+++ b/dots/pdfdocument_gi.py
@@ -18,30 +18,43 @@
 
 from document import Document
 from translator import Translator
-import poppler
+import pygtk
+pygtk.require('2.0')
+from gi.repository import Poppler
 
 class PdfDocument(Document):
-    def translate(self, config):
-	# FIXME: Check if poppler gives us always UTF-8 strings
-	config['outputFormat']['inputTextEncoding'] = "UTF8"
-	self.translator = Translator(config)
-	uri = "file://" + self.input_file
-	document = poppler.document_new_from_file (uri, None)
+    def _get_text (self, file):
+	uri = "file://" + file
+	document = Poppler.Document.new_from_file (uri, "")
 	npages = document.get_n_pages()
 	text = ""
 	for p in range(0,npages):
 		page = document.get_page(p)
-		w,h = page.get_size()
-		r = poppler.Rectangle ()
-		r.x1 = 0
-		r.x2 = w
-		r.y1 = 0
-		r.y2 = h
 		# Currently we are getting the layout from the pdf here
 		# we should collapse it
-		text += page.get_text(poppler.SELECTION_LINE,r)
+		text += page.get_text()
+
+	return text
+
+	 
+    def translate(self, config):
+	# FIXME: Check if poppler gives us always UTF-8 strings
+	config['outputFormat']['inputTextEncoding'] = "UTF8"
+	self.translator = Translator(config)
+	text = self._get_text(self.input_file)
 	self.braille_text = self.translator.translate_string (text)
 	return
 
+
+
+if __name__ == "__main__":
+        import sys
+
+        if len(sys.argv) > 1:
+		document = PdfDocument(sys.argv[1])
+		print document._get_text(sys.argv[1])
+
+
+		PdfDocument._get_text
 	
 		
diff --git a/dots/pdfdocument.py b/dots/pdfdocument_pypoppler.py
similarity index 80%
rename from dots/pdfdocument.py
rename to dots/pdfdocument_pypoppler.py
index c030647..ab6dbbf 100644
--- a/dots/pdfdocument.py
+++ b/dots/pdfdocument_pypoppler.py
@@ -21,11 +21,8 @@ from translator import Translator
 import poppler
 
 class PdfDocument(Document):
-    def translate(self, config):
-	# FIXME: Check if poppler gives us always UTF-8 strings
-	config['outputFormat']['inputTextEncoding'] = "UTF8"
-	self.translator = Translator(config)
-	uri = "file://" + self.input_file
+    def _get_text (self, file):
+	uri = "file://" + file
 	document = poppler.document_new_from_file (uri, None)
 	npages = document.get_n_pages()
 	text = ""
@@ -39,9 +36,30 @@ class PdfDocument(Document):
 		r.y2 = h
 		# Currently we are getting the layout from the pdf here
 		# we should collapse it
-		text += page.get_text(poppler.SELECTION_LINE,r)
+		text += page.get_text(poppler.SELECTION_GLYPH,r)
+		print text
+
+	return text
+
+	 
+    def translate(self, config):
+	# FIXME: Check if poppler gives us always UTF-8 strings
+	config['outputFormat']['inputTextEncoding'] = "UTF8"
+	self.translator = Translator(config)
+	text = self._get_text(config.input_file)
 	self.braille_text = self.translator.translate_string (text)
 	return
 
+
+
+if __name__ == "__main__":
+        import sys
+
+        if len(sys.argv) > 1:
+		document = PdfDocument(sys.argv[1])
+		print document._get_text(sys.argv[1])
+
+
+		PdfDocument._get_text
 	
 		



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]