[dots] Initial pdf support using poppler



commit 5691478faa4da74034880eebe63365113fe9b344
Author: Fernando Herrera <fherrera onirica com>
Date:   Wed Jun 2 16:52:17 2010 +0200

    Initial pdf support using poppler

 TODO             |    3 ++-
 dots/document.py |   15 +++++++++++++++
 2 files changed, 17 insertions(+), 1 deletions(-)
---
diff --git a/TODO b/TODO
index 3554ab9..947d24b 100644
--- a/TODO
+++ b/TODO
@@ -1,7 +1,8 @@
 - Put cursor at beginning of textviews when they are exposed.
 - Add application window.
 
-- Add semantics for odt xml files
+- Add semantics for odt files (we are doing a conversion right now to xhtml)
+	If not, fix xhtml conversion to remove control changes
 - Add pdfextractor 
 
 For model/view branch:
diff --git a/dots/document.py b/dots/document.py
index 100871a..258ed35 100644
--- a/dots/document.py
+++ b/dots/document.py
@@ -39,6 +39,21 @@ class Document():
 		self.translate_xml ()
 
     def translate_pdf(self):
+	import poppler
+	uri = "file://" + self.input_file
+	document = poppler.document_new_from_file (uri, None)
+	npages = document.get_n_pages()
+	for p in range(0,npages-1):
+		print p
+		page = document.get_page(p)
+		w,h = page.get_size()
+		r = poppler.Rectangle ()
+		r.x1 = 0
+		r.x2 = w
+		r.y1 = 0
+		r.y2 = h
+		text = page.get_text(poppler.SELECTION_LINE,r)
+		print text
 	return
 
     def translate_odt(self):



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]