[ocrfeeder] Add the searchable PDF option to the export formats



commit ad94cc05a8d56f1e8ba617dc5b832ca8300defda
Author: Joaquim Rocha <jrocha igalia com>
Date:   Tue Aug 2 23:37:56 2011 +0200

    Add the searchable PDF option to the export formats
    
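    Allows choosing the type of PDF to export: a PDF built from scratch
    from the recognized text and image boxes, or a searchable PDF, which
    shows the original page image with the recognized text drawn
    invisibly on top of it.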

 src/ocrfeeder/feeder/documentGeneration.py |   21 +++++++++++++++--
 src/ocrfeeder/studio/studioBuilder.py      |   32 +++++++++++++++++++++++++++-
 src/ocrfeeder/studio/widgetModeler.py      |    4 +-
 3 files changed, 51 insertions(+), 6 deletions(-)
---
diff --git a/src/ocrfeeder/feeder/documentGeneration.py b/src/ocrfeeder/feeder/documentGeneration.py
index 3b2b2be..ffa768a 100644
--- a/src/ocrfeeder/feeder/documentGeneration.py
+++ b/src/ocrfeeder/feeder/documentGeneration.py
@@ -355,14 +355,19 @@ class PlaintextGenerator(DocumentGenerator):
 
 class PdfGenerator(DocumentGenerator):
 
-    def __init__(self, name):
+    def __init__(self, name, from_scratch = False):
         self.name = name
+        self._from_scratch = from_scratch
         self.canvas = canvas.Canvas(self.name)
         self.page_data = None
 
     def addText(self, box):
         x, y, width, height = box.getBoundsPrintSize(self.page_data.resolution)
         text = self.canvas.beginText()
+        # Make the text transparent if we are not
+        # creating a PDF from scratch
+        if not self._from_scratch:
+            text.setTextRenderMode(3)
         text.setTextOrigin(x * units.inch,
                            (self.page_data.height - y) * units.inch)
         text.setCharSpace(box.text_data.letter_space)
@@ -374,11 +379,14 @@ class PdfGenerator(DocumentGenerator):
         except:
             debug('Error setting font %s' % box.text_data.face)
             self.canvas.setFontSize(box.text_data.size)
-        for line in box.text.split('\n'):
-            text.textLine(line)
+        text.textLines(box.text)
         self.canvas.drawText(text)
 
     def addImage(self, box):
+        # Do nothing as the images will be already
+        # seen in the PDF
+        if not self._from_scratch:
+            return
         x, y, width, height = box.getBoundsPrintSize(self.page_data.resolution)
         self.canvas.drawInlineImage(box.image,
                                     x * units.inch,
@@ -391,6 +399,13 @@ class PdfGenerator(DocumentGenerator):
         self.canvas.setPageSize((page_data.width * units.inch,
                                  page_data.height * units.inch))
         self.page_data = page_data
+        # Paste the source image that users will read
+        # in the PDF
+        if not self._from_scratch:
+            image = ImageReader(page_data.image_path)
+            self.canvas.drawImage(image, 0, 0,
+                                  page_data.width * units.inch,
+                                  page_data.height * units.inch)
         self.addBoxes(page_data.data_boxes)
         self.canvas.showPage()
 
diff --git a/src/ocrfeeder/studio/studioBuilder.py b/src/ocrfeeder/studio/studioBuilder.py
index 33d34c2..b1c5944 100644
--- a/src/ocrfeeder/studio/studioBuilder.py
+++ b/src/ocrfeeder/studio/studioBuilder.py
@@ -292,7 +292,37 @@ class Studio:
         self.source_images_controler.exportPagesToPlaintext(self.source_images_selector.getPixbufsSorted())
 
     def exportToPdf(self, widget = None):
-        self.source_images_controler.exportPagesToPdf(self.source_images_selector.getPixbufsSorted())
+        ask_pdf_type_dialog = gtk.MessageDialog(self.main_window.window,
+                      gtk.DIALOG_MODAL | gtk.DIALOG_DESTROY_WITH_PARENT,
+                      buttons = gtk.BUTTONS_OK_CANCEL)
+        ask_pdf_type_dialog.set_markup(_('What kind of PDF document do you '
+                                         'wish?'))
+        pdf_from_scratch_radio = gtk.RadioButton(label= _('From scratch'))
+        pdf_from_scratch_radio.set_tooltip_text(
+                                    _('Creates a new PDF from scratch.'))
+        searchable_pdf_radio = gtk.RadioButton(pdf_from_scratch_radio,
+                                               _('Searchable PDF'))
+        searchable_pdf_radio.set_tooltip_text(_('Creates a PDF based on '
+                                                'the images but with searchable '
+                                                'text.'))
+        vbox = gtk.VBox(True)
+        vbox.add(pdf_from_scratch_radio)
+        vbox.add(searchable_pdf_radio)
+        content_area = ask_pdf_type_dialog.get_content_area()
+        content_area.add(vbox)
+        content_area.show_all()
+
+        response = ask_pdf_type_dialog.run()
+        ask_pdf_type_dialog.destroy()
+        if response == gtk.RESPONSE_CANCEL:
+            return
+
+        pdf_from_scratch = True
+        if searchable_pdf_radio.get_active():
+            pdf_from_scratch = False
+        self.source_images_controler.exportPagesToPdf(
+             self.source_images_selector.getPixbufsSorted(),
+             pdf_from_scratch)
 
     def exportDialog(self, widget):
         format_names = [format[1] for format in self.EXPORT_FORMATS.values()]
diff --git a/src/ocrfeeder/studio/widgetModeler.py b/src/ocrfeeder/studio/widgetModeler.py
index 0adb850..fe8544b 100644
--- a/src/ocrfeeder/studio/widgetModeler.py
+++ b/src/ocrfeeder/studio/widgetModeler.py
@@ -709,14 +709,14 @@ class ImageReviewer_Controler:
                 document_generator.addText(image_reviewer.getAllText())
             document_generator.save()
 
-    def exportPagesToPdf(self, pixbufs_sorted = []):
+    def exportPagesToPdf(self, pixbufs_sorted = [], pdf_from_scratch = True):
         image_reviewers = self.__askForNumberOfPages(_('Export to PDF'),
                                                      pixbufs_sorted)
         if not image_reviewers:
             return
         file_name = self.__askForFileName()
         if file_name:
-            document_generator = PdfGenerator(file_name)
+            document_generator = PdfGenerator(file_name, pdf_from_scratch)
             for image_reviewer in image_reviewers:
                 document_generator.addPage(image_reviewer.getPageData())
             document_generator.save()



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]