[ocrfeeder] Add the searchable PDF option to the exportation formats
- From: Joaquim Manuel Pereira Rocha <jrocha src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [ocrfeeder] Add the searchable PDF option to the exportation formats
- Date: Tue, 2 Aug 2011 21:51:44 +0000 (UTC)
commit ad94cc05a8d56f1e8ba617dc5b832ca8300defda
Author: Joaquim Rocha <jrocha igalia com>
Date: Tue Aug 2 23:37:56 2011 +0200
Add the searchable PDF option to the exportation formats
Allows choosing the type of the PDF to be exported: from scratch or
a searchable PDF.
src/ocrfeeder/feeder/documentGeneration.py | 21 +++++++++++++++--
src/ocrfeeder/studio/studioBuilder.py | 32 +++++++++++++++++++++++++++-
src/ocrfeeder/studio/widgetModeler.py | 4 +-
3 files changed, 51 insertions(+), 6 deletions(-)
---
diff --git a/src/ocrfeeder/feeder/documentGeneration.py b/src/ocrfeeder/feeder/documentGeneration.py
index 3b2b2be..ffa768a 100644
--- a/src/ocrfeeder/feeder/documentGeneration.py
+++ b/src/ocrfeeder/feeder/documentGeneration.py
@@ -355,14 +355,19 @@ class PlaintextGenerator(DocumentGenerator):
class PdfGenerator(DocumentGenerator):
- def __init__(self, name):
+ def __init__(self, name, from_scratch = False):
self.name = name
+ self._from_scratch = from_scratch
self.canvas = canvas.Canvas(self.name)
self.page_data = None
def addText(self, box):
x, y, width, height = box.getBoundsPrintSize(self.page_data.resolution)
text = self.canvas.beginText()
+ # Make the text transparent if we are not
+ # creating a PDF from scratch
+ if not self._from_scratch:
+ text.setTextRenderMode(3)
text.setTextOrigin(x * units.inch,
(self.page_data.height - y) * units.inch)
text.setCharSpace(box.text_data.letter_space)
@@ -374,11 +379,14 @@ class PdfGenerator(DocumentGenerator):
except:
debug('Error setting font %s' % box.text_data.face)
self.canvas.setFontSize(box.text_data.size)
- for line in box.text.split('\n'):
- text.textLine(line)
+ text.textLines(box.text)
self.canvas.drawText(text)
def addImage(self, box):
+ # Do nothing as the images will be already
+ # seen in the PDF
+ if not self._from_scratch:
+ return
x, y, width, height = box.getBoundsPrintSize(self.page_data.resolution)
self.canvas.drawInlineImage(box.image,
x * units.inch,
@@ -391,6 +399,13 @@ class PdfGenerator(DocumentGenerator):
self.canvas.setPageSize((page_data.width * units.inch,
page_data.height * units.inch))
self.page_data = page_data
+ # Paste the source image that users will read
+ # in the PDF
+ if not self._from_scratch:
+ image = ImageReader(page_data.image_path)
+ self.canvas.drawImage(image, 0, 0,
+ page_data.width * units.inch,
+ page_data.height * units.inch)
self.addBoxes(page_data.data_boxes)
self.canvas.showPage()
diff --git a/src/ocrfeeder/studio/studioBuilder.py b/src/ocrfeeder/studio/studioBuilder.py
index 33d34c2..b1c5944 100644
--- a/src/ocrfeeder/studio/studioBuilder.py
+++ b/src/ocrfeeder/studio/studioBuilder.py
@@ -292,7 +292,37 @@ class Studio:
self.source_images_controler.exportPagesToPlaintext(self.source_images_selector.getPixbufsSorted())
def exportToPdf(self, widget = None):
- self.source_images_controler.exportPagesToPdf(self.source_images_selector.getPixbufsSorted())
+ ask_pdf_type_dialog = gtk.MessageDialog(self.main_window.window,
+ gtk.DIALOG_MODAL | gtk.DIALOG_DESTROY_WITH_PARENT,
+ buttons = gtk.BUTTONS_OK_CANCEL)
+ ask_pdf_type_dialog.set_markup(_('What kind of PDF document do you '
+ 'wish?'))
+ pdf_from_scratch_radio = gtk.RadioButton(label= _('From scratch'))
+ pdf_from_scratch_radio.set_tooltip_text(
+ _('Creates a new PDF from scratch.'))
+ searchable_pdf_radio = gtk.RadioButton(pdf_from_scratch_radio,
+ _('Searchable PDF'))
+ searchable_pdf_radio.set_tooltip_text(_('Creates a PDF based on '
+ 'the images but with searchable '
+ 'text.'))
+ vbox = gtk.VBox(True)
+ vbox.add(pdf_from_scratch_radio)
+ vbox.add(searchable_pdf_radio)
+ content_area = ask_pdf_type_dialog.get_content_area()
+ content_area.add(vbox)
+ content_area.show_all()
+
+ response = ask_pdf_type_dialog.run()
+ ask_pdf_type_dialog.destroy()
+ if response == gtk.RESPONSE_CANCEL:
+ return
+
+ pdf_from_scratch = True
+ if searchable_pdf_radio.get_active():
+ pdf_from_scratch = False
+ self.source_images_controler.exportPagesToPdf(
+ self.source_images_selector.getPixbufsSorted(),
+ pdf_from_scratch)
def exportDialog(self, widget):
format_names = [format[1] for format in self.EXPORT_FORMATS.values()]
diff --git a/src/ocrfeeder/studio/widgetModeler.py b/src/ocrfeeder/studio/widgetModeler.py
index 0adb850..fe8544b 100644
--- a/src/ocrfeeder/studio/widgetModeler.py
+++ b/src/ocrfeeder/studio/widgetModeler.py
@@ -709,14 +709,14 @@ class ImageReviewer_Controler:
document_generator.addText(image_reviewer.getAllText())
document_generator.save()
- def exportPagesToPdf(self, pixbufs_sorted = []):
+ def exportPagesToPdf(self, pixbufs_sorted = [], pdf_from_scratch = True):
image_reviewers = self.__askForNumberOfPages(_('Export to PDF'),
pixbufs_sorted)
if not image_reviewers:
return
file_name = self.__askForFileName()
if file_name:
- document_generator = PdfGenerator(file_name)
+ document_generator = PdfGenerator(file_name, pdf_from_scratch)
for image_reviewer in image_reviewers:
document_generator.addPage(image_reviewer.getPageData())
document_generator.save()
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]