[ocrfeeder] Add a support for plain text exportation
- From: Joaquim Manuel Pereira Rocha <jrocha src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [ocrfeeder] Add a support for plain text exportation
- Date: Sat, 12 Mar 2011 12:18:59 +0000 (UTC)
commit 98d400ae2a0a9172de0d825d47f9e36a0506b557
Author: Andrew McGrath <amcgrath wnec gmail com>
Date: Mon Feb 21 02:15:53 2011 -0500
Add a support for plain text exportation
help/C/documentgeneration.page | 4 +-
src/ocrfeeder/feeder/documentGeneration.py | 20 +++++++++++++++++
src/ocrfeeder/studio/studioBuilder.py | 5 +++-
src/ocrfeeder/studio/widgetModeler.py | 33 ++++++++++++++++++++++------
4 files changed, 52 insertions(+), 10 deletions(-)
---
diff --git a/help/C/documentgeneration.page b/help/C/documentgeneration.page
index bbd3abe..c9bc483 100644
--- a/help/C/documentgeneration.page
+++ b/help/C/documentgeneration.page
@@ -11,8 +11,8 @@
<title>Document Generation</title>
-<p><app>OCRFeeder</app> currently generates two document formats:
-<em>ODT</em> and <em>HTML</em>.</p>
+<p><app>OCRFeeder</app> currently generates three document formats:
+<em>ODT</em> <em>HTML</em> and <em>Plain Text</em>.</p>
<p>After the recognition and eventual manual edition has been
performed, it is possible to generate a document by clicking
diff --git a/src/ocrfeeder/feeder/documentGeneration.py b/src/ocrfeeder/feeder/documentGeneration.py
index 7d1530c..6082551 100644
--- a/src/ocrfeeder/feeder/documentGeneration.py
+++ b/src/ocrfeeder/feeder/documentGeneration.py
@@ -329,3 +329,23 @@ class OdtGenerator(DocumentGenerator):
if weight == WEIGHT_BOLD:
return 'bold'
return 'normal'
+
+# Generates a .txt file
+class PlaintextGenerator(DocumentGenerator):
+ def __init__(self, name):
+ self.name = name
+ self.text = ''
+
+ def addText(self, newText):
+ self.text += newText
+
+ def save(self):
+ try:
+ # This will create a new file or **overwrite an existing file
+ f = open(self.name, "w")
+ try:
+ f.write(self.text) # Write text to file
+ finally:
+ f.close() # Close the file
+ except IOError:
+ pass
diff --git a/src/ocrfeeder/studio/studioBuilder.py b/src/ocrfeeder/studio/studioBuilder.py
index d0b5e0f..b88f0ad 100644
--- a/src/ocrfeeder/studio/studioBuilder.py
+++ b/src/ocrfeeder/studio/studioBuilder.py
@@ -42,7 +42,7 @@ import gtk
class Studio:
- EXPORT_FORMATS = ['HTML', 'ODT']
+ EXPORT_FORMATS = ['HTML', 'ODT', 'PlainText']
TARGET_TYPE_URI_LIST = 80
def __init__(self):
@@ -279,6 +279,9 @@ class Studio:
def exportToOdt(self, widget = None):
self.source_images_controler.exportPagesToOdt(self.source_images_selector.getPixbufsSorted())
+ def exportToPlaintext(self, widget = None):
+ self.source_images_controler.exportPagesToPlaintext(self.source_images_selector.getPixbufsSorted())
+
def exportDialog(self, widget):
export_dialog = widgetPresenter.ExportDialog(_('Export pages'), self.EXPORT_FORMATS)
response = export_dialog.run()
diff --git a/src/ocrfeeder/studio/widgetModeler.py b/src/ocrfeeder/studio/widgetModeler.py
index 44f407b..e7804d6 100644
--- a/src/ocrfeeder/studio/widgetModeler.py
+++ b/src/ocrfeeder/studio/widgetModeler.py
@@ -20,7 +20,7 @@
from customWidgets import SelectableBoxesArea
from dataHolder import DataBox, PageData, TEXT_TYPE, IMAGE_TYPE
-from ocrfeeder.feeder.documentGeneration import OdtGenerator, HtmlGenerator
+from ocrfeeder.feeder.documentGeneration import OdtGenerator, HtmlGenerator, PlaintextGenerator
from ocrfeeder.feeder.imageManipulation import *
from ocrfeeder.feeder.layoutAnalysis import *
from pango import FontDescription, SCALE
@@ -304,14 +304,13 @@ class ImageReviewer:
editor.box_editor.setText('')
self.updateMainWindow()
- def copyTextToClipboard(self):
+ def getTextFromBoxes(self, boxes):
text = ''
- selected_boxes = self.selectable_boxes_area.getSelectedAreas()
- selected_boxes.reverse()
- if selected_boxes:
- number_of_boxes = len(selected_boxes)
+ boxes.reverse()
+ if boxes:
+ number_of_boxes = len(boxes)
for i in range(number_of_boxes):
- box = selected_boxes[i]
+ box = boxes[i]
text += self.__getEditorFromBox(box).box_editor.getText()
if number_of_boxes > 1 and i < number_of_boxes - 1:
text += '\n\n'
@@ -319,8 +318,17 @@ class ImageReviewer:
current_box_editor = self.boxeditor_notebook.get_nth_page(\
self.boxeditor_notebook.get_current_page())
text = current_box_editor.getText()
+ return text
+
+ def copyTextToClipboard(self):
+ selected_boxes = self.selectable_boxes_area.getSelectedAreas()
+ text = self.getTextFromBoxes(selected_boxes)
gtk.Clipboard().set_text(text)
+ def getAllText(self):
+ boxes = self.selectable_boxes_area.getAllAreas()
+ return self.getTextFromBoxes(boxes)
+
def __getAllDataBoxes(self):
boxes = []
for editor in self.editor_list:
@@ -633,6 +641,17 @@ class ImageReviewer_Controler:
document_generator.addPage(image_reviewer.getPageData())
document_generator.save()
+ def exportPagesToPlaintext(self, pixbufs_sorted = []):
+ image_reviewers = self.__askForNumberOfPages(_('Export to Plain Text'), pixbufs_sorted)
+ if not image_reviewers:
+ return
+ file_name = self.__askForFileName()
+ if file_name:
+ document_generator = PlaintextGenerator(file_name)
+ for image_reviewer in image_reviewers:
+ document_generator.addText(image_reviewer.getAllText())
+ document_generator.save()
+
def saveProjectAs(self):
return self.__askForFileName(extension = '.ocrf')
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]