[ocrfeeder] Add support for plain text export



commit 98d400ae2a0a9172de0d825d47f9e36a0506b557
Author: Andrew McGrath <amcgrath wnec gmail com>
Date:   Mon Feb 21 02:15:53 2011 -0500

    Add support for plain text export

 help/C/documentgeneration.page             |    4 +-
 src/ocrfeeder/feeder/documentGeneration.py |   20 +++++++++++++++++
 src/ocrfeeder/studio/studioBuilder.py      |    5 +++-
 src/ocrfeeder/studio/widgetModeler.py      |   33 ++++++++++++++++++++++------
 4 files changed, 52 insertions(+), 10 deletions(-)
---
diff --git a/help/C/documentgeneration.page b/help/C/documentgeneration.page
index bbd3abe..c9bc483 100644
--- a/help/C/documentgeneration.page
+++ b/help/C/documentgeneration.page
@@ -11,8 +11,8 @@
 
 <title>Document Generation</title>
 
-<p><app>OCRFeeder</app> currently generates two document formats:
-<em>ODT</em> and <em>HTML</em>.</p>
+<p><app>OCRFeeder</app> currently generates three document formats:
+<em>ODT</em>, <em>HTML</em> and <em>Plain Text</em>.</p>
 
 <p>After the recognition and eventual manual edition has been
 performed, it is possible to generate a document by clicking
diff --git a/src/ocrfeeder/feeder/documentGeneration.py b/src/ocrfeeder/feeder/documentGeneration.py
index 7d1530c..6082551 100644
--- a/src/ocrfeeder/feeder/documentGeneration.py
+++ b/src/ocrfeeder/feeder/documentGeneration.py
@@ -329,3 +329,23 @@ class OdtGenerator(DocumentGenerator):
         if weight == WEIGHT_BOLD:
             return 'bold'
         return 'normal'
+
+# Generates a .txt file
+class PlaintextGenerator(DocumentGenerator):
+    def __init__(self, name):
+        self.name = name
+        self.text = ''
+        
+    def addText(self, newText):
+        self.text += newText
+
+    def save(self):
+        try:
+            # This will create a new file or **overwrite an existing file
+            f = open(self.name, "w")
+            try:
+                f.write(self.text) # Write text to file
+            finally:
+                f.close() # Close the file
+        except IOError:
+            pass
diff --git a/src/ocrfeeder/studio/studioBuilder.py b/src/ocrfeeder/studio/studioBuilder.py
index d0b5e0f..b88f0ad 100644
--- a/src/ocrfeeder/studio/studioBuilder.py
+++ b/src/ocrfeeder/studio/studioBuilder.py
@@ -42,7 +42,7 @@ import gtk
 
 class Studio:
 
-    EXPORT_FORMATS = ['HTML', 'ODT']
+    EXPORT_FORMATS = ['HTML', 'ODT', 'PlainText']
     TARGET_TYPE_URI_LIST = 80
 
     def __init__(self):
@@ -279,6 +279,9 @@ class Studio:
     def exportToOdt(self, widget = None):
         self.source_images_controler.exportPagesToOdt(self.source_images_selector.getPixbufsSorted())
 
+    def exportToPlaintext(self, widget = None):
+        self.source_images_controler.exportPagesToPlaintext(self.source_images_selector.getPixbufsSorted())
+
     def exportDialog(self, widget):
         export_dialog = widgetPresenter.ExportDialog(_('Export pages'), self.EXPORT_FORMATS)
         response = export_dialog.run()
diff --git a/src/ocrfeeder/studio/widgetModeler.py b/src/ocrfeeder/studio/widgetModeler.py
index 44f407b..e7804d6 100644
--- a/src/ocrfeeder/studio/widgetModeler.py
+++ b/src/ocrfeeder/studio/widgetModeler.py
@@ -20,7 +20,7 @@
 
 from customWidgets import SelectableBoxesArea
 from dataHolder import DataBox, PageData, TEXT_TYPE, IMAGE_TYPE
-from ocrfeeder.feeder.documentGeneration import OdtGenerator, HtmlGenerator
+from ocrfeeder.feeder.documentGeneration import OdtGenerator, HtmlGenerator, PlaintextGenerator
 from ocrfeeder.feeder.imageManipulation import *
 from ocrfeeder.feeder.layoutAnalysis import *
 from pango import FontDescription, SCALE
@@ -304,14 +304,13 @@ class ImageReviewer:
                 editor.box_editor.setText('')
         self.updateMainWindow()
 
-    def copyTextToClipboard(self):
+    def getTextFromBoxes(self, boxes):
         text = ''
-        selected_boxes = self.selectable_boxes_area.getSelectedAreas()
-        selected_boxes.reverse()
-        if selected_boxes:
-            number_of_boxes = len(selected_boxes)
+        boxes.reverse()
+        if boxes:
+            number_of_boxes = len(boxes)
             for i in range(number_of_boxes):
-                box = selected_boxes[i]
+                box = boxes[i]
                 text += self.__getEditorFromBox(box).box_editor.getText()
                 if number_of_boxes > 1 and i < number_of_boxes - 1:
                     text += '\n\n'
@@ -319,8 +318,17 @@ class ImageReviewer:
             current_box_editor = self.boxeditor_notebook.get_nth_page(\
                                      self.boxeditor_notebook.get_current_page())
             text = current_box_editor.getText()
+        return text
+
+    def copyTextToClipboard(self):
+        selected_boxes = self.selectable_boxes_area.getSelectedAreas()
+        text = self.getTextFromBoxes(selected_boxes)
         gtk.Clipboard().set_text(text)
 
+    def getAllText(self):
+        boxes = self.selectable_boxes_area.getAllAreas()
+        return self.getTextFromBoxes(boxes)
+
     def __getAllDataBoxes(self):
         boxes = []
         for editor in self.editor_list:
@@ -633,6 +641,17 @@ class ImageReviewer_Controler:
                 document_generator.addPage(image_reviewer.getPageData())
             document_generator.save()
 
+    def exportPagesToPlaintext(self, pixbufs_sorted = []):
+        image_reviewers = self.__askForNumberOfPages(_('Export to Plain Text'), pixbufs_sorted)
+        if not image_reviewers:
+            return
+        file_name = self.__askForFileName()
+        if file_name:
+            document_generator = PlaintextGenerator(file_name)
+            for image_reviewer in image_reviewers:
+                document_generator.addText(image_reviewer.getAllText())
+            document_generator.save()
+
     def saveProjectAs(self):
         return self.__askForFileName(extension = '.ocrf')
 



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]