[ocrfeeder] Add the clean text variable to the LayoutAnalysis



commit 8e068e68102687c9a02f3508b66a8aaffb0e0891
Author: Joaquim Rocha <jrocha igalia com>
Date:   Tue Jul 13 18:24:26 2010 +0200

    Add the clean text variable to the LayoutAnalysis
    
    layoutAnalysis.LayoutAnalysis: Add a clean_text constructor argument
    (default True) and only clean the text read by readImage when it is set.

 feeder/layoutAnalysis.py |    7 +++++--
 1 files changed, 5 insertions(+), 2 deletions(-)
---
diff --git a/feeder/layoutAnalysis.py b/feeder/layoutAnalysis.py
index 6b8d2ce..29e603a 100644
--- a/feeder/layoutAnalysis.py
+++ b/feeder/layoutAnalysis.py
@@ -381,11 +381,13 @@ class LayoutAnalysis(object):
                  ocr_engine,
                  window_size = None,
                  improve_column_detection = True,
-                 column_size = None):
+                 column_size = None,
+                 clean_text = True):
         self.ocr_engine = ocr_engine
         self.window_size = window_size
         self.column_size = column_size
         self.improve_column_detection = improve_column_detection
+        self.clean_text = clean_text
 
     def recognize(self, path_to_image, page_resolution):
         image_processor = ImageProcessor(path_to_image,
@@ -458,7 +460,8 @@ class LayoutAnalysis(object):
     def readImage(self, image):
         self.ocr_engine.setImage(image)
         text = self.ocr_engine.read()
-        text = self.__cleanText(text)
+        if self.clean_text:
+            text = self.__cleanText(text)
         return text
 
     def __cleanText(self, text):



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]