[ocrfeeder] Add the clean text variable to the LayoutAnalysis
- From: Joaquim Manuel Pereira Rocha <jrocha src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [ocrfeeder] Add the clean text variable to the LayoutAnalysis
- Date: Tue, 27 Jul 2010 22:14:03 +0000 (UTC)
commit 8e068e68102687c9a02f3508b66a8aaffb0e0891
Author: Joaquim Rocha <jrocha igalia com>
Date: Tue Jul 13 18:24:26 2010 +0200
Add the clean text variable to the LayoutAnalysis
layoutAnalysis.LayoutAnalysis: Add the clean_text variable and make the
cleaning of the recognized text conditional on it.
feeder/layoutAnalysis.py | 7 +++++--
1 files changed, 5 insertions(+), 2 deletions(-)
---
diff --git a/feeder/layoutAnalysis.py b/feeder/layoutAnalysis.py
index 6b8d2ce..29e603a 100644
--- a/feeder/layoutAnalysis.py
+++ b/feeder/layoutAnalysis.py
@@ -381,11 +381,13 @@ class LayoutAnalysis(object):
ocr_engine,
window_size = None,
improve_column_detection = True,
- column_size = None):
+ column_size = None,
+ clean_text = True):
self.ocr_engine = ocr_engine
self.window_size = window_size
self.column_size = column_size
self.improve_column_detection = improve_column_detection
+ self.clean_text = clean_text
def recognize(self, path_to_image, page_resolution):
image_processor = ImageProcessor(path_to_image,
@@ -458,7 +460,8 @@ class LayoutAnalysis(object):
def readImage(self, image):
self.ocr_engine.setImage(image)
text = self.ocr_engine.read()
- text = self.__cleanText(text)
+ if self.clean_text:
+ text = self.__cleanText(text)
return text
def __cleanText(self, text):
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]