[ocrfeeder] Fix wrong font size calculations when there are initial characters



commit 990ec303f61ed8f27796a4399b0306be055080a4
Author: Joaquim Rocha <jrocha igalia com>
Date:   Thu Jul 8 11:19:12 2010 +0200

    Fix wrong font size calculations when there are initial characters
    
    layoutAnalysis.LayoutAnalysis: Use only the right half of an image when
    calculating the font size. This avoids measuring any initial characters
    that may be present, which create the "illusion" of a much larger line.

 feeder/layoutAnalysis.py |    9 +++++++--
 1 files changed, 7 insertions(+), 2 deletions(-)
---
diff --git a/feeder/layoutAnalysis.py b/feeder/layoutAnalysis.py
index 5da0498..63b827e 100644
--- a/feeder/layoutAnalysis.py
+++ b/feeder/layoutAnalysis.py
@@ -440,13 +440,18 @@ class LayoutAnalysis(object):
         return data_box
 
     def getTextSizeFromImage(self, image, page_resolution):
-        text_size = graphics.getTextSizeFromImage(image)
+        width, height = image.size
+        # We get the right half of the image only because this
+        # way we avoid measuring eventual "initial chars" which
+        # leads to false text sizes (obviously this will fail
+        # for right-to-left languages)
+        image_right_half = image.crop((width / 2, 0, width, height))
+        text_size = graphics.getTextSizeFromImage(image_right_half)
         if not text_size:
             return None
         y_resolution = float(page_resolution)
         text_size /= y_resolution
         text_size *= DTP
-        print text_size
         return round(text_size)
 
     def readImage(self, image):



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]