[ocrfeeder] Add forgotten configuration.py



commit 97caf00b23a81e66874a140f559f02501477d899
Author: Joaquim Rocha <jrocha igalia com>
Date:   Fri Oct 28 19:23:52 2011 +0200

    Add forgotten configuration.py
    
    It had been moved before and should have been added to
    commit 05cee9eb4557a9d2dd4a1ee12a60a02bfc848024

 src/ocrfeeder/util/configuration.py |  385 +++++++++++++++++++++++++++++++++++
 1 files changed, 385 insertions(+), 0 deletions(-)
---
diff --git a/src/ocrfeeder/util/configuration.py b/src/ocrfeeder/util/configuration.py
new file mode 100644
index 0000000..187f140
--- /dev/null
+++ b/src/ocrfeeder/util/configuration.py
@@ -0,0 +1,385 @@
+# -*- coding: utf-8 -*-
+
+###########################################################################
+#    OCRFeeder - The complete OCR suite
+#    Copyright (C) 2009 Joaquim Rocha
+#
+#    This program is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, either version 3 of the License, or
+#    (at your option) any later version.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+###########################################################################
+
+from ocrfeeder.feeder.ocrEngines import Engine
+from ocrfeeder.util.lib import getExecPath, debug
+from ocrfeeder.util.constants import OCRFEEDER_COMPACT_NAME
+import tempfile
+import shutil
+from xml.dom import minidom
+import os.path
+
+PREDEFINED_ENGINES = {'tesseract': {'name': 'Tesseract',
+                                    'image_format': 'TIF',
+                                    'engine_path': 'tesseract',
+                                    'arguments': '$IMAGE $FILE; cat '
+                                    '$FILE.txt; rm $FILE $FILE.txt'},
+                      'ocrad': {'name': 'Ocrad',
+                                'image_format': 'PPM',
+                                'engine_path': 'ocrad',
+                                'arguments': '-F utf8 $IMAGE'},
+                      'gocr': {'name': 'GOCR',
+                               'image_format': 'PPM',
+                               'engine_path': 'gocr',
+                               'arguments': '-f UTF8 $IMAGE'},
+                      'cuneiform': {'name': 'Cuneiform',
+                               'image_format': 'BMP',
+                               'engine_path': 'cuneiform',
+                               'arguments': '-f text -o $FILE $IMAGE >'
+                               ' /dev/null 2> /dev/null && cat $FILE'
+                               ' && rm $FILE'},
+                     }
+
+class ConfigurationManager(object):
+
+    TEXT_FILL = 'text_fill'
+    IMAGE_FILL = 'image_fill'
+    BOXES_STROKE = 'boxes_stroke'
+    WINDOW_SIZE = 'window_size'
+    UNPAPER = 'unpaper'
+    UNPAPER_USE_BLACK_FILTER = 'unpaper_use_black_filter'
+    UNPAPER_NOISE_FILTER_INTENSITY = 'unpaper_noise_filter_intensity'
+    UNPAPER_GRAY_FILTER_SIZE = 'unpaper_gray_filter_size'
+    UNPAPER_EXTRA_OPTIONS = 'unpaper_extra_options'
+    UNPAPER_IMAGES_AFTER_ADDITION = 'unpaper_images_after_addition'
+    FAVORITE_ENGINE = 'favorite_engine'
+    IMPROVE_COLUMN_DETECTION = 'improve_column_detection'
+    COLUMN_MIN_WIDTH = 'column_min_width'
+    CLEAN_TEXT = 'clean_text'
+    ADJUST_BOXES_BOUNDS = 'adjust_boxes_bounds'
+    BOUNDS_ADJUSTMENT_SIZE = 'bounds_adjustment_size'
+    DESKEW_IMAGES_AFTER_ADDITION = 'deskew_images_after_addition'
+
+    TEMPORARY_FOLDER = tempfile.mkdtemp(prefix = OCRFEEDER_COMPACT_NAME + '_')
+
+
+    DEFAULTS = {TEXT_FILL: (94, 156, 235, 150),
+                BOXES_STROKE: (94, 156, 235, 250),
+                IMAGE_FILL: (0, 183, 0, 150),
+                WINDOW_SIZE: 'auto',
+                UNPAPER: getExecPath('unpaper') or '',
+                UNPAPER_USE_BLACK_FILTER: True,
+                UNPAPER_NOISE_FILTER_INTENSITY: 'auto',
+                UNPAPER_GRAY_FILTER_SIZE: 'auto',
+                FAVORITE_ENGINE: 'ocrad',
+                IMPROVE_COLUMN_DETECTION: True,
+                COLUMN_MIN_WIDTH: 'auto',
+                CLEAN_TEXT: True,
+                ADJUST_BOXES_BOUNDS: True,
+                BOUNDS_ADJUSTMENT_SIZE: 'auto',
+                DESKEW_IMAGES_AFTER_ADDITION: False,
+                UNPAPER_IMAGES_AFTER_ADDITION: False,
+                UNPAPER_EXTRA_OPTIONS: '',
+                }
+
+    conf = dict(DEFAULTS)
+
+    def __init__(self):
+        self.user_configuration_folder = os.path.expanduser('~/.ocrfeeder')
+        self.user_engines_folder = os.path.join(self.user_configuration_folder, 'engines')
+        self.makeUserConfigurationFolder()
+        self.has_unpaper = self.getDefault(self.UNPAPER)
+
+    def makeUserConfigurationFolder(self):
+        if not os.path.exists(self.user_engines_folder):
+            os.makedirs(self.user_engines_folder)
+        if [file_name for file_name in os.listdir(self.user_engines_folder)\
+            if file_name.endswith('.xml')]:
+            return
+        for engine in self.getEnginesInSystem():
+            engine_file = os.path.join(self.user_engines_folder, engine.name)
+            engine.saveToXml('%s.xml' % engine_file)
+
+    def getEnginesInSystem(self):
+        existing_engines = []
+        engines_paths = [(name, getExecPath(conf['engine_path']))\
+                         for name, conf in PREDEFINED_ENGINES.items()]
+        for name, path in engines_paths:
+            if not path:
+                continue
+            engine_name = PREDEFINED_ENGINES[name].get('name', None)
+            arguments = PREDEFINED_ENGINES[name].get('arguments', None)
+            if not arguments or not engine_name:
+                continue
+            image_format = PREDEFINED_ENGINES[name].get('image_format', 'PPM')
+            failure_string = PREDEFINED_ENGINES[name].get('failure_string', '')
+            engine = Engine(engine_name,
+                            path,
+                            arguments,
+                            temporary_folder = self.TEMPORARY_FOLDER,
+                            image_format = image_format,
+                            failure_string = failure_string)
+            existing_engines.append(engine)
+        return existing_engines
+
+    def setConf(self, conf_key, value):
+        ConfigurationManager.conf[conf_key] = value
+
+    def getConf(self, conf_key):
+        return ConfigurationManager.conf[conf_key]
+
+    def setFavoriteEngine(self, engine_name):
+        self.setConf(self.FAVORITE_ENGINE, engine_name)
+
+    def getFavoriteEngine(self):
+        return self.getConf(self.FAVORITE_ENGINE)
+
+    def __getColorFromString(self, color):
+        if type(color) != str:
+            return color
+        color_list = [value.strip('()\ ') for value in color.split(',')]
+        try:
+            int_color_list = [int(value) for value in color_list]
+        except ValueError, exception:
+            return None
+        return tuple(int_color_list)
+
+    def setTextFill(self, color):
+        self.setConf(self.TEXT_FILL, color)
+
+    def setBoxesStroke(self, color):
+        self.setConf(self.BOXES_STROKE, color)
+
+    def setImageFill(self, color):
+        self.setConf(self.IMAGE_FILL, color)
+
+    def getTextFill(self):
+        return self.__getColorFromString(self.getConf(self.TEXT_FILL))
+
+    def getBoxesStroke(self):
+        return self.__getColorFromString(self.getConf(self.BOXES_STROKE))
+
+    def getImageFill(self):
+        return self.__getColorFromString(self.getConf(self.IMAGE_FILL))
+
+    def setWindowSize(self, window_size):
+        self.setConf(self.WINDOW_SIZE, window_size)
+
+    def getWindowSize(self):
+        return self.getConf(self.WINDOW_SIZE)
+
+    def setUnpaper(self, unpaper):
+        self.setConf(self.UNPAPER, unpaper)
+
+    def getUnpaper(self):
+        return self.getConf(self.UNPAPER)
+
+    def setUseBlackFilter(self, use_black_filter):
+        self.setConf(self.UNPAPER_USE_BLACK_FILTER, use_black_filter)
+
+    def getUseBlackFilter(self):
+        use_black_filter = self.getConf(self.UNPAPER_USE_BLACK_FILTER)
+        return self.__convertBoolSetting(use_black_filter)
+
+    def setNoiseFilterIntensity(self, noise_filter_intensity):
+        self.setConf(self.UNPAPER_NOISE_FILTER_INTENSITY,
+                     noise_filter_intensity)
+
+    def getNoiseFilterIntensity(self):
+        noise_filter_intensity = \
+            self.getConf(self.UNPAPER_NOISE_FILTER_INTENSITY)
+        if noise_filter_intensity == 'auto' or noise_filter_intensity == 'none':
+            return noise_filter_intensity
+        try:
+            noise_filter_intensity_int = int(noise_filter_intensity)
+        except ValueError:
+            return 'auto'
+        return noise_filter_intensity_int
+
+    def setGrayFilterSize(self, gray_filter_size):
+        self.setConf(self.UNPAPER_GRAY_FILTER_SIZE,
+                     gray_filter_size)
+
+    def getGrayFilterSize(self):
+        gray_filter_size = self.getConf(self.UNPAPER_GRAY_FILTER_SIZE)
+        if gray_filter_size == 'auto' or gray_filter_size == 'none':
+            return gray_filter_size
+        try:
+            gray_filter_size_int = int(gray_filter_size)
+        except ValueError:
+            return 'auto'
+        return gray_filter_size_int
+
+    def setUnpaperExtraOptions(self, extra_options):
+        self.setConf(self.UNPAPER_EXTRA_OPTIONS, extra_options)
+
+    def getUnpaperExtraOptions(self):
+        return self.getConf(self.UNPAPER_EXTRA_OPTIONS)
+
+    def setUnpaperImagesAfterAddition(self, unpaper_images_after_addition):
+        self.setConf(self.UNPAPER_IMAGES_AFTER_ADDITION,
+                     unpaper_images_after_addition)
+
+    def getUnpaperImagesAfterAddition(self):
+        unpaper = self.getConf(self.UNPAPER_IMAGES_AFTER_ADDITION)
+        return self.__convertBoolSetting(unpaper)
+
+    def setImproveColumnDetection(self, improve_column_detection):
+        self.setConf(self.IMPROVE_COLUMN_DETECTION, improve_column_detection)
+
+    def getImproveColumnDetection(self):
+        improve = self.getConf(self.IMPROVE_COLUMN_DETECTION)
+        return self.__convertBoolSetting(improve)
+
+    def setColumnMinWidth(self, column_min_width):
+        self.setConf(self.COLUMN_MIN_WIDTH, column_min_width)
+
+    def getColumnMinWidth(self):
+        column_min_width = self.getConf(self.COLUMN_MIN_WIDTH)
+        if column_min_width == 'auto':
+            return column_min_width
+        try:
+            column_min_width_int = int(column_min_width)
+        except ValueError:
+            return 'auto'
+        return column_min_width_int
+
+    def getCleanText(self):
+        clean_text = self.getConf(self.CLEAN_TEXT)
+        return self.__convertBoolSetting(clean_text)
+
+    def setCleanText(self, clean_text):
+        self.setConf(self.CLEAN_TEXT, clean_text)
+
+    def setAdjustBoxesBounds(self, adjust_boxes_bounds):
+        self.setConf(self.ADJUST_BOXES_BOUNDS, adjust_boxes_bounds)
+
+    def getAdjustBoxesBounds(self):
+        adjust = self.getConf(self.ADJUST_BOXES_BOUNDS)
+        return self.__convertBoolSetting(adjust)
+
+    def setDeskewImagesAfterAddition(self, deskew_images_after_addition):
+        self.setConf(self.DESKEW_IMAGES_AFTER_ADDITION,
+                     deskew_images_after_addition)
+
+    def getDeskewImagesAfterAddition(self):
+        deskew = self.getConf(self.DESKEW_IMAGES_AFTER_ADDITION)
+        return self.__convertBoolSetting(deskew)
+
+    def setBoundsAdjustmentSize(self, adjustment_size):
+        self.setConf(self.BOUNDS_ADJUSTMENT_SIZE, adjustment_size)
+
+    def getBoundsAdjustmentSize(self):
+        adjustment_size = self.getConf(self.BOUNDS_ADJUSTMENT_SIZE)
+        if adjustment_size == 'auto':
+            return adjustment_size
+        try:
+            adjustment_size_int = int(adjustment_size)
+        except ValueError:
+            return 'auto'
+        return adjustment_size_int
+
+    def __convertBoolSetting(self, setting):
+        if type(setting) == str:
+            if setting == 'True':
+                setting = True
+            else:
+                setting = False
+        return setting
+
+    def setDefaults(self):
+        ConfigurationManager.conf = dict(self.DEFAULTS)
+
+    def getDefault(self, variable_name):
+        if variable_name in self.DEFAULTS.keys():
+            return self.DEFAULTS[variable_name]
+        else:
+            return ''
+
+    def loadConfiguration(self):
+        configuration_file = os.path.join(self.user_configuration_folder, 'preferences.xml')
+        if not os.path.isfile(configuration_file):
+            return False
+        document = minidom.parse(configuration_file)
+        for key in self.DEFAULTS.keys():
+            nodeList = document.getElementsByTagName(key)
+            if nodeList:
+                for node in nodeList:
+                    for child in node.childNodes:
+                        if child.nodeType == child.TEXT_NODE:
+                            ConfigurationManager.conf[key] = str(child.nodeValue)
+                            break
+        return True
+
+    def configurationToXml(self):
+        configuration_file = os.path.join(self.user_configuration_folder, 'preferences.xml')
+        doc = minidom.Document()
+        root_node = doc.createElement('ocrfeeder')
+        for key, value in ConfigurationManager.conf.items():
+            new_node = doc.createElement(key)
+            new_node.appendChild(doc.createTextNode(str(value)))
+            root_node.appendChild(new_node)
+        configuration = doc.toxml(encoding = 'utf-8')
+        configuration += '\n' + root_node.toxml(encoding = 'utf-8')
+        new_configuration_file = open(configuration_file, 'w')
+        new_configuration_file.write(configuration)
+        new_configuration_file.close()
+
+    def removeTemporaryFolder(self):
+        try:
+            shutil.rmtree(self.TEMPORARY_FOLDER)
+        except:
+            debug('Error when removing the temporary folder: ' + \
+                  self.TEMPORARY_FOLDER)
+
+    text_fill = property(getTextFill,
+                         setTextFill)
+    image_fill = property(getImageFill,
+                          setImageFill)
+    boxes_stroke = property(getBoxesStroke,
+                            setBoxesStroke)
+    favorite_engine = property(getFavoriteEngine,
+                               setFavoriteEngine)
+    window_size = property(getWindowSize,
+                           setWindowSize)
+    unpaper = property(getUnpaper,
+                       setUnpaper)
+
+    unpaper_use_black_filter = property(getUseBlackFilter,
+                                        setUseBlackFilter)
+
+    unpaper_gray_filter_size = property(getGrayFilterSize,
+                                        setGrayFilterSize)
+
+    unpaper_noise_filter_intensity = property(getNoiseFilterIntensity,
+                                              setNoiseFilterIntensity)
+
+    unpaper_images_after_addition = property(getUnpaperImagesAfterAddition,
+                                             setUnpaperImagesAfterAddition)
+
+    unpaper_extra_options = property(getUnpaperExtraOptions,
+                                     setUnpaperExtraOptions)
+
+    improve_column_detection = property(getImproveColumnDetection,
+                                        setImproveColumnDetection)
+    column_min_width = property(getColumnMinWidth,
+                                setColumnMinWidth)
+
+    clean_text = property(getCleanText,
+                          setCleanText)
+
+    adjust_boxes_bounds = property(getAdjustBoxesBounds,
+                                   setAdjustBoxesBounds)
+
+    bounds_adjustment_size = property(getBoundsAdjustmentSize,
+                                      setBoundsAdjustmentSize)
+
+    deskew_images_after_addition = property(getDeskewImagesAfterAddition,
+                                            setDeskewImagesAfterAddition)



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]