[ocrfeeder] Port to Python 3



commit 8615aff3966dbfe24ed45eed6d34dc9b4e6b4e90
Author: Joaquim Rocha <me joaquimrocha com>
Date:   Wed Jan 8 00:17:27 2020 +0100

    Port to Python 3
    
    Python 3 has been around for a while now, and Python 2 is going away
    in Debian soon, so a port was due.
    
    These changes represent the port and were done by using lib2to3, with
    a few manual edits to add further needed changes (like dropping the
    attempt to encode a string), or to drop some unnecessary ones, e.g.
    iterating through an object as returned by the dict's keys() method
    is fine, but lib2to3 still creates a list for it.
    
    Special thanks to Reuben (@reubot on GNOME Gitlab) for having done an
    initial MR for the port, which made me want to move this forward (but
    I am pushing a different commit as it needed many changes).

 bin/ocrfeeder-cli.in                       |  3 +--
 bin/ocrfeeder.in                           |  2 +-
 configure.ac                               |  2 +-
 src/ocrfeeder/feeder/documentGeneration.py | 20 ++++++++---------
 src/ocrfeeder/feeder/imageManipulation.py  |  4 +---
 src/ocrfeeder/feeder/layoutAnalysis.py     | 12 +++++-----
 src/ocrfeeder/feeder/ocrEngines.py         | 24 ++++++--------------
 src/ocrfeeder/studio/boxesarea.py          |  2 --
 src/ocrfeeder/studio/dataHolder.py         |  8 +++----
 src/ocrfeeder/studio/pagesiconview.py      |  2 --
 src/ocrfeeder/studio/project.py            |  8 +++----
 src/ocrfeeder/studio/studioBuilder.py      | 18 +++++++--------
 src/ocrfeeder/studio/widgetModeler.py      | 36 ++++++++++++++----------------
 src/ocrfeeder/studio/widgetPresenter.py    | 30 ++++++++++++-------------
 src/ocrfeeder/util/asyncworker.py          | 12 +++++-----
 src/ocrfeeder/util/configuration.py        | 14 +++++-------
 src/ocrfeeder/util/constants.py.in         |  2 --
 src/ocrfeeder/util/graphics.py             | 14 +++++-------
 src/ocrfeeder/util/lib.py                  | 20 +++++------------
 src/ocrfeeder/util/log.py                  |  4 +---
 20 files changed, 94 insertions(+), 143 deletions(-)
---
diff --git a/bin/ocrfeeder-cli.in b/bin/ocrfeeder-cli.in
index 809dcfa..80e7f3b 100644
--- a/bin/ocrfeeder-cli.in
+++ b/bin/ocrfeeder-cli.in
@@ -1,5 +1,4 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
+#!/usr/bin/env python3
 
 ###########################################################################
 #    OCRFeeder - The complete OCR suite
diff --git a/bin/ocrfeeder.in b/bin/ocrfeeder.in
index f2e5386..6d9b585 100644
--- a/bin/ocrfeeder.in
+++ b/bin/ocrfeeder.in
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 ###########################################################################
 #    OCRFeeder - The complete OCR suite
diff --git a/configure.ac b/configure.ac
index 8301c2a..d2714b5 100644
--- a/configure.ac
+++ b/configure.ac
@@ -25,7 +25,7 @@ AS_IF([test "x$enable_sandbox" = "xyes"], [
 ])
 
 dnl == check for python ==
-AM_PATH_PYTHON(2.5)
+AM_PATH_PYTHON(3.5)
 
 dnl == Python dependencies ==
 AX_PYTHON_MODULE([enchant],[needed])
diff --git a/src/ocrfeeder/feeder/documentGeneration.py b/src/ocrfeeder/feeder/documentGeneration.py
index c08a8ab..e47d6d7 100644
--- a/src/ocrfeeder/feeder/documentGeneration.py
+++ b/src/ocrfeeder/feeder/documentGeneration.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 ###########################################################################
 #    OCRFeeder - The complete OCR suite
 #    Copyright (C) 2009 Joaquim Rocha
@@ -51,7 +49,7 @@ class DocumentGeneratorManager(object):
         return self.GENERATORS.get(id)
 
     def getFormats(self):
-        return self.GENERATORS.keys()
+        return list(self.GENERATORS.keys())
 
 class DocumentGenerator(object):
 
@@ -174,7 +172,7 @@ class HtmlGenerator(DocumentGenerator):
 
     def save(self):
         pages = []
-        for i in xrange(len(self.bodies)):
+        for i in range(len(self.bodies)):
             previous_page = ''
             next_page = ''
             if i != 0:
@@ -213,12 +211,12 @@ class HtmlGenerator(DocumentGenerator):
             os.mkdir(images_folder)
         if pages:
             file = open(os.path.join(self.name, 'index.html'), 'w')
-            file.write(pages[0].encode('utf-8'))
+            file.write(pages[0])
             file.close()
             if len(pages) > 1:
-                for i in xrange(1, len(pages)):
+                for i in range(1, len(pages)):
                     file = open(os.path.join(self.name, 'page%s.html' % (i + 1)), 'w')
-                    file.write(pages[i].encode('utf-8'))
+                    file.write(pages[i])
                     file.close()
         if self.styles:
             file = open(os.path.join(self.name, 'style.css'), 'w')
@@ -250,7 +248,7 @@ class OdtGenerator(DocumentGenerator):
     def addText(self, data_box):
         text = data_box.getText()
         frame_style = Style(name='FrameStyle', family = 'graphic')
-        debug('Angle: %s' % data_box.text_data.angle)
+        debug('Angle: %s', data_box.text_data.angle)
         angle = data_box.text_data.angle
         if angle:
             frame_style = Style(name='FrameStyleRotated', family = 'graphic')
@@ -301,7 +299,7 @@ class OdtGenerator(DocumentGenerator):
             try:
                 os.unlink(image)
             except:
-                debug('Error removing image: %s' % image)
+                debug('Error removing image: %s', image)
 
     def __handlePageMaster(self, page_data):
         layout_name = 'Page%s%s' % (page_data.width, page_data.height)
@@ -355,7 +353,7 @@ class OdtGenerator(DocumentGenerator):
 class PlaintextGenerator(DocumentGenerator):
     def __init__(self, name):
         self.name = name
-        self.text = u''
+        self.text = ''
 
     def addText(self, newText):
         self.text += newText
@@ -368,7 +366,7 @@ class PlaintextGenerator(DocumentGenerator):
             # This will create a new file or **overwrite an existing file
             f = open(self.name, "w")
             try:
-                f.write(self.text.encode('utf-8'))
+                f.write(self.text)
             finally:
                 f.close() # Close the file
         except IOError:
diff --git a/src/ocrfeeder/feeder/imageManipulation.py b/src/ocrfeeder/feeder/imageManipulation.py
index 9b08e32..c940380 100644
--- a/src/ocrfeeder/feeder/imageManipulation.py
+++ b/src/ocrfeeder/feeder/imageManipulation.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 ###########################################################################
 #    OCRFeeder - The complete OCR suite
 #    Copyright (C) 2009 Joaquim Rocha
@@ -42,7 +40,7 @@ class ImageProcessor:
                 self.black_n_white_image = self.original_image.convert('L')
                 if not self.window_size:
                     self.window_size = self.original_image.size[1] / 60.
-                debug('Window Size: %s' % self.window_size)
+                debug('Window Size: %s', self.window_size)
             except:
                 debug(sys.exc_info())
                 raise ImageManipulationError(error_message)
diff --git a/src/ocrfeeder/feeder/layoutAnalysis.py b/src/ocrfeeder/feeder/layoutAnalysis.py
index a2fc873..f63a1d4 100644
--- a/src/ocrfeeder/feeder/layoutAnalysis.py
+++ b/src/ocrfeeder/feeder/layoutAnalysis.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 ###########################################################################
 #    OCRFeeder - The complete OCR suite
 #    Copyright (C) 2009 Joaquim Rocha
@@ -22,7 +20,7 @@ from ocrfeeder.util.log import debug
 from ocrfeeder.util import graphics, lib
 from ocrfeeder.util.constants import OCRFEEDER_DEBUG, DTP
 from ocrfeeder.studio.dataHolder import DataBox
-from imageManipulation import ImageProcessor
+from .imageManipulation import ImageProcessor
 from PIL import Image
 import re
 import math
@@ -93,10 +91,10 @@ class Block:
             self.extra_charge = NONE
 
     def __getVerticalRange(self):
-        return xrange(self.start_line, self.finish_line + 1)
+        return range(self.start_line, self.finish_line + 1)
 
     def __getHorizontalRange(self):
-        return xrange(self.first_one, self.last_one + 1)
+        return range(self.first_one, self.last_one + 1)
 
     def __inVerticalRange(self, verticalRange):
         begin = self.start_line
@@ -430,7 +428,7 @@ class ImageDeskewer(object):
 
         if not hough_accumulator:
             return 0
-        max_voted = hough_accumulator.keys()[0]
+        max_voted = list(hough_accumulator.keys())[0]
         for r_angle in hough_accumulator:
             max_voted_value = hough_accumulator.get(max_voted)
             if hough_accumulator[r_angle] > max_voted_value:
@@ -547,7 +545,7 @@ class LayoutAnalysis(object):
         text = self.ocr_engine.read()
         if self.clean_text:
             text = self.__cleanText(text)
-        return lib.ensureUnicode(text)
+        return text
 
     def __cleanText(self, text):
         clean_text = re.sub(r'(?<!-)-\n(?!\n)', r'', text)
diff --git a/src/ocrfeeder/feeder/ocrEngines.py b/src/ocrfeeder/feeder/ocrEngines.py
index 0bbf118..e0cab8c 100644
--- a/src/ocrfeeder/feeder/ocrEngines.py
+++ b/src/ocrfeeder/feeder/ocrEngines.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 ###########################################################################
 #    OCRFeeder - The complete OCR suite
 #    Copyright (C) 2009 Joaquim Rocha
@@ -80,7 +78,7 @@ class Engine:
             if self.languages:
                 favorite_language = self.languages.get(self._favorite_language, '')
                 if not favorite_language:
-                    values = self.languages.values()
+                    values = list(self.languages.values())
                     if values:
                         favorite_language = values[0]
                 parsed_arguments = parsed_arguments.replace(LANGUAGE_ARGUMENT,
@@ -89,15 +87,7 @@ class Engine:
         else:
             parsed_arguments = parsed_arguments.replace(LANGUAGE_ARGUMENT, '')
 
-        text = os.popen(self.engine_path + ' ' + parsed_arguments).read()
-        try:
-            try:
-                text = unicode(text, 'utf-8', 'replace')
-            except UnicodeDecodeError:
-                text = unicode(text, 'ascii', 'replace').encode('utf-8', 'replace')
-        finally:
-            os.unlink(self.image_path)
-        return text
+        return os.popen(self.engine_path + ' ' + parsed_arguments).read()
 
     def classify(self, reading_output, rules = []):
         stripped_output = reading_output.strip()
@@ -192,7 +182,7 @@ class OcrEnginesManager:
         return None
 
     def replaceEngine(self, engine, new_engine):
-        for i in xrange(len(self.ocr_engines)):
+        for i in range(len(self.ocr_engines)):
             eng, path = self.ocr_engines[i]
             if eng == engine:
                 new_path = self.engineToXml(new_engine, path)
@@ -250,11 +240,11 @@ class OcrEnginesManager:
 
         try:
             engine = Engine(**arguments)
-        except TypeError, exception:
-            debug('Error when unserializing engine: %s' % exception.message)
+        except TypeError as exception:
+            debug('Error when unserializing engine: %s', exception.message)
             engine = None
-        except WrongSettingsForEngine, we:
-            debug("Cannot load engine at %s: %s" %( xml_file_name, str(we)))
+        except WrongSettingsForEngine as we:
+            debug("Cannot load engine at %s: %s", xml_file_name, str(we))
             engine = None
         else:
             engine.temporary_folder = self.configuration_manager.TEMPORARY_FOLDER
diff --git a/src/ocrfeeder/studio/boxesarea.py b/src/ocrfeeder/studio/boxesarea.py
index 34d89fb..e6a000d 100644
--- a/src/ocrfeeder/studio/boxesarea.py
+++ b/src/ocrfeeder/studio/boxesarea.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 ###########################################################################
 #    OCRFeeder - The complete OCR suite
 #    Copyright (C) 2009-2013 Joaquim Rocha <me joaquimrocha com>
diff --git a/src/ocrfeeder/studio/dataHolder.py b/src/ocrfeeder/studio/dataHolder.py
index b4931e4..2e6f6d9 100644
--- a/src/ocrfeeder/studio/dataHolder.py
+++ b/src/ocrfeeder/studio/dataHolder.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 ###########################################################################
 #    OCRFeeder - The complete OCR suite
 #    Copyright (C) 2009 Joaquim Rocha
@@ -72,7 +70,7 @@ class DataBox(GObject.GObject):
                      (GObject.TYPE_INT,))
         }
 
-    def __init__(self, x = 0, y = 0, width = 0, height = 0, image = None, type = TEXT_TYPE, text = u''):
+    def __init__(self, x = 0, y = 0, width = 0, height = 0, image = None, type = TEXT_TYPE, text = ''):
         super(DataBox, self).__init__()
         self.x = int(x)
         self.y = int(y)
@@ -132,7 +130,7 @@ class DataBox(GObject.GObject):
         self.text_data.weight = font_weight
 
     def setText(self, text):
-        self.text = lib.ensureUnicode(text)
+        self.text = text
 
     def getText(self):
         return self.text
@@ -233,7 +231,7 @@ class PageData:
         return {'PageData': dictionary}
 
     def getTextFromBoxes(self, data_boxes=None):
-        text = u''
+        text = ''
         if data_boxes is None:
             data_boxes = self.data_boxes
         number_of_boxes = len(data_boxes)
diff --git a/src/ocrfeeder/studio/pagesiconview.py b/src/ocrfeeder/studio/pagesiconview.py
index 8f2c278..39966bf 100644
--- a/src/ocrfeeder/studio/pagesiconview.py
+++ b/src/ocrfeeder/studio/pagesiconview.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 ###########################################################################
 #    OCRFeeder - The complete OCR suite
 #    Copyright (C) 2009-2013 Joaquim Rocha <me joaquimrocha com>
diff --git a/src/ocrfeeder/studio/project.py b/src/ocrfeeder/studio/project.py
index edff930..4e4f54c 100644
--- a/src/ocrfeeder/studio/project.py
+++ b/src/ocrfeeder/studio/project.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 ###########################################################################
 #    OCRFeeder - The complete OCR suite
 #    Copyright (C) 2009 Joaquim Rocha
@@ -18,7 +16,7 @@
 #    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 ###########################################################################
 
-from dataHolder import PageData, DataBox, TextData
+from .dataHolder import PageData, DataBox, TextData
 from ocrfeeder.util.log import debug
 from ocrfeeder.util.configuration import ConfigurationManager
 from xml.dom import minidom
@@ -75,7 +73,7 @@ class ProjectSaver:
             for element in item:
                 self.convertToXml(element, root_node)
         else:
-            text = unicode(str(item), 'utf-8')
+            text = str(item)
             text_node = self.document.createTextNode(text)
             root_node.appendChild(text_node)
         return root_node
@@ -103,7 +101,7 @@ class ProjectSaver:
             embedded_name = os.path.join(images_dir, embbeded_name)
             shutil.copy(original_name, embedded_name)
             zip.write(embedded_name)
-        f = open(os.path.join(os.curdir, 'project.xml'), 'w')
+        f = open(os.path.join(os.curdir, 'project.xml'), 'wb')
         f.write(xml_content)
         f.close()
         zip.write(os.path.join(os.curdir, 'project.xml'))
diff --git a/src/ocrfeeder/studio/studioBuilder.py b/src/ocrfeeder/studio/studioBuilder.py
index 43cb432..cbf4490 100644
--- a/src/ocrfeeder/studio/studioBuilder.py
+++ b/src/ocrfeeder/studio/studioBuilder.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 ###########################################################################
 #    OCRFeeder - The complete OCR suite
 #    Copyright (C) 2009-2013 Joaquim Rocha
@@ -27,11 +25,11 @@ from ocrfeeder.util import lib
 from ocrfeeder.util.constants import *
 import sys
 import os.path
-import urllib
-import widgetPresenter
-from widgetModeler import ImageReviewer_Controler
-from dataHolder import DataBox, TextData
-from pagesiconview import PagesIconView
+import urllib.request, urllib.parse, urllib.error
+from . import widgetPresenter
+from .widgetModeler import ImageReviewer_Controler
+from .dataHolder import DataBox, TextData
+from .pagesiconview import PagesIconView
 from ocrfeeder.feeder.ocrEngines import Engine, OcrEnginesManager
 from ocrfeeder.feeder.documentGeneration import DocumentGeneratorManager
 from ocrfeeder.util.configuration import ConfigurationManager
@@ -188,7 +186,7 @@ class Studio:
         dialog = widgetPresenter.QueuedEventsProgressDialog(self.main_window)
         item_obtain = AsyncItem(lib.obtainScanners,(),
                                 self.__obtainScannersFinishedCb,(dialog,))
-        info_obtain = (_('Obtaining scanners'), _(u'Please wait…'))
+        info_obtain = (_('Obtaining scanners'), _('Please wait…'))
         dialog.setItemsList([(info_obtain, item_obtain)])
         dialog.run()
 
@@ -214,7 +212,7 @@ class Studio:
                 self.main_window)
             item_scan = AsyncItem(lib.scan,(device,),
                                   self.__scanFinishedCb,(dialog_scan,))
-            info_scan = (_('Scanning'), _(u'Please wait…'))
+            info_scan = (_('Scanning'), _('Please wait…'))
             dialog_scan.setItemsList([(info_scan, item_scan)])
             dialog_scan.run()
         else:
@@ -258,7 +256,7 @@ class Studio:
                               self.configuration_manager.TEMPORARY_FOLDER),
                              self.__loadPdfFinishedCb,
                              (dialog,))
-            info = (_('Loading PDF'), _(u'Please wait…'))
+            info = (_('Loading PDF'), _('Please wait…'))
             dialog.setItemsList([(info, item)])
             dialog.run()
 
diff --git a/src/ocrfeeder/studio/widgetModeler.py b/src/ocrfeeder/studio/widgetModeler.py
index ced5efd..34592b3 100644
--- a/src/ocrfeeder/studio/widgetModeler.py
+++ b/src/ocrfeeder/studio/widgetModeler.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 ###########################################################################
 #    OCRFeeder - The complete OCR suite
 #    Copyright (C) 2009-2013 Joaquim Rocha
@@ -18,12 +16,12 @@
 #    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 ###########################################################################
 
-from boxesarea import BoxesArea
-from dataHolder import DataBox, PageData, TEXT_TYPE, IMAGE_TYPE
+from .boxesarea import BoxesArea
+from .dataHolder import DataBox, PageData, TEXT_TYPE, IMAGE_TYPE
 from ocrfeeder.feeder.documentGeneration import OdtGenerator, HtmlGenerator, PlaintextGenerator, PdfGenerator
 from ocrfeeder.feeder.imageManipulation import *
 from ocrfeeder.feeder.layoutAnalysis import *
-from project import ProjectSaver, ProjectLoader
+from .project import ProjectSaver, ProjectLoader
 from ocrfeeder.util import graphics, ALIGN_LEFT, ALIGN_RIGHT, ALIGN_CENTER, \
      ALIGN_FILL, PAPER_SIZES
 from ocrfeeder.util.lib import getNonExistingFileName, unpaperImage
@@ -31,7 +29,7 @@ from ocrfeeder.util.log import debug, warning
 from ocrfeeder.util.configuration import ConfigurationManager
 from ocrfeeder.util import constants
 from ocrfeeder.util.asyncworker import AsyncItem
-from widgetPresenter import BoxEditor, PagesToExportDialog, FileDialog, \
+from .widgetPresenter import BoxEditor, PagesToExportDialog, FileDialog, \
     PageSizeDialog, UnpaperDialog, \
     QueuedEventsProgressDialog, SpellCheckerDialog
 import gettext
@@ -120,7 +118,7 @@ class ImageReviewer(Gtk.Paned):
 
     def removedBox(self, widget, box):
         self.updateMainWindow()
-        if not self.boxes_dict.has_key(box):
+        if box not in self.boxes_dict:
             return False
         del self.boxes_dict[box]
         if self.editor.box == box:
@@ -246,7 +244,7 @@ class ImageReviewer(Gtk.Paned):
             return
         try:
             self.image_pixbuf = GdkPixbuf.Pixbuf.new_from_file(self.path_to_image)
-        except Exception, exception:
+        except Exception as exception:
             debug(exception.message)
             return
         self.selectable_boxes_area.setBackgroundImage(self.path_to_image)
@@ -340,7 +338,7 @@ class ImageReviewer_Controler:
            not self.configuration_manager.unpaper_images_after_addition:
             for index in range(0, len(image_path_list)):
                 if not self.__addImage(image_path_list[index], index == 0):
-                    debug('Failed to load image "%s"' % image_path_list[index])
+                    debug('Failed to load image "%s"', image_path_list[index])
             return
         dialog = QueuedEventsProgressDialog(self.main_window)
         for index in range(0, item_list_length):
@@ -352,12 +350,12 @@ class ImageReviewer_Controler:
                               index == item_list_length - 1,
                               index == 0))
             if item_list_length == 1:
-                item_info = (_('Preparing image'), _(u'Please wait…'))
+                item_info = (_('Preparing image'), _('Please wait…'))
             else:
                 item_info = (_('Preparing image %(current_index)s/%(total)s') % \
                              {'current_index': index + 1,
                               'total': item_list_length},
-                             _(u'Please wait…'))
+                             _('Please wait…'))
             item_list.append((item_info,item))
         dialog.setItemsList(item_list)
         dialog.run()
@@ -411,7 +409,7 @@ class ImageReviewer_Controler:
                          (reviewer.path_to_image,),
                          self.__deskewCurrentImageFinishedCb,
                          (dialog, reviewer))
-        item_info = (_('Deskewing image'), _(u'Please wait…'))
+        item_info = (_('Deskewing image'), _('Please wait…'))
         dialog.setItemsList([(item_info, item)])
         dialog.run()
 
@@ -508,7 +506,7 @@ class ImageReviewer_Controler:
                          (page,),
                          self.__performRecognitionForPageFinishedCb,
                          (dialog, page, [page]))
-        info = (_('Recognizing Page'), _(u'Please wait…'))
+        info = (_('Recognizing Page'), _('Please wait…'))
         dialog.setItemsList([(info, item)])
         dialog.run()
 
@@ -526,7 +524,7 @@ class ImageReviewer_Controler:
                              self.__performRecognitionForPageFinishedCb,
                              (dialog, page, pages))
             info = (_('Recognizing Document'),
-                    _(u'Recognizing page %(page_number)s/%(total_pages)s. Please wait…') % {'page_number': i,
+                    _('Recognizing page %(page_number)s/%(total_pages)s. Please wait…') % {'page_number': i,
                                                                                             'total_pages': 
total})
             items.append((info, item))
             i += 1
@@ -742,7 +740,7 @@ class ImageReviewer_Controler:
                     page.setSize(size)
             else:
                 current_reviewer.page.setSize(size)
-            debug('Page size: %s' % size)
+            debug('Page size: %s', size)
         page_size_dialog.destroy()
         self.__updateStatusBar(current_reviewer)
 
@@ -950,7 +948,7 @@ class Editor:
         if index == -1:
             return
         engine = self.ocr_engines[index][0]
-        self.box_editor.setAvailableLanguages(engine.getLanguages().keys())
+        self.box_editor.setAvailableLanguages(list(engine.getLanguages().keys()))
 
     def _onOCREngineChanged(self, combobox):
         self._resetLanguages()
@@ -982,7 +980,7 @@ class Editor:
     def performOcr(self, engine_name = None):
         selected_engine_index = self.box_editor.getSelectedOcrEngine()
         if engine_name:
-            for i in xrange(len(self.ocr_engines)):
+            for i in range(len(self.ocr_engines)):
                 if self.ocr_engines[i][0].name == engine_name:
                     selected_engine_index = i
                     break
@@ -1000,7 +998,7 @@ class Editor:
     def performClassification(self, engine_name = None):
         selected_engine_index = self.box_editor.getSelectedOcrEngine()
         if engine_name:
-            for i in xrange(len(self.ocr_engines)):
+            for i in range(len(self.ocr_engines)):
                 if self.ocr_engines[i][0].name == engine_name:
                     selected_engine_index = i
                     break
@@ -1012,7 +1010,7 @@ class Editor:
     def __pressedAngleDetectionButton(self, widget):
         image = graphics.convertPixbufToImage(self.box_editor.getImage())
         angle = graphics.getHorizontalAngleForText(image)
-        debug('ANGLE: %s' % angle)
+        debug('ANGLE: %s', angle)
         self.box_editor.setAngle(angle)
 
     def saveDataBox(self):
diff --git a/src/ocrfeeder/studio/widgetPresenter.py b/src/ocrfeeder/studio/widgetPresenter.py
index 359af9e..9b74dcb 100644
--- a/src/ocrfeeder/studio/widgetPresenter.py
+++ b/src/ocrfeeder/studio/widgetPresenter.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 ###########################################################################
 #    OCRFeeder - The complete OCR suite
 #    Copyright (C) 2009-2013 Joaquim Rocha <me joaquimrocha com>
@@ -19,7 +17,7 @@
 #    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 ###########################################################################
 
-from dataHolder import DataBox, TEXT_TYPE, IMAGE_TYPE
+from .dataHolder import DataBox, TEXT_TYPE, IMAGE_TYPE
 from ocrfeeder.util import lib, PAPER_SIZES
 from ocrfeeder.util.configuration import ConfigurationManager
 from ocrfeeder.util.asyncworker import AsyncWorker
@@ -35,7 +33,7 @@ import signal
 import subprocess
 import sys
 import threading
-import Queue
+import queue
 import time
 _ = gettext.gettext
 
@@ -84,12 +82,12 @@ class MainWindow(Gtk.Window):
                                   ('Quit', Gtk.STOCK_QUIT, _('_Quit'), None, _('Exit the program'), 
menu_items['exit']),
                                   ('OpenProject', Gtk.STOCK_OPEN, _('_Open'), None, _('Open project'), 
menu_items['open_project']),
                                   ('SaveProject', Gtk.STOCK_SAVE, _('_Save'), None, _('Save project'), 
menu_items['save_project']),
-                                  ('SaveProjectAs', Gtk.STOCK_SAVE_AS, _(u'_Save As…'), '<control><shift>s', 
_('Save project with a chosen name'), menu_items['save_project_as']),
+                                  ('SaveProjectAs', Gtk.STOCK_SAVE_AS, _('_Save As…'), '<control><shift>s', 
_('Save project with a chosen name'), menu_items['save_project_as']),
                                   ('AddImage', Gtk.STOCK_ADD, _('_Add Image'), None, _('Add another image'), 
menu_items['add_image']),
                                   ('AddFolder', Gtk.STOCK_ADD, _('Add _Folder'), None, _('Add all images in 
a folder'), menu_items['add_folder']),
                                   ('AppendProject', Gtk.STOCK_ADD, _('Append Project'), None, _('Load a 
project and append it to the current one'), menu_items['append_project']),
                                   ('ImportPDF', Gtk.STOCK_ADD, _('_Import PDF'), None, _('Import PDF'), 
menu_items['import_pdf']),
-                                  ('Export', None, _(u'_Export…'), '<control><shift>e', _('Export to a 
chosen format'), menu_items['export_dialog']),
+                                  ('Export', None, _('_Export…'), '<control><shift>e', _('Export to a chosen 
format'), menu_items['export_dialog']),
                                   ('Edit', None, _('_Edit')),
                                   ('EditPage', Gtk.STOCK_EDIT, _('_Edit Page'), None, _('Edit page 
settings'), menu_items['edit_page']),
                                   ('Preferences', Gtk.STOCK_PREFERENCES, _('_Preferences'), None, 
_('Configure the application'), menu_items['preferences']),
@@ -279,7 +277,7 @@ class LanguagesComboBox(Gtk.ComboBox):
         model = self.get_model()
         if model.get_n_columns() != 3:
             return
-        cached_languages = self._cached_iters.keys()
+        cached_languages = list(self._cached_iters.keys())
         languages_to_unset = [lang for lang in cached_languages
                               if lang not in languages]
         for lang in languages:
@@ -841,9 +839,9 @@ class PageSizeDialog(Gtk.Dialog):
         page_size_frame = PlainFrame(_('Page size'))
         size_box = Gtk.VBox(spacing = 12)
         self.paper_sizes = Gtk.ComboBoxText.new()
-        papers = PAPER_SIZES.keys()
+        papers = list(PAPER_SIZES.keys())
         papers.sort()
-        self.paper_sizes.append_text(_(u'Custom…'))
+        self.paper_sizes.append_text(_('Custom…'))
         for paper in papers:
             self.paper_sizes.append_text(paper)
         active_index = self.__checkIfSizeIsStandard(page_size)
@@ -900,7 +898,7 @@ class PageSizeDialog(Gtk.Dialog):
     def __checkIfSizeIsStandard(self, page_size):
         width, height = page_size
         i = 1
-        names = PAPER_SIZES.keys()
+        names = list(PAPER_SIZES.keys())
         names.sort()
         for name in names:
             size = PAPER_SIZES[name]
@@ -975,7 +973,7 @@ class UnpaperDialog(Gtk.Dialog):
         if os.path.exists(unpapered_image):
             unpapered_image = lib.getNonExistingFileName(unpapered_image)
         command += ' %s %s' % (name, unpapered_image)
-        progress_bar = CommandProgressBarDialog(self, command, _('Performing Unpaper'), _(u'Performing 
unpaper. Please wait…'))
+        progress_bar = CommandProgressBarDialog(self, command, _('Performing Unpaper'), _('Performing 
unpaper. Please wait…'))
         progress_bar.run()
         self.unpapered_image = unpapered_image
 
@@ -985,7 +983,7 @@ class UnpaperDialog(Gtk.Dialog):
             return
         try:
             thumbnail_image = Image.open(image_path)
-        except Exception, exception:
+        except Exception as exception:
             debug(exception.message)
             return
         thumbnail_image.thumbnail((150, 200), Image.ANTIALIAS)
@@ -1369,7 +1367,7 @@ class PreferencesDialog(Gtk.Dialog):
                 self.unpaper_images.get_active()
         index = self.engines_combo.get_active()
         if index != -1:
-            debug('ACTIVE INDEX: %s %s' % (index, self.ocr_engines[index][0].name))
+            debug('ACTIVE INDEX: %s %s', index, self.ocr_engines[index][0].name)
             self.configuration_manager.setFavoriteEngine(self.ocr_engines[index][0].name)
 
     def __makeGeneralPreferences(self, *args):
@@ -1985,7 +1983,7 @@ class OcrSettingsDialog(Gtk.Dialog):
             return True
         except:
             SimpleDialog(self, _('Error setting the new engine; please check your engine settings.'), 
_('Warning'), 'warning').run()
-            print sys.exc_info()
+            print(sys.exc_info())
             return False
 
     def __packSettingInFrame(self, box, size_group, entry_name, entry,
@@ -2094,7 +2092,7 @@ class SpellCheckerDialog():
         self._numchars = 40
 
         if self.text:
-            self._checker.set_text(self.text.decode('utf-8'))
+            self._checker.set_text(self.text)
             try:
                 self.__next()
             except AttributeError:
@@ -2154,7 +2152,7 @@ class SpellCheckerDialog():
         self.suggestions_list.clear()
 
         try:
-            self._checker.next()
+            next(self._checker)
         except StopIteration:
             self.__set_no_more()
             return False
diff --git a/src/ocrfeeder/util/asyncworker.py b/src/ocrfeeder/util/asyncworker.py
index c42b1e2..9707ee6 100644
--- a/src/ocrfeeder/util/asyncworker.py
+++ b/src/ocrfeeder/util/asyncworker.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 ###########################################################################
 #    OCRFeeder
 #    Copyright (C) 2010 Igalia, S.L.
@@ -23,9 +21,9 @@
 ###########################################################################
 
 from threading import Thread
-import Queue
+import queue
 from gi.repository import GLib
-from log import debug
+from .log import debug
 
 class AsyncItem(object):
 
@@ -42,7 +40,7 @@ class AsyncItem(object):
         results = error = None
         try:
             results = self.target_method(*self.target_method_args)
-        except Exception, exception:
+        except Exception as exception:
             debug(str(exception))
             error = exception
         if self.canceled or not self.finish_callback:
@@ -58,7 +56,7 @@ class AsyncWorker(Thread):
 
     def __init__(self):
         Thread.__init__(self)
-        self.queue = Queue.Queue(0)
+        self.queue = queue.Queue(0)
         self.stopped = False
         self.async_item = None
         self.item_number = -1
@@ -74,7 +72,7 @@ class AsyncWorker(Thread):
                 self.async_item.run()
                 self.queue.task_done()
                 self.async_item = None
-            except Exception, exception:
+            except Exception as exception:
                 debug(str(exception))
                 self.stop()
 
diff --git a/src/ocrfeeder/util/configuration.py b/src/ocrfeeder/util/configuration.py
index 26c33e5..1eec7f5 100644
--- a/src/ocrfeeder/util/configuration.py
+++ b/src/ocrfeeder/util/configuration.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 ###########################################################################
 #    OCRFeeder - The complete OCR suite
 #    Copyright (C) 2009 Joaquim Rocha
@@ -192,7 +190,7 @@ class ConfigurationManager(object):
         color_list = [value.strip('()\ ') for value in color.split(',')]
         try:
             int_color_list = [int(value) for value in color_list]
-        except ValueError, exception:
+        except ValueError as exception:
             return None
         return tuple(int_color_list)
 
@@ -381,13 +379,13 @@ class ConfigurationManager(object):
         configuration_file = os.path.join(self.user_configuration_folder, 'preferences.xml')
         doc = minidom.Document()
         root_node = doc.createElement('ocrfeeder')
-        for key, value in ConfigurationManager.conf.items():
+        for key, value in list(ConfigurationManager.conf.items()):
             new_node = doc.createElement(key)
             new_node.appendChild(doc.createTextNode(str(value)))
             root_node.appendChild(new_node)
         configuration = doc.toxml(encoding = 'utf-8')
-        configuration += '\n' + root_node.toxml(encoding = 'utf-8')
-        new_configuration_file = open(configuration_file, 'w')
+        configuration += b'\n' + root_node.toxml(encoding = 'utf-8')
+        new_configuration_file = open(configuration_file, 'wb')
         new_configuration_file.write(configuration)
         new_configuration_file.close()
 
@@ -404,8 +402,8 @@ class ConfigurationManager(object):
            not os.path.exists(self.user_configuration_folder):
             shutil.copytree(old_config_folder, self.user_configuration_folder)
             debug('Migrated old configuration directory "%s" to the '
-                  'new one: "%s"' %
-                  (old_config_folder, self.user_configuration_folder))
+                  'new one: "%s"',
+                  old_config_folder, self.user_configuration_folder)
 
     text_fill = property(getTextFill,
                          setTextFill)
diff --git a/src/ocrfeeder/util/constants.py.in b/src/ocrfeeder/util/constants.py.in
index 4219024..ae4ae65 100644
--- a/src/ocrfeeder/util/constants.py.in
+++ b/src/ocrfeeder/util/constants.py.in
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 ###########################################################################
 #    OCRFeeder - The complete OCR suite
 #    Copyright (C) 2009 Joaquim Rocha
diff --git a/src/ocrfeeder/util/graphics.py b/src/ocrfeeder/util/graphics.py
index 1fc27f2..851cfe3 100644
--- a/src/ocrfeeder/util/graphics.py
+++ b/src/ocrfeeder/util/graphics.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 ###########################################################################
 #    OCRFeeder - The complete OCR suite
 #    Copyright (C) 2009-2013 Joaquim Rocha <me joaquimrocha com>
@@ -19,8 +17,8 @@
 #    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 ###########################################################################
 
-from lib import getNonExistingFileName
-from log import debug
+from .lib import getNonExistingFileName
+from .log import debug
 from PIL import Image
 from gi.repository import GdkPixbuf
 import math
@@ -69,7 +67,7 @@ def convertPixbufToImage(pixbuf):
     # creating the Image from bytes.
     if pixbuf.get_byte_length() > num_channels * dimensions[0] * dimensions[1]:
         i = 0
-        p = ''
+        p = b''
         for j in range(pixbuf.get_height()):
             p += pixels[i:i + pixbuf.get_width() * num_channels]
             i += pixbuf.get_rowstride()
@@ -112,7 +110,7 @@ def getTextSizeFromImage(image):
         colors.sort()
         background_color = colors[-1][1]
     text_sizes = []
-    for i in xrange(1, height):
+    for i in range(1, height):
         blank_line = True
         for j in range(0, width, 3):
             color = image.getpixel((j, i - 1))
@@ -137,7 +135,7 @@ def getTextSizeFromImage(image):
                 text_size = math.floor(i)
                 break
         text_size = max(text_sizes)
-    debug('Text Size: %s' % text_size)
+    debug('Text Size: %s', text_size)
     return text_size
 
 def getTextBeginHeight(image):
@@ -200,7 +198,7 @@ def convertMultiImage(image_path, temp_dir):
                                      extension)
             image_name = getNonExistingFileName(file_name)
             image.save(image_name, format='TIFF')
-            debug('Saved %s' % image_name)
+            debug('Saved %s', image_name)
             converted_paths.append(image_name)
             i += 1
     except EOFError:
diff --git a/src/ocrfeeder/util/lib.py b/src/ocrfeeder/util/lib.py
index c552d5a..f3f72b4 100644
--- a/src/ocrfeeder/util/lib.py
+++ b/src/ocrfeeder/util/lib.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 ###########################################################################
 #    OCRFeeder - The complete OCR suite
 #    Copyright (C) 2013 Joaquim Rocha <me joaquimrocha com>
@@ -25,12 +23,12 @@ from PIL import Image
 import tempfile
 from gi.repository import Gtk
 import math
-from constants import *
+from .constants import *
 import sane
 import tempfile
 import locale
 import xml.etree.ElementTree as etree
-from log import debug
+from .log import debug
 
 def getIconOrLabel(icon_name, label_text, icon_size = Gtk.IconSize.SMALL_TOOLBAR):
     icon = Gtk.Image()
@@ -46,7 +44,7 @@ def getIconOrLabel(icon_name, label_text, icon_size = Gtk.IconSize.SMALL_TOOLBAR
 
 def convertPdfToImages(pdf_file, temp_dir = '/tmp'):
     dir_name = tempfile.mkdtemp(dir = temp_dir)
-    debug('Converting PDF: %s to image' % pdf_file)
+    debug('Converting PDF: %s to image', pdf_file)
     resolution = 300
     file_name = os.path.splitext(os.path.basename(pdf_file))[0]
     command = 'gs -SDEVICE=jpeg -r%(resolution)sx%(resolution)s -sPAPERSIZE=letter ' \
@@ -144,7 +142,7 @@ def unpaperImage(configuration_manager, image_path):
     debug(command)
     try:
         os.system(command)
-    except Exception, exception:
+    except Exception as exception:
         debug(exception)
         return None
     finally:
@@ -156,7 +154,7 @@ def obtainScanners():
     try:
         devices = sane.get_devices()
         return devices
-    except (RuntimeError, sane._sane.error), msgerr:
+    except (RuntimeError, sane._sane.error) as msgerr:
         return None
 
 def scan(device):
@@ -169,7 +167,7 @@ def scan(device):
         result.save(filename, 'PNG')
         scandev.close()
         return filename
-    except (RuntimeError, sane._sane.error), msgerr:
+    except (RuntimeError, sane._sane.error) as msgerr:
         return None
 
 languages = {}
@@ -191,9 +189,3 @@ def makeRadioButton(label, from_widget=None):
     button.set_use_underline(True)
 
     return button
-
-def ensureUnicode(text):
-    if isinstance(text, unicode):
-        return text
-
-    return unicode(text, 'utf-8')
diff --git a/src/ocrfeeder/util/log.py b/src/ocrfeeder/util/log.py
index f688a52..e07861c 100644
--- a/src/ocrfeeder/util/log.py
+++ b/src/ocrfeeder/util/log.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 ###########################################################################
 #    OCRFeeder - The complete OCR suite
 #    Copyright (C) 2013 Joaquim Rocha <me joaquimrocha com>
@@ -20,7 +18,7 @@
 ###########################################################################
 
 import logging
-from constants import OCRFEEDER_COMPACT_NAME, OCRFEEDER_DEBUG
+from .constants import OCRFEEDER_COMPACT_NAME, OCRFEEDER_DEBUG
 
 logger = logging.getLogger(OCRFEEDER_COMPACT_NAME)
 LOG_FORMAT = "%(asctime)-15s %(levelname)s: %(message)s"



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]