[ocrfeeder] Stop using ImageReviewer instances and use PageData ones for remaining actions
- From: Joaquim Manuel Pereira Rocha <jrocha src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [ocrfeeder] Stop using ImageReviewer instances and use PageData ones for remaining actions
- Date: Sat, 3 Dec 2011 13:36:27 +0000 (UTC)
commit 80838ff2cfffdd5b3d80be572592904d84b19f4c
Author: Joaquim Rocha <jrocha igalia com>
Date: Sat Dec 3 03:13:01 2011 +0100
Stop using ImageReviewer instances and use PageData ones for remaining actions
ImageReviewer instances were still being used when trying to export
a page or when recognizing the document. These changes drop what
was still left from the ImageReviewer regular usage from the
previous commit and use the PageData ones instead.
src/ocrfeeder/studio/widgetModeler.py | 174 ++++++++++++++++++---------------
1 files changed, 96 insertions(+), 78 deletions(-)
---
diff --git a/src/ocrfeeder/studio/widgetModeler.py b/src/ocrfeeder/studio/widgetModeler.py
index 1bf8b04..7d97d50 100644
--- a/src/ocrfeeder/studio/widgetModeler.py
+++ b/src/ocrfeeder/studio/widgetModeler.py
@@ -126,6 +126,15 @@ class SourceImagesSelectorIconView(gtk.IconView):
return self.get_model().get_value(iter, 2)
return None
+ def getAllPages(self):
+ model = self.get_model()
+ pages = []
+ iter = model.get_iter_root()
+ while iter:
+ pages.append(model.get_value(iter, 2))
+ iter = model.iter_next(iter)
+ return pages
+
def setDeleteCurrentPageFunction(self, function):
self.delete_current_page_function = function
@@ -270,15 +279,7 @@ class ImageReviewer(gtk.HPaned):
boxes.reverse()
self.editor.saveDataBox()
if boxes:
- number_of_boxes = len(boxes)
- for i in range(number_of_boxes):
- box = boxes[i]
- data_box = self.boxes_dict.get(box)
- if data_box and data_box.getType() != TEXT_TYPE:
- continue
- text += data_box.getText()
- if number_of_boxes > 1 and i < number_of_boxes - 1:
- text += '\n\n'
+ text = getTextFromBoxes([self.boxes_dict.get(box) for box in boxes])
else:
if self.editor.box_editor.getType() == TEXT_TYPE:
text = self.box_editor.getText()
@@ -304,7 +305,8 @@ class ImageReviewer(gtk.HPaned):
boxes = boxes_sorted
return boxes
- def getPageData(self):
+ def savePageData(self):
+ self.editor.saveDataBox()
self.page.data_boxes = self.__getAllDataBoxes()
return self.page
@@ -373,7 +375,6 @@ class ImageReviewer_Controler:
ocr_engines, configuration_manager):
self.main_window = main_window
self.notebook = self.main_window.notebook
- self.image_reviewer_dict = {}
self.source_images_selector_widget = source_images_selector_widget
self.ocr_engines = ocr_engines
self.configuration_manager = configuration_manager
@@ -499,7 +500,7 @@ class ImageReviewer_Controler:
else:
zoom = int(reviewer.selectable_boxes_area.get_scale() * 100)
status_message = _('Zoom: %s %%') % zoom
- page_data = reviewer.getPageData()
+ page_data = reviewer.savePageData()
status_message += ' ' + _('Resolution: %.2f x %.2f') % (page_data.resolution[0],
page_data.resolution[1])
status_message += ' ' + _('Page size: %i x %i') % (page_data.width,
@@ -513,7 +514,7 @@ class ImageReviewer_Controler:
index = 0
while index < n_pages:
reviewer = self.notebook.get_nth_page(index)
- if reviewer.getPageData() == page_data:
+ if reviewer.savePageData() == page_data:
self.notebook.set_current_page(index)
return reviewer
index += 1
@@ -524,6 +525,15 @@ class ImageReviewer_Controler:
self.notebook.set_current_page(index)
return reviewer
+ def __updateImageReviewers(self):
+ n_pages = self.notebook.get_n_pages()
+ index = 0
+ while index < n_pages:
+ reviewer = self.notebook.get_nth_page(index)
+ reviewer.clear()
+ reviewer.updatePageData(reviewer.page)
+ index += 1
+
def recognizeSelectedAreas(self, widget):
image_reviewer = self.__getCurrentReviewer()
image_reviewer.performOcrForSelectedBoxes(self.configuration_manager.favorite_engine)
@@ -545,28 +555,29 @@ class ImageReviewer_Controler:
if image_reviewer.selectable_boxes_area.getAllAreas() and \
self.__confirmOveritePossibilityByRecognition() != gtk.RESPONSE_YES:
return
+ page = image_reviewer.page
dialog = QueuedEventsProgressDialog(self.main_window.window)
- item = AsyncItem(self.__performRecognitionForReviewer,
- (image_reviewer,),
- self.__performRecognitionForReviewerFinishedCb,
- (dialog, image_reviewer, [image_reviewer]))
+ item = AsyncItem(self.__performRecognitionForPage,
+ (page,),
+ self.__performRecognitionForPageFinishedCb,
+ (dialog, page, [page]))
info = (_('Recognizing Page'), _(u'Please wait…'))
dialog.setItemsList([(info, item)])
dialog.run()
def recognizeDocument(self):
- reviewers = self.image_reviewer_dict.values()
+ pages = self.source_images_selector_widget.getAllPages()
dialog = QueuedEventsProgressDialog(self.main_window.window)
items = []
i = 1
- total = len(reviewers)
+ total = len(pages)
has_changes = False
- for reviewer in reviewers:
- has_changes = has_changes or bool(reviewer.selectable_boxes_area.getAllAreas())
- item = AsyncItem(self.__performRecognitionForReviewer,
- (reviewer,),
- self.__performRecognitionForReviewerFinishedCb,
- (dialog, reviewer, reviewers))
+ for page in pages:
+ has_changes = has_changes or bool(page.data_boxes)
+ item = AsyncItem(self.__performRecognitionForPage,
+ (page,),
+ self.__performRecognitionForPageFinishedCb,
+ (dialog, page, pages))
info = (_('Recognizing Document'),
_(u'Recognizing page %(page_number)s/%(total_pages)s. Please wait…') % {'page_number': i,
'total_pages': total})
@@ -578,7 +589,7 @@ class ImageReviewer_Controler:
dialog.setItemsList(items)
dialog.run()
- def __performRecognitionForReviewer(self, image_reviewer):
+ def __performRecognitionForPage(self, page):
window_size = self.configuration_manager.window_size
if window_size == 'auto':
window_size = None
@@ -603,8 +614,8 @@ class ImageReviewer_Controler:
clean_text,
adjust_boxes_bounds,
adjustment_size)
- return layout_analysis.recognize(image_reviewer.path_to_image,
- image_reviewer.page.resolution[1])
+ return layout_analysis.recognize(page.image_path,
+ page.resolution[1])
def __getConfiguredOcrEngine(self):
for engine, path in self.ocr_engines:
@@ -612,17 +623,13 @@ class ImageReviewer_Controler:
return engine
return None
- def __performRecognitionForReviewerFinishedCb(self, dialog, image_reviewer,
- reviewers_to_process,
- data_boxes, error):
- image_reviewer.clear()
- image_reviewer.applyTextColors()
- for data_box in data_boxes:
- image_reviewer.addDataBox(data_box)
- if image_reviewer == reviewers_to_process[-1]:
+ def __performRecognitionForPageFinishedCb(self, dialog, page,
+ pages_to_process,
+ data_boxes, error):
+ page.data_boxes = data_boxes
+ if page == pages_to_process[-1]:
dialog.cancel()
- for reviewer in reviewers_to_process:
- reviewer.updateMainWindow()
+ self.__updateImageReviewers()
def copyRecognizedTextToClipboard(self, widget):
image_reviewer = self.__getCurrentReviewer()
@@ -631,56 +638,55 @@ class ImageReviewer_Controler:
def setDataBox(self, widget):
image_reviewer = self.__getCurrentReviewer()
document_generator = OdtGenerator()
- page_data = image_reviewer.getPageData()
+ page_data = image_reviewer.savePageData()
document_generator.addPage(page_data)
document_generator.save()
def exportPagesToHtml(self, pixbufs_sorted = []):
- image_reviewers = self.__askForNumberOfPages(_('Export to HTML'), pixbufs_sorted)
- if not image_reviewers:
+ pages = self.__askForNumberOfPages(_('Export to HTML'), pixbufs_sorted)
+ if not pages:
return
file_name = self.__askForFileName()
if file_name:
if os.path.exists(file_name):
os.remove(file_name)
document_generator = HtmlGenerator(file_name)
- for image_reviewer in image_reviewers:
- document_generator.addPage(image_reviewer.getPageData())
+ for page in pages:
+ document_generator.addPage(page)
document_generator.save()
def exportPagesToOdt(self, pixbufs_sorted = []):
- image_reviewers = self.__askForNumberOfPages(_('Export to ODT'), pixbufs_sorted)
- if not image_reviewers:
+ pages = self.__askForNumberOfPages(_('Export to ODT'), pixbufs_sorted)
+ if not pages:
return
file_name = self.__askForFileName()
if file_name:
document_generator = OdtGenerator(file_name)
- for image_reviewer in image_reviewers:
- document_generator.addPage(image_reviewer.getPageData())
+ for page in pages:
+ document_generator.addPage(page)
document_generator.save()
def exportPagesToPlaintext(self, pixbufs_sorted = []):
- image_reviewers = self.__askForNumberOfPages(_('Export to Plain Text'), pixbufs_sorted)
- if not image_reviewers:
+ pages = self.__askForNumberOfPages(_('Export to Plain Text'), pixbufs_sorted)
+ if not pages:
return
file_name = self.__askForFileName()
if file_name:
document_generator = PlaintextGenerator(file_name)
- for image_reviewer in image_reviewers:
- document_generator.addText(image_reviewer.getAllText())
+ for page in pages:
+ document_generator.addText(getTextFromBoxes(page.data_boxes))
document_generator.save()
def exportPagesToPdf(self, pixbufs_sorted = [], pdf_from_scratch = True):
- image_reviewers = self.__askForNumberOfPages(_('Export to PDF'),
- pixbufs_sorted)
- if not image_reviewers:
+ pages = self.__askForNumberOfPages(_('Export to PDF'), pixbufs_sorted)
+ if not pages:
return
file_name = self.__askForFileName()
if file_name:
document_generator = PdfGenerator(file_name, pdf_from_scratch)
- for image_reviewer in image_reviewers:
- document_generator.addPage(image_reviewer.getPageData())
+ for page in pages:
+ document_generator.addPage(page)
document_generator.save()
def saveProjectAs(self):
@@ -689,7 +695,7 @@ class ImageReviewer_Controler:
def saveProject(self, project_name):
if not project_name.endswith('.ocrf'):
project_name += '.ocrf'
- pages_data = self.getPagesData(self.getPixbufsSorted())
+ pages_data = self.source_images_selector_widget.getAllPages()
project_saver = ProjectSaver(pages_data)
project_saver.serialize(project_name)
@@ -712,26 +718,24 @@ class ImageReviewer_Controler:
return project_file
def __askForNumberOfPages(self, title, pixbufs_sorted):
+ # Sync the current reviewer's page with its data
+ self.__getCurrentReviewer().savePageData()
export_dialog = PagesToExportDialog(title)
- image_reviewers = self.getImageReviewers(pixbufs_sorted)
+ pages = self.source_images_selector_widget.getAllPages()
# When there's only one document loaded or none,
# we don't ask for the number of pages to export
- if len(image_reviewers) < 2:
- return image_reviewers
+ if len(pages) < 2:
+ return pages
response = export_dialog.run()
if response == gtk.RESPONSE_ACCEPT:
if export_dialog.current_page_button.get_active():
- image_reviewers = [self.__getCurrentReviewer()]
+ pages = [self.__getCurrentReviewer().page]
export_dialog.destroy()
- return image_reviewers
+ return pages
else:
export_dialog.destroy()
return None
- def getPagesData(self, pixbufs_sorted):
- image_reviewers = self.getImageReviewers(pixbufs_sorted)
- return [reviewer.getPageData() for reviewer in image_reviewers]
-
def __askForFileName(self, extension = ''):
save_dialog = FileDialog('save')
response = save_dialog.run()
@@ -766,24 +770,23 @@ class ImageReviewer_Controler:
if response == gtk.RESPONSE_ACCEPT:
size = page_size_dialog.getSize()
if page_size_dialog.all_pages_radio.get_active():
- for key, reviewer in self.image_reviewer_dict.items():
- reviewer.page.setSize(size)
+ for page in self.source_images_selector_widget.getAllPages():
+ page.setSize(size)
else:
current_reviewer.page.setSize(size)
debug('Page size: ', size)
page_size_dialog.destroy()
+ self.__updateStatusBar(current_reviewer)
def __getCurrentReviewer(self):
return self.notebook.get_nth_page(self.notebook.get_current_page())
def deleteCurrentPage(self):
current_reviewer = self.__getCurrentReviewer()
- for pixbuf, image_reviewer in self.image_reviewer_dict.items():
- if image_reviewer == current_reviewer:
- del self.image_reviewer_dict[pixbuf]
- self.notebook.remove_page(self.notebook.get_current_page())
- break
- self.__updateStatusBar()
+ if not current_reviewer:
+ return
+ index = self.notebook.page_num(current_reviewer)
+ self.notebook.remove_page(index)
def unpaperTool(self):
current_reviewer = self.__getCurrentReviewer()
@@ -798,9 +801,9 @@ class ImageReviewer_Controler:
unpaper_dialog.destroy()
def clear(self):
- for pixbuf in self.image_reviewer_dict.keys():
- del self.image_reviewer_dict[pixbuf]
- self.notebook.remove_page(self.notebook.get_current_page())
+ # remove all pages from notebook
+ for index in range(self.notebook.get_n_pages()):
+ self.notebook.remove_page(0)
self.source_images_selector_widget.clear()
self.__updateStatusBar()
@@ -808,7 +811,8 @@ class ImageReviewer_Controler:
return self.source_images_selector_widget.source_images_selector.getPixbufsSorted()
def updateFromConfiguration(self):
- for reviewer in self.image_reviewer_dict.values():
+ for index in range(self.notebook.get_n_pages()):
+ reviewer = self.notebook.get_nth_page(index)
reviewer.setTextFillColor(self.configuration_manager.text_fill)
reviewer.setBoxesStrokeColor(self.configuration_manager.boxes_stroke)
reviewer.setImageFillColor(self.configuration_manager.image_fill)
@@ -1097,3 +1101,17 @@ class Editor:
else:
self.reviewer.main_window.copy_to_clipboard_menu.set_sensitive(True)
self.reviewer.main_window.spellchecker_menu.set_sensitive(True)
+
+
+# Helper function to get text from data boxes
+def getTextFromBoxes(data_boxes):
+ text = ''
+ number_of_boxes = len(data_boxes)
+ for i in range(number_of_boxes):
+ data_box = data_boxes[i]
+ if data_box and data_box.getType() != TEXT_TYPE:
+ continue
+ text += data_box.getText()
+ if number_of_boxes > 1 and i < number_of_boxes - 1:
+ text += '\n\n'
+ return text
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]