[tracker/chunck-pdf] Experimental
- From: Philip Van Hoof <pvanhoof src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker/chunck-pdf] Experimental
- Date: Fri, 18 Nov 2011 13:45:52 +0000 (UTC)
commit d4e4493fea310917b45f5cba5e3bbc95958d7274
Author: Philip Van Hoof <philip codeminded be>
Date: Fri Nov 18 14:45:30 2011 +0100
Experimental
src/tracker-extract/tracker-extract-pdf.c | 49 +++++++++++++++++++----------
1 files changed, 32 insertions(+), 17 deletions(-)
---
diff --git a/src/tracker-extract/tracker-extract-pdf.c b/src/tracker-extract/tracker-extract-pdf.c
index af31369..ce162f0 100644
--- a/src/tracker-extract/tracker-extract-pdf.c
+++ b/src/tracker-extract/tracker-extract-pdf.c
@@ -203,32 +203,47 @@ extract_content (PopplerDocument *document,
g_timer_elapsed (timer, NULL) < 5) {
PopplerPage *page;
gsize written_bytes;
- gchar *text;
+ gchar *text;
+ PopplerRectangle rectangle = {0, 0, 0, 0};
+ double height = 0, piece;
+ gint part;
page = poppler_document_get_page (document, i);
i++;
- text = poppler_page_get_text (page);
+ poppler_page_get_size (page, &rectangle.x2, &height);
+ piece = height / 10;
- if (!text) {
- g_object_unref (page);
- continue;
- }
+ for (part = 1; part <= 10 && g_timer_elapsed (timer, NULL) < 105; part++) {
- if (tracker_text_validate_utf8 (text,
- MIN (strlen (text), remaining_bytes),
- &string,
- &written_bytes)) {
- g_string_append_c (string, ' ');
- }
+ rectangle.y1 = piece * (part - 1);
+ rectangle.y2 = piece * part;
+
+
+ text = poppler_page_get_selected_text (page, POPPLER_SELECTION_GLYPH, &rectangle);
+
+ g_print ("from %f to %f took %fs\n", rectangle.y1, rectangle.y2, g_timer_elapsed (timer, NULL));
- remaining_bytes -= written_bytes;
+ if (!text) {
+ continue;
+ }
+
+ if (tracker_text_validate_utf8 (text,
+ MIN (strlen (text), remaining_bytes),
+ &string,
+ &written_bytes)) {
+ g_string_append_c (string, ' ');
+ }
- g_debug ("Extracted %" G_GSIZE_FORMAT " bytes from page %d, "
- "%" G_GSIZE_FORMAT " bytes remaining",
- written_bytes, i, remaining_bytes);
+ remaining_bytes -= written_bytes;
+
+ g_debug ("Extracted %" G_GSIZE_FORMAT " bytes from page %d, "
+ "%" G_GSIZE_FORMAT " bytes remaining",
+ written_bytes, i, remaining_bytes);
+
+ g_free (text);
+ }
- g_free (text);
g_object_unref (page);
}
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]