[tracker/miner-web: 44/76] Fixes GB#609075, Adding support for pdf extractor to extract the index data from the pdf files



commit 066210f8769bb4a6084f16a763b23cac5f969c63
Author: Amin Jain <ext-amit 1 jain nokia com>
Date:   Thu Feb 25 15:28:21 2010 +0000

    Fixes GB#609075, Adding support for pdf extractor to extract the index data from the pdf files

 src/tracker-extract/tracker-extract-pdf.c |  132 +++++++++++++++++++++++++++++
 1 files changed, 132 insertions(+), 0 deletions(-)
---
diff --git a/src/tracker-extract/tracker-extract-pdf.c b/src/tracker-extract/tracker-extract-pdf.c
index 32490d1..b651765 100644
--- a/src/tracker-extract/tracker-extract-pdf.c
+++ b/src/tracker-extract/tracker-extract-pdf.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (C) 2006, Mr Jamie McCracken (jamiemcc gnome org)
  * Copyright (C) 2008-2009, Nokia
+ * Copyright (C) 2010, Amit Aggarwal (amitcs06 gmail com)
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
@@ -56,6 +57,135 @@ static TrackerExtractData data[] = {
 };
 
 static void
+read_toc (PopplerIndexIter  *index,
+          GString          **toc)
+{
+        if (!index) {
+	        return;
+        }
+
+        if (!*toc) {
+	        *toc = g_string_new ("");
+        }
+
+        do {
+                PopplerAction *action;
+                PopplerIndexIter *iter;
+
+                action = poppler_index_iter_get_action (index);
+
+                if (!action) {
+                        continue;
+                }
+
+                switch (action->type) {
+                case POPPLER_ACTION_GOTO_DEST: {
+                        PopplerActionGotoDest *ag = (PopplerActionGotoDest*) action;
+                        PopplerDest *agd = ag->dest;
+
+                        if (!tracker_is_empty_string (ag->title)) {
+	                        g_string_append_printf (*toc, "%s ", ag->title);
+                        }
+
+                        if (!tracker_is_empty_string (agd->named_dest)) {
+	                        g_string_append_printf (*toc, "%s ", agd->named_dest);
+                        }
+
+                        break;
+                }
+
+                case POPPLER_ACTION_LAUNCH: {
+                        PopplerActionLaunch *al = (PopplerActionLaunch*) action;
+
+                        if (!tracker_is_empty_string (al->title)) {
+	                        g_string_append_printf (*toc, "%s ", al->title);
+                        }
+
+                        if (!tracker_is_empty_string (al->file_name)) {
+	                        g_string_append_printf (*toc, "%s ", al->file_name);
+                        }
+
+                        if (!tracker_is_empty_string (al->params)) {
+	                        g_string_append_printf (*toc, "%s ", al->params);
+                        }
+
+                        break;
+                }
+
+                case POPPLER_ACTION_URI: {
+                        PopplerActionUri *au = (PopplerActionUri*) action;
+
+                        if (!tracker_is_empty_string (au->uri)) {
+	                        g_string_append_printf (*toc, "%s ", au->uri);
+                        }
+
+                        break;
+                }
+
+                case POPPLER_ACTION_NAMED: {
+                        PopplerActionNamed *an = (PopplerActionNamed*) action;
+
+                        if (!tracker_is_empty_string (an->title)) {
+	                        g_string_append_printf (*toc, "%s, ", an->title);
+                        }
+
+                        if (!tracker_is_empty_string (an->named_dest)) {
+                                g_string_append_printf (*toc, "%s ", an->named_dest);
+                        }
+
+                        break;
+                }
+
+                case POPPLER_ACTION_MOVIE: {
+                        PopplerActionNamed *am = (PopplerActionNamed*) action;
+
+                        if (!tracker_is_empty_string (am->title)) {
+	                        g_string_append_printf (*toc, "%s ", am->title);
+                        }
+
+                        break;
+                }
+
+                case POPPLER_ACTION_NONE:
+                case POPPLER_ACTION_UNKNOWN:
+                case POPPLER_ACTION_GOTO_REMOTE:
+                        /* Do nothing */
+                        break;
+                }
+
+                iter = poppler_index_iter_get_child (index);
+                read_toc (iter, toc);
+        } while (poppler_index_iter_next (index));
+
+        poppler_index_iter_free (index);
+}
+
+/* Writes the outline text of @document to @metadata as nfo:tableOfContents.
+ * Nothing is written without an index iterator or when no text was found. */
+static void
+read_outline (PopplerDocument      *document,
+              TrackerSparqlBuilder *metadata)
+{
+        GString *outline_text = NULL;
+        PopplerIndexIter *root = poppler_index_iter_new (document);
+
+        if (root == NULL) {
+                return;
+        }
+
+        read_toc (root, &outline_text); /* also frees root */
+        if (outline_text == NULL) {
+                return;
+        }
+
+        if (outline_text->len != 0) {
+                tracker_sparql_builder_predicate (metadata, "nfo:tableOfContents");
+                tracker_sparql_builder_object_unvalidated (metadata, outline_text->str);
+        }
+        g_string_free (outline_text, TRUE);
+}
+
+static void
 insert_keywords (TrackerSparqlBuilder *metadata,
                  gchar                *keywords)
 {
@@ -466,6 +596,8 @@ extract_pdf (const gchar          *uri,
 		g_free (content);
 	}
 
+        read_outline (document, metadata);
+
 	g_object_unref (document);
 }
 



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]