[tracker/tracker-0.14] tracker-extract: add a simple extractor for DVI files



commit 8f2df3741b8bd3172ee234f57f48dcc95f7fd84b
Author: Cosimo Cecchi <cosimoc gnome org>
Date:   Mon Oct 29 23:26:35 2012 -0400

    tracker-extract: add a simple extractor for DVI files
    
    Code to extract information from the headers is taken from Evince.
    
    Fixes GB#687162

 src/tracker-extract/10-dvi.rule.in        |    4 +
 src/tracker-extract/Makefile.am           |   14 ++
 src/tracker-extract/tracker-extract-dvi.c |  250 +++++++++++++++++++++++++++++
 3 files changed, 268 insertions(+), 0 deletions(-)
---
diff --git a/src/tracker-extract/10-dvi.rule.in b/src/tracker-extract/10-dvi.rule.in
new file mode 100644
index 0000000..c575400
--- /dev/null
+++ b/src/tracker-extract/10-dvi.rule.in
@@ -0,0 +1,4 @@
+[ExtractorRule]
+ModulePath=@modulesdir@/libextract-dvi.so
+MimeTypes=application/x-dvi
+FallbackRdfType=nfo:Document
diff --git a/src/tracker-extract/Makefile.am b/src/tracker-extract/Makefile.am
index 39203e9..e8d5320 100644
--- a/src/tracker-extract/Makefile.am
+++ b/src/tracker-extract/Makefile.am
@@ -18,6 +18,7 @@ AM_CPPFLAGS = \
 # date.
 rules_in_files = \
 	10-abw.rule.in \
+	10-dvi.rule.in \
 	10-epub.rule.in \
 	10-flac.rule.in \
 	10-gif.rule.in \
@@ -53,6 +54,7 @@ rules_in_files = \
 # against it if we directly use functions in the .so
 modules_LTLIBRARIES = \
 	libextract-abw.la \
+	libextract-dvi.la \
 	libextract-mp3.la \
 	libextract-png.la \
 	libextract-ps.la \
@@ -61,6 +63,7 @@ modules_LTLIBRARIES = \
 
 rules_DATA = \
 	10-abw.rule \
+	10-dvi.rule \
 	10-ico.rule \
 	10-mp3.rule \
 	10-png.rule \
@@ -318,6 +321,17 @@ libextract_pdf_la_LIBADD = \
 	$(TRACKER_EXTRACT_MODULES_LIBS) \
 	$(POPPLER_LIBS)
 
+# DVI
+libextract_dvi_la_SOURCES = tracker-extract-dvi.c
+libextract_dvi_la_CFLAGS = \
+	$(TRACKER_EXTRACT_MODULES_CFLAGS)
+libextract_dvi_la_LDFLAGS = $(module_flags)
+libextract_dvi_la_LIBADD = \
+	$(top_builddir)/src/libtracker-extract/libtracker-extract-@TRACKER_API_VERSION@.la \
+	$(top_builddir)/src/libtracker-common/libtracker-common.la \
+	$(BUILD_LIBS) \
+	$(TRACKER_EXTRACT_MODULES_LIBS)
+
 # GStreamer
 libextract_gstreamer_la_SOURCES = \
 	tracker-extract-gstreamer.c \
diff --git a/src/tracker-extract/tracker-extract-dvi.c b/src/tracker-extract/tracker-extract-dvi.c
new file mode 100644
index 0000000..5ddb2dd
--- /dev/null
+++ b/src/tracker-extract/tracker-extract-dvi.c
@@ -0,0 +1,250 @@
+/*
+ * Copyright (C) 2012, Red Hat, Inc.
+ *
+ *   Code adapted from evince/backend/dvi/mdvi-lib/dviread.c
+ *   Copyright (C) 2000, Matias Atria
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ */
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <glib.h>
+#include <gmodule.h>
+
+#include <libtracker-extract/tracker-extract.h>
+
+#define __PROTO(x)	x	/* expands to its argument unchanged */
+extern gulong	fugetn __PROTO((FILE *, size_t));
+/* Byte readers; fgetbyte casts getc()'s int result (EOF included) to unsigned */
+#define fgetbyte(p)	((unsigned)getc(p))
+#define fuget4(p)	fugetn((p), 4)
+#define fuget3(p)	fugetn((p), 3)
+#define fuget2(p)	fugetn((p), 2)
+#define fuget1(p)	fgetbyte(p)
+
+/* Byte values mdvi_init_context() compares against while parsing */
+#define DVI_ID		2	/* format id: 2nd preamble byte, also precedes the trailer */
+#define DVI_TRAILER	223	/* padding byte repeated at the end of the file */
+#define DVI_PRE		247	/* first byte of the file */
+#define DVI_POST	248	/* byte expected at the postamble offset */
+
+typedef struct {	/* what mdvi_init_context() learns about one DVI file */
+	char *filename;	/* name of the DVI file (freed with the context) */
+	FILE *in;	/* from here we read (closed with the context) */
+	char *fileid;	/* comment string from preamble, NUL-terminated */
+	int npages;	/* number of pages, from the postamble */
+	int depth;	/* recursion depth (not assigned by mdvi_init_context) */
+	gint32 num;	/* numerator, from the preamble */
+	gint32 den;	/* denominator, from the preamble */
+	gint32 dvimag;	/* original magnification, from the preamble */
+	int dvi_page_w;	/* unscaled page width, from the postamble */
+	int dvi_page_h;	/* unscaled page height, from the postamble */
+	int stacksize;	/* stack depth, from the postamble */
+} DviContext;
+
+/* Read n (>= 1) bytes from p, most significant first; EOF is not detected. */
+gulong
+fugetn (FILE *p, size_t n)
+{
+	gulong acc = fgetbyte (p);
+	size_t left;
+
+	for (left = n - 1; left > 0; left--) {
+		acc = (acc << 8) | fgetbyte (p);
+	}
+	return acc;
+}
+
+/*
+ * Return a newly allocated copy of name (release with g_free) when name
+ * ends in ".dvi" (case-sensitive) and access() reports it readable, else
+ * NULL. A NULL name yields NULL instead of being dereferenced.
+ */
+static char *
+opendvi (const char *name)
+{
+	if (name == NULL || !g_str_has_suffix (name, ".dvi")) {
+		return NULL;
+	}
+
+	g_debug ("Opening filename:'%s'", name);
+	if (access (name, R_OK) != 0) {
+		return NULL;
+	}
+	return g_strdup (name);
+}
+
+/* Close the stream if open, then free both strings and dvi (non-NULL). */
+static void
+mdvi_destroy_context (DviContext *dvi)
+{
+	if (dvi->in != NULL) {
+		fclose (dvi->in);
+	}
+
+	g_free (dvi->fileid);
+	g_free (dvi->filename);
+	g_free (dvi);
+}
+
+/*
+ * Parse the DVI file `file`: preamble (scaling, comment) and postamble
+ * (page count, page size, stack depth). Returns a context to release with
+ * mdvi_destroy_context(), or NULL on open failure, truncation or bad data.
+ */
+static DviContext *
+mdvi_init_context (const char *file)
+{
+	FILE *p;
+	gint32 arg;
+	int op, n;
+	DviContext *dvi;
+	char *filename;
+
+	/* 1. Open the file and initialize the DVI context */
+	filename = opendvi (file);
+	if (filename == NULL) {
+		return NULL;
+	}
+	p = fopen (filename, "rb");
+	if (p == NULL) {
+		g_free (filename);
+		return NULL;
+	}
+
+	dvi = g_new0 (DviContext, 1);
+	dvi->filename = filename;
+	dvi->in = p;
+
+	/* 2. Read the preamble, extract scaling information */
+	if (fuget1 (p) != DVI_PRE) {
+		goto error;
+	}
+	if ((arg = fuget1 (p)) != DVI_ID) {
+		g_message ("Unsupported DVI format (version %u)", (unsigned) arg);
+		goto error;
+	}
+
+	/* get dimensions and check that these numbers make sense */
+	dvi->num = fuget4 (p);
+	dvi->den = fuget4 (p);
+	dvi->dvimag = fuget4 (p);
+	if (!dvi->num || !dvi->den || !dvi->dvimag) {
+		goto error;
+	}
+
+	/*
+	 * Get the comment from the preamble. getc() is used directly so that
+	 * EOF is caught before the length is used as a size or index, and a
+	 * short read is rejected instead of leaving the string uninitialized.
+	 */
+	n = getc (p);
+	if (n == EOF) {
+		goto error;
+	}
+	dvi->fileid = g_malloc (n + 1);
+	if (fread (dvi->fileid, 1, n, p) != (size_t) n) {
+		goto error;
+	}
+	dvi->fileid[n] = 0;
+	g_debug ("Preamble Comment: '%s'", dvi->fileid);
+
+	/* 3. Read postamble: start at the last byte of the file ... */
+	if (fseek (p, -1L, SEEK_END) == -1) {
+		goto error;
+	}
+
+	/* ... walk backwards over the DVI_TRAILER padding (at least 4 bytes) */
+	for (n = 0; (op = fuget1 (p)) == DVI_TRAILER; n++) {
+		if (fseek (p, -2L, SEEK_CUR) < 0) {
+			break;
+		}
+	}
+	if (op != DVI_ID || n < 4) {
+		goto error;
+	}
+
+	/* the 4 bytes before the id byte hold the postamble offset */
+	if (fseek (p, -5L, SEEK_CUR) != 0) {
+		goto error;
+	}
+	arg = fuget4 (p);
+	if (fseek (p, (long) arg, SEEK_SET) != 0 || fuget1 (p) != DVI_POST) {
+		goto error;
+	}
+
+	fuget4 (p); /* offset */
+	if (dvi->num != fuget4 (p) ||
+	    dvi->den != fuget4 (p) ||
+	    dvi->dvimag != fuget4 (p)) {
+		goto error;
+	}
+	dvi->dvi_page_h = fuget4 (p);
+	dvi->dvi_page_w = fuget4 (p);
+	dvi->stacksize = fuget2 (p);
+	dvi->npages = fuget2 (p);
+	if (feof (p) || ferror (p)) { /* fuget* hide EOF: truncated postamble */
+		goto error;
+	}
+	g_debug ("Postamble: %d pages", dvi->npages);
+	return dvi;
+
+error:
+	mdvi_destroy_context (dvi);
+	return NULL;
+}
+
+/* Extractor entry point: adds the DVI page count and preamble comment to
+ * info's metadata builder. Returns FALSE when the file has no local path
+ * or cannot be parsed, TRUE otherwise. */
+G_MODULE_EXPORT gboolean
+tracker_extract_get_metadata (TrackerExtractInfo *info)
+{
+	TrackerSparqlBuilder *metadata;
+	GFile *file;
+	gchar *filename;
+	DviContext *context;
+
+	g_type_init ();
+	metadata = tracker_extract_info_get_metadata_builder (info);
+	file = tracker_extract_info_get_file (info);
+
+	/* g_file_get_path() yields NULL without a local path: skip parsing */
+	filename = g_file_get_path (file);
+	context = (filename != NULL) ? mdvi_init_context (filename) : NULL;
+	if (context == NULL) {
+		g_warning ("Could not open dvi file '%s'\n",
+		           (filename != NULL) ? filename : "(no local path)");
+		g_free (filename);
+		return FALSE;
+	}
+
+	tracker_sparql_builder_predicate (metadata, "a");
+	tracker_sparql_builder_object (metadata, "nfo:PaginatedTextDocument");
+	tracker_sparql_builder_predicate (metadata, "nfo:pageCount");
+	tracker_sparql_builder_object_int64 (metadata, context->npages);
+	if (context->fileid) {
+		tracker_sparql_builder_predicate (metadata, "nie:comment");
+		tracker_sparql_builder_object_unvalidated (metadata, context->fileid);
+	}
+	mdvi_destroy_context (context);
+	g_free (filename); /* the path copy must be freed on success too */
+	return TRUE;
+}



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]