[tracker] tracker-extract: Fixed PS extractor to use TrackerSparqlBuilder
- From: Martyn James Russell <mr src gnome org>
- To: svn-commits-list gnome org
- Cc:
- Subject: [tracker] tracker-extract: Fixed PS extractor to use TrackerSparqlBuilder
- Date: Wed, 4 Nov 2009 16:48:34 +0000 (UTC)
commit b1c0f012ddd2c3f6f0f398734fe32aa177352408
Author: Martyn Russell <martyn lanedo com>
Date: Wed Nov 4 16:29:15 2009 +0000
tracker-extract: Fixed PS extractor to use TrackerSparqlBuilder
src/tracker-extract/tracker-extract-ps.c | 197 +++++++++++++++---------------
1 files changed, 97 insertions(+), 100 deletions(-)
---
diff --git a/src/tracker-extract/tracker-extract-ps.c b/src/tracker-extract/tracker-extract-ps.c
index 9e12d7b..cbec510 100644
--- a/src/tracker-extract/tracker-extract-ps.c
+++ b/src/tracker-extract/tracker-extract-ps.c
@@ -1,7 +1,7 @@
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/*
* Copyright (C) 2007, Mr Jamie McCracken (jamiemcc gnome org)
- * Copyright (C) 2008, Nokia
+ * Copyright (C) 2008-2009, Nokia
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
@@ -19,12 +19,12 @@
* Boston, MA 02110-1301, USA.
*/
+#include "config.h"
+
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
-#include "config.h"
-
#include <fcntl.h>
#include <string.h>
#include <sys/types.h>
@@ -42,13 +42,6 @@
#include "tracker-main.h"
-#define NIE_PREFIX TRACKER_NIE_PREFIX
-#define NFO_PREFIX TRACKER_NFO_PREFIX
-#define NCO_PREFIX TRACKER_NCO_PREFIX
-
-#define RDF_PREFIX TRACKER_RDF_PREFIX
-#define RDF_TYPE RDF_PREFIX "type"
-
#ifndef HAVE_GETLINE
#include <stddef.h>
@@ -59,21 +52,21 @@
#undef getdelim
#undef getline
-#define GROWBY 80
+#define GROW_BY 80
#endif /* HAVE_GETLINE */
#ifdef USING_UNZIPPSFILES
static void extract_ps_gz (const gchar *uri,
- TrackerSparqlBuilder *metadata);
+ TrackerSparqlBuilder *metadata);
#endif
static void extract_ps (const gchar *uri,
- TrackerSparqlBuilder *metadata);
+ TrackerSparqlBuilder *metadata);
static TrackerExtractData data[] = {
#ifdef USING_UNZIPPSFILES
{ "application/x-gzpostscript", extract_ps_gz },
-#endif
+#endif /* USING_UNZIPPSFILES */
{ "application/postscript", extract_ps },
{ NULL, NULL }
};
@@ -96,14 +89,14 @@ igetdelim (gchar **linebuf,
}
if (*linebuf == NULL && *linebufsz == 0) {
- *linebuf = g_malloc (GROWBY);
+ *linebuf = g_malloc (GROW_BY);
if (!*linebuf) {
errno = ENOMEM;
return -1;
}
- *linebufsz += GROWBY;
+ *linebufsz += GROW_BY;
}
idx = 0;
@@ -111,7 +104,7 @@ igetdelim (gchar **linebuf,
while ((ch = fgetc (file)) != EOF) {
/* Grow the line buffer as necessary */
while (idx > *linebufsz - 2) {
- *linebuf = g_realloc (*linebuf, *linebufsz += GROWBY);
+ *linebuf = g_realloc (*linebuf, *linebufsz += GROW_BY);
if (!*linebuf) {
errno = ENOMEM;
@@ -186,21 +179,25 @@ date_to_iso8601 (const gchar *date)
if (date && date[1] && date[2]) {
if (date[0] == '(') {
/* we have probably a date like
- "(18:07 Tuesday 22 May 2007)" */
+ * "(18:07 Tuesday 22 May 2007)"
+ */
return hour_day_str_day (date);
} else if (g_ascii_isalpha (date[0])) {
/* we have probably a date like
- "Tue May 22 18:07:10 2007" */
+ * "Tue May 22 18:07:10 2007"
+ */
return day_str_month_day (date);
} else if (date[1] == ' ' || date[2] == ' ') {
/* we have probably a date like
- "22 May 1997 18:07:10 -0600" */
+ * "22 May 1997 18:07:10 -0600"
+ */
return day_month_year_date (date);
} else if (date[1] == ':' || date[2] == ':') {
/* we have probably a date like
- "6:07 PM May 22, 2007" */
+ * "6:07 PM May 22, 2007"
+ */
return hour_month_day_date (date);
}
}
@@ -209,102 +206,101 @@ date_to_iso8601 (const gchar *date)
}
static void
-extract_ps (const gchar *uri,
- TrackerSparqlBuilder *metadata)
+extract_ps (const gchar *uri,
+ TrackerSparqlBuilder *metadata)
{
FILE *f;
- gchar *filename = g_filename_from_uri (uri, NULL, NULL);
-
+ gchar *filename;
+ gchar *line;
+ gsize length;
+ gssize read_char;
+
+ filename = g_filename_from_uri (uri, NULL, NULL);
f = tracker_file_open (filename, "r", TRUE);
+ g_free (filename);
- if (f) {
- gchar *line;
- gsize length;
- gssize read_char;
+ if (!f) {
+ return;
+ }
- line = NULL;
- length = 0;
+ line = NULL;
+ length = 0;
- tracker_statement_list_insert (metadata, uri,
- RDF_TYPE,
- NFO_PREFIX "PaginatedTextDocument");
-
- while ((read_char = getline (&line, &length, f)) != -1) {
- gboolean pageno_atend = FALSE;
- gboolean header_finished = FALSE;
-
- line[read_char - 1] = '\0'; /* overwrite '\n' char */
-
- if (!header_finished && strncmp (line, "%%Copyright:", 12) == 0) {
- tracker_statement_list_insert (metadata, uri,
- NIE_PREFIX "copyright",
- line + 13);
-
- } else if (!header_finished && strncmp (line, "%%Title:", 8) == 0) {
- tracker_statement_list_insert (metadata, uri,
- NIE_PREFIX "title",
- line + 9);
-
- } else if (!header_finished && strncmp (line, "%%Creator:", 10) == 0) {
-
- tracker_statement_list_insert (metadata, ":", RDF_TYPE, NCO_PREFIX "Contact");
- tracker_statement_list_insert (metadata, ":", NCO_PREFIX "fullname", line + 11);
- tracker_statement_list_insert (metadata, uri, NCO_PREFIX "creator", ":");
-
- } else if (!header_finished && strncmp (line, "%%CreationDate:", 15) == 0) {
- gchar *date;
-
- date = date_to_iso8601 (line + 16);
-
- if (date) {
- tracker_statement_list_insert (metadata, uri,
- NIE_PREFIX "contentCreated",
- date);
-
- g_free (date);
- }
- } else if (strncmp (line, "%%Pages:", 8) == 0) {
- if (strcmp (line + 9, "(atend)") == 0) {
- pageno_atend = TRUE;
- } else {
- tracker_statement_list_insert (metadata, uri,
- NFO_PREFIX "pageCount",
- line + 9);
- }
- } else if (strncmp (line, "%%EndComments", 14) == 0) {
- header_finished = TRUE;
-
- if (!pageno_atend) {
- break;
- }
+ tracker_sparql_builder_subject_iri (metadata, uri);
+ tracker_sparql_builder_predicate (metadata, "a");
+ tracker_sparql_builder_object (metadata, "nfo:PaginatedTextDocument");
+
+ while ((read_char = getline (&line, &length, f)) != -1) {
+ gboolean pageno_atend = FALSE;
+ gboolean header_finished = FALSE;
+
+ line[read_char - 1] = '\0'; /* overwrite '\n' char */
+
+ if (!header_finished && strncmp (line, "%%Copyright:", 12) == 0) {
+ tracker_sparql_builder_predicate (metadata, "nie:copyright");
+ tracker_sparql_builder_object_unvalidated (metadata, line + 13);
+ } else if (!header_finished && strncmp (line, "%%Title:", 8) == 0) {
+ tracker_sparql_builder_predicate (metadata, "nie:title");
+ tracker_sparql_builder_object_unvalidated (metadata, line + 9);
+ } else if (!header_finished && strncmp (line, "%%Creator:", 10) == 0) {
+ tracker_sparql_builder_predicate (metadata, "nco:creator");
+ tracker_sparql_builder_object_blank_open (metadata);
+ tracker_sparql_builder_predicate (metadata, "a");
+ tracker_sparql_builder_object (metadata, "nco:Contact");
+ tracker_sparql_builder_predicate (metadata, "nco:fullname");
+ tracker_sparql_builder_object_unvalidated (metadata, line + 11);
+ tracker_sparql_builder_object_blank_close (metadata);
+ } else if (!header_finished && strncmp (line, "%%CreationDate:", 15) == 0) {
+ gchar *date;
+
+ date = date_to_iso8601 (line + 16);
+ if (date) {
+ tracker_sparql_builder_predicate (metadata, "nie:contentCreated");
+ tracker_sparql_builder_object_unvalidated (metadata, date);
+ g_free (date);
}
+ } else if (strncmp (line, "%%Pages:", 8) == 0) {
+ if (strcmp (line + 9, "(atend)") == 0) {
+ pageno_atend = TRUE;
+ } else {
+ gint64 page_count;
+
+ page_count = g_ascii_strtoll (line + 9, NULL, 10);
+ tracker_sparql_builder_predicate (metadata, "nfo:pageCount");
+ tracker_sparql_builder_object_int64 (metadata, page_count);
+ }
+ } else if (strncmp (line, "%%EndComments", 14) == 0) {
+ header_finished = TRUE;
- g_free (line);
- line = NULL;
- length = 0;
+ if (!pageno_atend) {
+ break;
+ }
}
- if (line) {
- g_free (line);
- }
+ g_free (line);
+ line = NULL;
+ length = 0;
+ }
- tracker_file_close (f, FALSE);
+ if (line) {
+ g_free (line);
}
- g_free (filename);
+ tracker_file_close (f, FALSE);
}
#ifdef USING_UNZIPPSFILES
+
static void
-extract_ps_gz (const gchar *uri,
- TrackerSparqlBuilder *metadata)
+extract_ps_gz (const gchar *uri,
+ TrackerSparqlBuilder *metadata)
{
- FILE *fz, *f;
- GError *error = NULL;
- gchar *gunzipped;
- gint fdz;
- gint fd;
- gboolean ptat;
+ FILE *fz, *f;
+ GError *error = NULL;
+ gchar *gunzipped;
+ gint fdz;
+ gint fd;
+ gboolean ptat;
const gchar *argv[4];
gchar *filename;
@@ -390,7 +386,8 @@ extract_ps_gz (const gchar *uri,
g_unlink (gunzipped);
g_free (filename);
}
-#endif
+
+#endif /* USING_UNZIPPSFILES */
TrackerExtractData *
tracker_get_extract_data (void)
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]