[tracker] tracker-extract: Fixed PS extractor to use TrackerSparqlBuilder



commit b1c0f012ddd2c3f6f0f398734fe32aa177352408
Author: Martyn Russell <martyn lanedo com>
Date:   Wed Nov 4 16:29:15 2009 +0000

    tracker-extract: Fixed PS extractor to use TrackerSparqlBuilder

 src/tracker-extract/tracker-extract-ps.c |  197 +++++++++++++++---------------
 1 files changed, 97 insertions(+), 100 deletions(-)
---
diff --git a/src/tracker-extract/tracker-extract-ps.c b/src/tracker-extract/tracker-extract-ps.c
index 9e12d7b..cbec510 100644
--- a/src/tracker-extract/tracker-extract-ps.c
+++ b/src/tracker-extract/tracker-extract-ps.c
@@ -1,7 +1,7 @@
 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
 /*
  * Copyright (C) 2007, Mr Jamie McCracken (jamiemcc gnome org)
- * Copyright (C) 2008, Nokia
+ * Copyright (C) 2008-2009, Nokia
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
@@ -19,12 +19,12 @@
  * Boston, MA  02110-1301, USA.
  */
 
+#include "config.h"
+
 #ifndef _GNU_SOURCE
 #define _GNU_SOURCE
 #endif
 
-#include "config.h"
-
 #include <fcntl.h>
 #include <string.h>
 #include <sys/types.h>
@@ -42,13 +42,6 @@
 
 #include "tracker-main.h"
 
-#define NIE_PREFIX TRACKER_NIE_PREFIX
-#define NFO_PREFIX TRACKER_NFO_PREFIX
-#define NCO_PREFIX TRACKER_NCO_PREFIX
-
-#define RDF_PREFIX TRACKER_RDF_PREFIX
-#define RDF_TYPE RDF_PREFIX "type"
-
 #ifndef HAVE_GETLINE
 
 #include <stddef.h>
@@ -59,21 +52,21 @@
 #undef getdelim
 #undef getline
 
-#define GROWBY 80
+#define GROW_BY 80
 
 #endif /* HAVE_GETLINE */
 
 #ifdef USING_UNZIPPSFILES
 static void extract_ps_gz (const gchar *uri,
-			   TrackerSparqlBuilder  *metadata);
+			   TrackerSparqlBuilder *metadata);
 #endif
 static void extract_ps	  (const gchar *uri,
-			   TrackerSparqlBuilder   *metadata);
+			   TrackerSparqlBuilder *metadata);
 
 static TrackerExtractData data[] = {
 #ifdef USING_UNZIPPSFILES
 	{ "application/x-gzpostscript",	extract_ps_gz },
-#endif
+#endif /* USING_UNZIPPSFILES */
 	{ "application/postscript",	extract_ps    },
 	{ NULL, NULL }
 };
@@ -96,14 +89,14 @@ igetdelim (gchar  **linebuf,
 	}
 
 	if (*linebuf == NULL && *linebufsz == 0) {
-		*linebuf = g_malloc (GROWBY);
+		*linebuf = g_malloc (GROW_BY);
 
 		if (!*linebuf) {
 			errno = ENOMEM;
 			return -1;
 		}
 
-		*linebufsz += GROWBY;
+		*linebufsz += GROW_BY;
 	}
 
 	idx = 0;
@@ -111,7 +104,7 @@ igetdelim (gchar  **linebuf,
 	while ((ch = fgetc (file)) != EOF) {
 		/* Grow the line buffer as necessary */
 		while (idx > *linebufsz - 2) {
-			*linebuf = g_realloc (*linebuf, *linebufsz += GROWBY);
+			*linebuf = g_realloc (*linebuf, *linebufsz += GROW_BY);
 
 			if (!*linebuf) {
 				errno = ENOMEM;
@@ -186,21 +179,25 @@ date_to_iso8601 (const gchar *date)
 	if (date && date[1] && date[2]) {
 		if (date[0] == '(') {
 			/* we have probably a date like
-			   "(18:07 Tuesday 22 May 2007)" */
+			 * "(18:07 Tuesday 22 May 2007)" 
+			 */
 			return hour_day_str_day (date);
 		} else if (g_ascii_isalpha (date[0])) {
 			/* we have probably a date like
-			   "Tue May 22 18:07:10 2007" */
+			 * "Tue May 22 18:07:10 2007" 
+			 */
 			return day_str_month_day (date);
 
 		} else if (date[1] == ' ' || date[2] == ' ') {
 			/* we have probably a date like
-			   "22 May 1997 18:07:10 -0600" */
+			 * "22 May 1997 18:07:10 -0600" 
+			 */
 			return day_month_year_date (date);
 
 		} else if (date[1] == ':' || date[2] == ':') {
 			/* we have probably a date like
-			   "6:07 PM May 22, 2007" */
+			 * "6:07 PM May 22, 2007" 
+			 */
 			return hour_month_day_date (date);
 		}
 	}
@@ -209,102 +206,101 @@ date_to_iso8601 (const gchar *date)
 }
 
 static void
-extract_ps (const gchar *uri,
-	    TrackerSparqlBuilder	*metadata)
+extract_ps (const gchar          *uri,
+	    TrackerSparqlBuilder *metadata)
 {
 	FILE *f;
-	gchar *filename = g_filename_from_uri (uri, NULL, NULL);
-
+	gchar *filename;
+	gchar *line;
+	gsize length;
+	gssize read_char;
+	
+	filename = g_filename_from_uri (uri, NULL, NULL);
 	f = tracker_file_open (filename, "r", TRUE);
+	g_free (filename);
 
-	if (f) {
-		gchar  *line;
-		gsize	length;
-		gssize	read_char;
+	if (!f) {
+		return;
+	}
 
-		line = NULL;
-		length = 0;
+	line = NULL;
+	length = 0;
 
-		tracker_statement_list_insert (metadata, uri, 
-		                          RDF_TYPE, 
-		                          NFO_PREFIX "PaginatedTextDocument");
-
-		while ((read_char = getline (&line, &length, f)) != -1) {
-			gboolean pageno_atend	 = FALSE;
-			gboolean header_finished = FALSE;
-
-			line[read_char - 1] = '\0';  /* overwrite '\n' char */
-
-			if (!header_finished && strncmp (line, "%%Copyright:", 12) == 0) {
-				tracker_statement_list_insert (metadata, uri,
-							  NIE_PREFIX "copyright",
-							  line + 13);
-
-			} else if (!header_finished && strncmp (line, "%%Title:", 8) == 0) {
-				tracker_statement_list_insert (metadata, uri,
-							  NIE_PREFIX "title",
-							  line + 9);
-
-			} else if (!header_finished && strncmp (line, "%%Creator:", 10) == 0) {
-
-				tracker_statement_list_insert (metadata, ":", RDF_TYPE, NCO_PREFIX "Contact");
-				tracker_statement_list_insert (metadata, ":", NCO_PREFIX "fullname", line + 11);
-				tracker_statement_list_insert (metadata, uri, NCO_PREFIX "creator", ":");
-
-			} else if (!header_finished && strncmp (line, "%%CreationDate:", 15) == 0) {
-				gchar *date;
-
-				date = date_to_iso8601 (line + 16);
-
-				if (date) {
-					tracker_statement_list_insert (metadata, uri,
-								  NIE_PREFIX "contentCreated",
-								  date);
-
-					g_free (date);
-				}
-			} else if (strncmp (line, "%%Pages:", 8) == 0) {
-				if (strcmp (line + 9, "(atend)") == 0) {
-					pageno_atend = TRUE;
-				} else {
-					tracker_statement_list_insert (metadata, uri,
-								  NFO_PREFIX "pageCount",
-								  line + 9);
-				}
-			} else if (strncmp (line, "%%EndComments", 14) == 0) {
-				header_finished = TRUE;
-
-				if (!pageno_atend) {
-					break;
-				}
+	tracker_sparql_builder_subject_iri (metadata, uri);
+	tracker_sparql_builder_predicate (metadata, "a");
+	tracker_sparql_builder_object (metadata, "nfo:PaginatedTextDocument");
+	
+	while ((read_char = getline (&line, &length, f)) != -1) {
+		gboolean pageno_atend = FALSE;
+		gboolean header_finished = FALSE;
+
+		line[read_char - 1] = '\0';  /* overwrite '\n' char */
+
+		if (!header_finished && strncmp (line, "%%Copyright:", 12) == 0) {
+			tracker_sparql_builder_predicate (metadata, "nie:copyright");
+			tracker_sparql_builder_object_unvalidated (metadata, line + 13);
+		} else if (!header_finished && strncmp (line, "%%Title:", 8) == 0) {
+			tracker_sparql_builder_predicate (metadata, "nie:title");
+			tracker_sparql_builder_object_unvalidated (metadata, line + 9);
+		} else if (!header_finished && strncmp (line, "%%Creator:", 10) == 0) {
+			tracker_sparql_builder_predicate (metadata, "nco:creator");
+			tracker_sparql_builder_object_blank_open (metadata);
+			tracker_sparql_builder_predicate (metadata, "a");
+			tracker_sparql_builder_object (metadata, "nco:Contact");
+			tracker_sparql_builder_predicate (metadata, "nco:fullname");
+			tracker_sparql_builder_object_unvalidated (metadata, line + 11);
+			tracker_sparql_builder_object_blank_close (metadata);
+		} else if (!header_finished && strncmp (line, "%%CreationDate:", 15) == 0) {
+			gchar *date;
+
+			date = date_to_iso8601 (line + 16);
+			if (date) {
+				tracker_sparql_builder_predicate (metadata, "nie:contentCreated");
+				tracker_sparql_builder_object_unvalidated (metadata, date);
+				g_free (date);
 			}
+		} else if (strncmp (line, "%%Pages:", 8) == 0) {
+			if (strcmp (line + 9, "(atend)") == 0) {
+				pageno_atend = TRUE;
+			} else {
+				gint64 page_count;
+
+				page_count = g_ascii_strtoll (line + 9, NULL, 10);
+				tracker_sparql_builder_predicate (metadata, "nfo:pageCount");
+				tracker_sparql_builder_object_int64 (metadata, page_count);
+			}
+		} else if (strncmp (line, "%%EndComments", 14) == 0) {
+			header_finished = TRUE;
 
-			g_free (line);
-			line = NULL;
-			length = 0;
+			if (!pageno_atend) {
+				break;
+			}
 		}
 
-		if (line) {
-			g_free (line);
-		}
+		g_free (line);
+		line = NULL;
+		length = 0;
+	}
 
-		tracker_file_close (f, FALSE);
+	if (line) {
+		g_free (line);
 	}
 
-	g_free (filename);
+	tracker_file_close (f, FALSE);
 }
 
 #ifdef USING_UNZIPPSFILES
+
 static void
-extract_ps_gz (const gchar *uri,
-	       TrackerSparqlBuilder   *metadata)
+extract_ps_gz (const gchar          *uri,
+	       TrackerSparqlBuilder *metadata)
 {
-	FILE	    *fz, *f;
-	GError	    *error = NULL;
-	gchar	    *gunzipped;
-	gint	     fdz;
-	gint	     fd;
-	gboolean     ptat;
+	FILE *fz, *f;
+	GError *error = NULL;
+	gchar *gunzipped;
+	gint fdz;
+	gint fd;
+	gboolean ptat;
 	const gchar *argv[4];
 	gchar *filename;
 	
@@ -390,7 +386,8 @@ extract_ps_gz (const gchar *uri,
 	g_unlink (gunzipped);
 	g_free (filename);
 }
-#endif
+
+#endif /* USING_UNZIPPSFILES */
 
 TrackerExtractData *
 tracker_get_extract_data (void)



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]