[tracker/tracker-0.6] Bug NB#100123 - Image:Date is empty for some of the images



commit 9b7bea1d3c5478eb7d5d6695a80cecef85dbe351
Author: Carlos Garnacho <carlosg gnome org>
Date:   Mon Jun 22 16:23:28 2009 +0200

    Bug NB#100123 - Image:Date is empty for some of the images
    
    Reworked date parser, based on a patch by Mikael Ottela, now the parser uses
    sscanf() to parse the date part by part, and compose the ISO8601 date from it.

 src/libtracker-common/tracker-type-utils.c        |  332 +++++++++++---------
 src/tracker-extract/tracker-extract-jpeg.c        |    2 +
 tests/libtracker-common/tracker-type-utils-test.c |   92 ++++++-
 3 files changed, 269 insertions(+), 157 deletions(-)
---
diff --git a/src/libtracker-common/tracker-type-utils.c b/src/libtracker-common/tracker-type-utils.c
index b12c8b8..bb095f1 100644
--- a/src/libtracker-common/tracker-type-utils.c
+++ b/src/libtracker-common/tracker-type-utils.c
@@ -27,6 +27,8 @@
 #include <strings.h>
 #include <string.h>
 #include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
 
 #include <glib.h>
 
@@ -46,42 +48,158 @@ static const char imonths[] = {
 	'6', '7', '8', '9', '0', '1', '2'
 };
 
-static gboolean
-is_int (const gchar *str)
+static gint
+parse_month (const gchar *month)
 {
-	gint	 i, len;
+	gint i;
 
-	if (!str || str[0] == '\0') {
-		return FALSE;
+	for (i = 0; i < 12; i++) {
+		if (!strncmp (month, months[i], 3)) {
+			return i;
+		}
 	}
 
-	len = strlen (str);
+	return -1;
+}
 
-	for (i = 0; i < len; i++) {
-		if (!g_ascii_isdigit(str[i])) {
-			return FALSE;
-		}
+static gdouble
+get_remainder_multiplier (gint remainder)
+{
+	gdouble mult;
+
+	mult = (gdouble) remainder;
+
+	while (mult > 1) {
+		mult /= 10;
 	}
 
-	return TRUE ;
+	return mult;
 }
 
-static gint
-parse_month (const gchar *month)
+/* FIXME This function is crap and it doing unnecessary
+ * allocations (splits) all over the place.
+ * FIXME We still don't handle large years or year-week
+ * or year-day-formats (not that they would be common in 
+ * file formats 
+ */
+static gboolean
+tracker_simplify_8601 (const gchar *date_string,
+		       gchar       *buf)
 {
-	gint i;
+	gchar *copy, *date, *time, *zone, *sep;
+	gint year, mon, day, hour, min, sec, remainder;
+	gint len;
 
-	for (i = 0; i < 12; i++) {
-		if (!strncmp (month, months[i], 3)) {
-			return i;
+	if (!date_string) {
+		return FALSE;
+	}
+
+	date = copy = g_strdup (date_string);
+	year = mon = day = 1;
+	hour = min = sec = 0;
+	zone = NULL;
+
+	/* First try to split date and time, either by ' ' or 'T' */
+	sep = strchr (copy, 'T');
+
+	if (!sep) {
+		sep = strchr (copy, ' ');
+	}
+
+	if (sep) {
+		/* Separate date and time */
+		*sep = '\0';
+		time = sep + 1;
+	} else {
+		time = NULL;
+	}
+
+	if (time) {
+		zone = strchr (time, '+');
+
+		if (!zone) {
+			zone = strchr (time, '-');
 		}
 	}
 
-	return -1;
+	if (!zone) {
+		zone = "+00:00";
+	}
+
+	if (date) {
+		len = strlen (date);
+
+		if (len == 10 && sscanf (date, "%4d-%2d-%2d", &year, &mon, &day) == 3) {
+			/* YYYY-MM-DD */
+		} else if (len == 8 && sscanf (date, "%2d-%2d-%2d", &year, &mon, &day) == 3) {
+			/* YY-MM-DD */
+		} else if (len == 8 && sscanf (date, "%4d%2d%2d", &year, &mon, &day) == 3) {
+			/* YYYYMMDD */
+		} else if (len == 6 && sscanf (date, "%2d%2d%2d", &year, &mon, &day) == 3) {
+			/* YYMMDD */
+		} else if (len == 7 && sscanf (date, "%4d-%2d", &year, &mon) == 2) {
+			/* YYYY-MM */
+			day = 1;
+		} else if (len == 4 && sscanf (date, "%4d", &year) == 1) {
+			/* Full year */
+			mon = day = 1;
+		} else if (len == 2 && sscanf (date, "%2d", &year) == 1) {
+			/* Only the century (this is a weird one) */
+			year *= 100;
+			mon = day = 1;
+		} else {
+			g_critical ("Could not parse date in '%s'", date);
+			g_free (copy);
+			return FALSE;
+		}
+	}
+
+	if (time) {
+		len = strlen (time);
+
+		if (len >= 8 && sscanf (time, "%2d:%2d:%2d", &hour, &min, &sec) == 3) {
+			/* hh:mm:ss */
+		} else if (len >= 7 && sscanf (time, "%2d:%2d.%d", &hour, &min, &remainder) == 3) {
+			gdouble mult;
+
+			mult = get_remainder_multiplier (remainder);
+			sec = 60 * mult;
+		} else if (len == 6 && sscanf (time, "%2d%2d%2d", &hour, &min, &sec) == 3) {
+			/* hhmmss */
+		} else if (len == 5 && sscanf (time, "%2d:%2d", &hour, &min) == 2) {
+			/* hh:mm */
+			sec = 0;
+		} else if (len >= 4 && sscanf (time, "%2d.%d", &hour, &remainder) == 2) {
+			gdouble mult;
+			gint secs_in_remainder;
+
+			/* hh.r */
+			mult = get_remainder_multiplier (remainder);
+			secs_in_remainder = 60 * 60 * mult;
+			min = secs_in_remainder / 60;
+			sec = secs_in_remainder % 60;
+		} else if (len == 2 && sscanf (time, "%2d", &hour) == 1) {
+			/* hh */
+			min = sec = 0;
+		} else {
+			g_critical ("Could not parse time in '%s'", time);
+			g_free (copy);
+			return FALSE;
+		}
+	}
+
+	sprintf (buf,
+		 "%04d-%02d-%02dT%02d:%02d:%02d%s",
+		 year, mon, day,
+		 hour, min, sec,
+		 zone);
+
+	g_free (copy);
+
+	return TRUE;
 }
 
-/* Determine date format and convert to ISO 8601 format */
-/* FIXME We should handle all the fractions here (see ISO 8601), as well as YYYY:DDD etc */
+/* Determine date format and convert to simple ISO 8601 format */
 gchar *
 tracker_date_format (const gchar *date_string)
 {
@@ -94,68 +212,15 @@ tracker_date_format (const gchar *date_string)
 
 	len = strlen (date_string);
 
-	/* We cannot format a date without at least a four digit
+	/* We cannot format a date without at least a 2 digit
 	 * year.
 	 */
-	if (len < 4) {
+	if (len < 2) {
 		return NULL;
 	}
 
-	/* Check for year only dates (EG ID3 music tags might have
-	 * Audio.ReleaseDate as 4 digit year)
-	 */
-	if (len == 4) {
-		if (is_int (date_string)) {
-			buf[0] = date_string[0];
-			buf[1] = date_string[1];
-			buf[2] = date_string[2];
-			buf[3] = date_string[3];
-			buf[4] = '-';
-			buf[5] = '0';
-			buf[6] = '1';
-			buf[7] = '-';
-			buf[8] = '0';
-			buf[9] = '1';
-			buf[10] = 'T';
-			buf[11] = '0';
-			buf[12] = '0';
-			buf[13] = ':';
-			buf[14] = '0';
-			buf[15] = '0';
-			buf[16] = ':';
-			buf[17] = '0';
-			buf[18] = '0';
-			buf[19] = '\0';
-
-			return g_strdup (buf);
-		} else {
-			return NULL;
-		}
-	} else if (len == 10)  {
-		/* Check for date part only YYYY-MM-DD*/
-		buf[0] = date_string[0];
-		buf[1] = date_string[1];
-		buf[2] = date_string[2];
-		buf[3] = date_string[3];
-		buf[4] = '-';
-		buf[5] = date_string[5];
-		buf[6] = date_string[6];
-		buf[7] = '-';
-		buf[8] = date_string[8];
-		buf[9] = date_string[9];
-		buf[10] = 'T';
-		buf[11] = '0';
-		buf[12] = '0';
-		buf[13] = ':';
-		buf[14] = '0';
-		buf[15] = '0';
-		buf[16] = ':';
-		buf[17] = '0';
-		buf[18] = '0';
-		buf[19] = '\0';
-
-		return g_strdup (buf);
-	} else if (len == 14) {
+	/* First check for non-8601 formats (why do we even do this? Extractors should already)*/
+	if (len == 14) {
 		/* Check for pdf format EG 20050315113224-08'00' or
 		 * 20050216111533Z
 		 */
@@ -178,62 +243,16 @@ tracker_date_format (const gchar *date_string)
 		buf[16] = ':';
 		buf[17] = date_string[12];
 		buf[18] = date_string[13];
-		buf[19] = '\0';
-
-		return g_strdup (buf);
-	} else if (len == 15 && date_string[14] == 'Z') {
-		buf[0] = date_string[0];
-		buf[1] = date_string[1];
-		buf[2] = date_string[2];
-		buf[3] = date_string[3];
-		buf[4] = '-';
-		buf[5] = date_string[4];
-		buf[6] = date_string[5];
-		buf[7] = '-';
-		buf[8] = date_string[6];
-		buf[9] = date_string[7];
-		buf[10] = 'T';
-		buf[11] = date_string[8];
-		buf[12] = date_string[9];
-		buf[13] = ':';
-		buf[14] = date_string[10];
-		buf[15] = date_string[11];
-		buf[16] = ':';
-		buf[17] = date_string[12];
-		buf[18] = date_string[13];
-		buf[19] = 'Z';
-		buf[20] = '\0';
-
-		return g_strdup (buf);
-	} else if (len == 21 && (date_string[14] == '-' || date_string[14] == '+' )) {
-		buf[0] = date_string[0];
-		buf[1] = date_string[1];
-		buf[2] = date_string[2];
-		buf[3] = date_string[3];
-		buf[4] = '-';
-		buf[5] = date_string[4];
-		buf[6] = date_string[5];
-		buf[7] = '-';
-		buf[8] = date_string[6];
-		buf[9] = date_string[7];
-		buf[10] = 'T';
-		buf[11] = date_string[8];
-		buf[12] = date_string[9];
-		buf[13] = ':';
-		buf[14] = date_string[10];
-		buf[15] = date_string[11];
-		buf[16] = ':';
-		buf[17] = date_string[12];
-		buf[18] = date_string[13];
-		buf[19] = date_string[14];
-		buf[20] = date_string[15];
-		buf[21] = date_string[16];
-		buf[22] =  ':';
-		buf[23] = date_string[18];
-		buf[24] = date_string[19];
+		buf[19] = '+';
+		buf[20] = '0';
+		buf[21] = '0';
+		buf[22] = ':';
+		buf[23] = '0';
+		buf[24] = '0';
 		buf[25] = '\0';
 
 		return g_strdup (buf);
+
 	} else if ((len == 24) && (date_string[3] == ' ')) {
 		/* Check for msoffice date format "Mon Feb  9 10:10:00 2004" */
 		gint  num_month;
@@ -276,9 +295,16 @@ tracker_date_format (const gchar *date_string)
 		buf[16] = ':';
 		buf[17] = date_string[17];
 		buf[18] = date_string[18];
-		buf[19] = '\0';
+		buf[19] = '+';
+		buf[20] = '0';
+		buf[21] = '0';
+		buf[22] = ':';
+		buf[23] = '0';
+		buf[24] = '0';
+		buf[25] = '\0';
 
 		return g_strdup (buf);
+
 	} else if ((len == 19) && (date_string[4] == ':') && (date_string[7] == ':')) {
 		/* Check for Exif date format "2005:04:29 14:56:54" */
 		buf[0] = date_string[0];
@@ -300,40 +326,48 @@ tracker_date_format (const gchar *date_string)
 		buf[16] = ':';
 		buf[17] = date_string[17];
 		buf[18] = date_string[18];
-		buf[19] = '\0';
+		buf[19] = '+';
+		buf[20] = '0';
+		buf[21] = '0';
+		buf[22] = ':';
+		buf[23] = '0';
+		buf[24] = '0';
+		buf[25] = '\0';
 
 		return g_strdup (buf);
-	} else if ((len == 28) && (date_string[4] == '-') && (date_string[10] == 'T') 
-		   && (date_string[19] == '.') ) {
-		/* The fraction of seconds ISO 8601 "YYYY-MM-DDThh:mm:ss.ff+zz:zz" */
+
+	} else if (len == 15 && date_string[14] == 'Z') {		
 		buf[0] = date_string[0];
 		buf[1] = date_string[1];
 		buf[2] = date_string[2];
 		buf[3] = date_string[3];
 		buf[4] = '-';
-		buf[5] = date_string[5];
-		buf[6] = date_string[6];
+		buf[5] = date_string[4];
+		buf[6] = date_string[5];
 		buf[7] = '-';
-		buf[8] = date_string[8];
-		buf[9] = date_string[9];
+		buf[8] = date_string[6];
+		buf[9] = date_string[7];
 		buf[10] = 'T';
-		buf[11] = date_string[11];
-		buf[12] = date_string[12];
+		buf[11] = date_string[8];
+		buf[12] = date_string[9];
 		buf[13] = ':';
-		buf[14] = date_string[14];
-		buf[15] = date_string[15];
+		buf[14] = date_string[10];
+		buf[15] = date_string[11];
 		buf[16] = ':';
-		buf[17] = date_string[17];
-		buf[18] = date_string[18];
-		buf[19] = date_string[22];
-		buf[20] = date_string[23];
-		buf[21] = date_string[24];
+		buf[17] = date_string[12];
+		buf[18] = date_string[13];
+		buf[19] = '+';
+		buf[20] = '0';
+		buf[21] = '0';
 		buf[22] = ':';
-		buf[23] = date_string[26];
-		buf[24] = date_string[27];
+		buf[23] = '0';
+		buf[24] = '0';
 		buf[25] = '\0';
 
 		return g_strdup (buf);
+
+	} else if (tracker_simplify_8601 (date_string, buf)) {
+		return g_strdup (buf);
 	}
 
 	return g_strdup (date_string);
diff --git a/src/tracker-extract/tracker-extract-jpeg.c b/src/tracker-extract/tracker-extract-jpeg.c
index 76ab808..ba88064 100644
--- a/src/tracker-extract/tracker-extract-jpeg.c
+++ b/src/tracker-extract/tracker-extract-jpeg.c
@@ -474,6 +474,8 @@ fail:
 				     tracker_escape_metadata (date));
 		g_free (date);
 	}
+
+	g_debug ("Date: %s", (char *)g_hash_table_lookup (metadata, "Image:Date"));
 }
 
 TrackerExtractData *
diff --git a/tests/libtracker-common/tracker-type-utils-test.c b/tests/libtracker-common/tracker-type-utils-test.c
index 24a20ae..55d6f17 100644
--- a/tests/libtracker-common/tracker-type-utils-test.c
+++ b/tests/libtracker-common/tracker-type-utils-test.c
@@ -36,31 +36,107 @@ test_date_format (void)
 	result = tracker_date_format ("");
 	g_assert (result == NULL);
 
-	/* Fails
 	result = tracker_date_format ("1978"); //Audio.ReleaseDate
-	g_assert (tracker_test_helpers_cmpstr_equal (result, "1978-01-01T00:00:00"));
-	*/
+	g_assert (tracker_test_helpers_cmpstr_equal (result, "1978-01-01T00:00:00+00:00"));
 
 	result = tracker_date_format ("2008-06-14");
-	g_assert (tracker_test_helpers_cmpstr_equal (result, "2008-06-14T00:00:00"));
+	g_assert (tracker_test_helpers_cmpstr_equal (result, "2008-06-14T00:00:00+00:00"));
 	g_free (result);
 
 	result = tracker_date_format ("20080614000000");
-	g_assert (tracker_test_helpers_cmpstr_equal (result, "2008-06-14T00:00:00"));
+	g_assert (tracker_test_helpers_cmpstr_equal (result, "2008-06-14T00:00:00+00:00"));
 	g_free (result);
 
 	result = tracker_date_format ("20080614000000Z");
-	g_assert (tracker_test_helpers_cmpstr_equal (result, "2008-06-14T00:00:00Z"));
+	g_assert (tracker_test_helpers_cmpstr_equal (result, "2008-06-14T00:00:00+00:00"));
 	g_free (result);
 
 	result = tracker_date_format ("Mon Jun 14 04:20:20 2008"); /* MS Office */
-	g_assert (tracker_test_helpers_cmpstr_equal (result, "2008-06-14T04:20:20"));
+	g_assert (tracker_test_helpers_cmpstr_equal (result, "2008-06-14T04:20:20+00:00"));
 	g_free (result);
 
 	result = tracker_date_format ("2008:06:14 04:20:20"); /* Exif style */
-	g_assert (tracker_test_helpers_cmpstr_equal (result, "2008-06-14T04:20:20"));
+	g_assert (tracker_test_helpers_cmpstr_equal (result, "2008-06-14T04:20:20+00:00"));
+	g_free (result);
+
+	/* Do everything allowed in ISO 8601 here */
+
+	/* Dates */
+	/* YYYY or YYYY-MM or YYYY-MM-DD or YYYYMMDD */
+	result = tracker_date_format ("2008");
+	g_assert (tracker_test_helpers_cmpstr_equal (result, "2008-01-01T00:00:00+00:00"));
+	g_free (result);
+
+	result = tracker_date_format ("1978-06");
+	g_assert (tracker_test_helpers_cmpstr_equal (result, "1978-06-01T00:00:00+00:00"));
+	g_free (result);
+
+	result = tracker_date_format ("1999-02-23");
+	g_assert (tracker_test_helpers_cmpstr_equal (result, "1999-02-23T00:00:00+00:00"));
+	g_free (result);
+
+	result = tracker_date_format ("19931223");
+	g_assert (tracker_test_helpers_cmpstr_equal (result, "1993-12-23T00:00:00+00:00"));
+	g_free (result);
+
+	/* Times */
+	/* hh:mm:ss or hhmmss, hh:mm or hhmm or hh , with T and without*/
+	
+	result = tracker_date_format ("1999-02-23T23:12:01");
+	g_assert (tracker_test_helpers_cmpstr_equal (result, "1999-02-23T23:12:01+00:00"));
+	g_free (result);	
+
+	result = tracker_date_format ("19990223T231201");
+	g_assert (tracker_test_helpers_cmpstr_equal (result, "1999-02-23T23:12:01+00:00"));
+	g_free (result);
+
+	result = tracker_date_format ("1979-03-04 16:03:03");
+	g_assert (tracker_test_helpers_cmpstr_equal (result, "1979-03-04T16:03:03+00:00"));
+	g_free (result);
+
+	result = tracker_date_format ("1979-03-04T16:03");
+	g_assert (tracker_test_helpers_cmpstr_equal (result, "1979-03-04T16:03:00+00:00"));
 	g_free (result);
 
+	result = tracker_date_format ("1979-03-04T16");
+	g_assert (tracker_test_helpers_cmpstr_equal (result, "1979-03-04T16:00:00+00:00"));
+	g_free (result);	
+
+	/* Decimals */
+	/* We are only interested in accuracy of a second */
+
+	result = tracker_date_format ("1979-03-04 16:03:03.5");
+	g_assert (tracker_test_helpers_cmpstr_equal (result, "1979-03-04T16:03:03+00:00"));
+	g_free (result);
+
+	result = tracker_date_format ("1979-03-04T16:03.2");
+	g_assert (tracker_test_helpers_cmpstr_equal (result, "1979-03-04T16:03:12+00:00"));
+	g_free (result);
+
+	result = tracker_date_format ("1979-03-04T16.25");
+	g_assert (tracker_test_helpers_cmpstr_equal (result, "1979-03-04T16:15:00+00:00"));
+	g_free (result);	
+
+	result = tracker_date_format ("1979-03-04 16.125");
+	g_assert (tracker_test_helpers_cmpstr_equal (result, "1979-03-04T16:07:30+00:00"));
+	g_free (result);
+
+	result = tracker_date_format ("2002-07-20T11:24:35.1Z");
+	g_assert (tracker_test_helpers_cmpstr_equal (result, "2002-07-20T11:24:35+00:00"));
+	g_free (result);
+
+	/* Timezone and UTC */
+
+	result = tracker_date_format ("1979-03-04 16:03:03.5Z");
+	g_assert (tracker_test_helpers_cmpstr_equal (result, "1979-03-04T16:03:03+00:00"));
+	g_free (result);
+
+	result = tracker_date_format ("1979-03-04T16:03:03.50+02:00");
+	g_assert (tracker_test_helpers_cmpstr_equal (result, "1979-03-04T16:03:03+02:00"));
+	g_free (result);
+
+	/* FIXME Missing week and day notations, truncated notation, extended years */
+
 	if (g_test_trap_fork (0, G_TEST_TRAP_SILENCE_STDERR)) {
 		result = tracker_date_format (NULL);
 	}



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]