[glib/wip/rancell/iso8601-2] GDateTime: Support parsing ISO 8601 strings.



commit aabbb3ca0482495db6efdaf702d39be60ac43564
Author: Robert Ancell <robert ancell canonical com>
Date:   Thu Aug 25 11:53:54 2016 +1200

    GDateTime: Support parsing ISO 8601 strings.
    
    https://bugzilla.gnome.org/show_bug.cgi?id=753459

 glib/gdatetime.c       |  298 ++++++++++++++++++++++++++++++++++++++++++++++++
 glib/gdatetime.h       |    3 +
 glib/tests/gdatetime.c |  207 +++++++++++++++++++++++++++++++++
 3 files changed, 508 insertions(+), 0 deletions(-)
---
diff --git a/glib/gdatetime.c b/glib/gdatetime.c
index 8ff0223..9c68c47 100644
--- a/glib/gdatetime.c
+++ b/glib/gdatetime.c
@@ -24,6 +24,7 @@
  *          Thiago Santos <thiago sousa santos collabora co uk>
  *          Emmanuele Bassi <ebassi linux intel com>
  *          Ryan Lortie <desrt desrt ca>
+ *          Robert Ancell <robert ancell canonical com>
  */
 
 /* Algorithms within this file are based on the Calendar FAQ by
@@ -887,6 +888,303 @@ g_date_time_new_from_timeval_utc (const GTimeVal *tv)
   return datetime;
 }
 
+static gboolean
+get_iso8601_int (const gchar *text, gint length, gint *value)
+{
+  gint pos = 0, total = 0;
+
+  while (pos < length)
+    {
+      gint digit = text[pos++] - '0';
+      if (digit < 0 || digit > 9)
+        return FALSE; /* only ASCII decimal digits are accepted */
+      total = total * 10 + digit;
+    }
+
+  *value = total; /* written only once every character has been validated */
+  return TRUE;
+}
+
+static gboolean
+get_iso8601_seconds (const gchar *text, gint length, gdouble *value)
+{
+  gint i;
+  gdouble multiplier = 0.1, v = 0;
+
+  /* Whole seconds: at least one digit must come before any separator */
+  for (i = 0; i < length && text[i] != '.' && text[i] != ','; i++)
+    {
+      if (text[i] < '0' || text[i] > '9')
+        return FALSE;
+      v = v * 10 + (text[i] - '0');
+    }
+  if (i == 0)
+    return FALSE;
+
+  /* Skip the '.' or ',' separator; it must be followed by a digit */
+  if (i < length && ++i == length)
+    return FALSE;
+  for (; i < length; i++)
+    {
+      gchar c = text[i];
+      if (c < '0' || c > '9')
+        return FALSE;
+      v += (c - '0') * multiplier;
+      multiplier *= 0.1;
+    }
+
+  *value = v;
+  return TRUE;
+}
+
+static gboolean
+convert_from_iso8601_ordinal (gint year, gint ordinal_day, gint *month, gint *day)
+{
+  /* Row of days_in_year to consult, as selected by GREGORIAN_LEAP */
+  gint leap = GREGORIAN_LEAP (year);
+  gint candidate = 1;
+
+  if (ordinal_day < 1)
+    return FALSE;
+
+  /* Find the first month whose table entry is not below ordinal_day */
+  while (candidate <= 12 && ordinal_day > days_in_year[leap][candidate])
+    candidate++;
+  if (candidate > 12)
+    return FALSE;
+
+  *month = candidate;
+  *day = ordinal_day - days_in_year[leap][candidate - 1];
+  return TRUE;
+}
+
+static gboolean
+convert_from_iso8601_week (gint year, gint week, gint week_day, gint *offset)
+{
+  gint days, week_offset;
+
+  if (week < 1 || week > 52 || week_day < 1 || week_day > 7)
+    return FALSE;
+
+  /* Offset from 1 January to the day before the Monday of ISO week one */
+  days = ymd_to_days (year, 1, 1);
+  week_offset = -(days % 7);
+  if (week_offset < -4) /* 1 Jan on Fri or Sat: week one starts the next Monday */
+    week_offset += 7;
+
+  *offset = week_offset + ((week - 1) * 7) + week_day;
+  return TRUE;
+}
+
+static gboolean
+parse_iso8601_date (const gchar *text, gint length,
+                    gint *year, gint *month, gint *day, gint *offset)
+{
+  /* YYYY-MM-DD: calendar date; month/day ranges are not checked here */
+  if (length == 10 && text[4] == '-' && text[7] == '-')
+    {
+      return get_iso8601_int (text, 4, year) &&
+             get_iso8601_int (text + 5, 2, month) &&
+             get_iso8601_int (text + 8, 2, day);
+    }
+  /* YYYY-DDD: ordinal date, converted to month and day */
+  else if (length == 8 && text[4] == '-')
+    {
+      gint ordinal_day;
+      return get_iso8601_int (text, 4, year) &&
+             get_iso8601_int (text + 5, 3, &ordinal_day) &&
+             convert_from_iso8601_ordinal (*year, ordinal_day, month, day);
+    }
+  /* YYYY-Www-D: week date; yields 1 January plus a day count in *offset */
+  else if (length == 10 && text[4] == '-' && text[5] == 'W' && text[8] == '-')
+    {
+      gint week, week_day;
+      *month = 1;
+      *day = 1;
+      return get_iso8601_int (text, 4, year) &&
+             get_iso8601_int (text + 6, 2, &week) &&
+             get_iso8601_int (text + 9, 1, &week_day) &&
+             convert_from_iso8601_week (*year, week, week_day, offset);
+    }
+  /* YYYYWwwD: week date without dividers; must be tested before YYYYMMDD */
+  else if (length == 8 && text[4] == 'W')
+    {
+      gint week, week_day;
+      *month = 1;
+      *day = 1;
+      return get_iso8601_int (text, 4, year) &&
+             get_iso8601_int (text + 5, 2, &week) &&
+             get_iso8601_int (text + 7, 1, &week_day) &&
+             convert_from_iso8601_week (*year, week, week_day, offset);
+    }
+  /* YYYYMMDD: any other 8-character date is read as a calendar date */
+  else if (length == 8)
+    {
+      return get_iso8601_int (text, 4, year) &&
+             get_iso8601_int (text + 4, 2, month) &&
+             get_iso8601_int (text + 6, 2, day);
+    }
+  /* YYYYDDD: ordinal date without divider */
+  else if (length == 7)
+    {
+      gint ordinal_day;
+      return get_iso8601_int (text, 4, year) &&
+             get_iso8601_int (text + 4, 3, &ordinal_day) &&
+             convert_from_iso8601_ordinal (*year, ordinal_day, month, day);
+    }
+  else
+    return FALSE;
+}
+
+static gboolean
+parse_iso8601_timezone (const gchar *text, gint length, GTimeZone **tz)
+{
+  gint offset_hours = 0, offset_minutes = 0;
+
+  /* Z */
+  if (length == 1 && text[0] == 'Z')
+    {
+      /* UTC: the zero offset initialised above already applies */
+    }
+  /* +hh:mm or -hh:mm */
+  else if (length == 6 && (text[0] == '+' || text[0] == '-') && text[3] == ':')
+    {
+      if (!get_iso8601_int (text + 1, 2, &offset_hours) ||
+          !get_iso8601_int (text + 4, 2, &offset_minutes))
+          return FALSE;
+    }
+  /* +hhmm or -hhmm */
+  else if (length == 5 && (text[0] == '+' || text[0] == '-'))
+    {
+      if (!get_iso8601_int (text + 1, 2, &offset_hours) ||
+          !get_iso8601_int (text + 3, 2, &offset_minutes))
+          return FALSE;
+    }
+  /* +hh or -hh */
+  else if (length == 3 && (text[0] == '+' || text[0] == '-'))
+    {
+      if (!get_iso8601_int (text + 1, 2, &offset_hours))
+          return FALSE;
+    }
+  else
+    return FALSE;
+
+  /* The digits were parsed but never range-checked: reject e.g. +99:99 */
+  if (offset_hours > 23 || offset_minutes > 59)
+    return FALSE;
+  *tz = g_time_zone_new (text);
+  return TRUE;
+}
+
+static gboolean
+parse_iso8601_time (const gchar *text, gint length,
+                    gint *hour, gint *minute, gdouble *seconds, GTimeZone **tz)
+{
+  gint tz_start = 0;
+  gint minute_start, seconds_start;
+
+  /* The timezone suffix starts at the first position where one parses;
+   * with no suffix the whole text is the time */
+  while (tz_start < length &&
+         !parse_iso8601_timezone (text + tz_start, length - tz_start, tz))
+    tz_start++;
+  length = tz_start;
+
+  if (length >= 8 && text[2] == ':' && text[5] == ':')
+    {
+      /* hh:mm:ss(.sss) */
+      minute_start = 3;
+      seconds_start = 6;
+    }
+  else if (length >= 6)
+    {
+      /* hhmmss(.sss) */
+      minute_start = 2;
+      seconds_start = 4;
+    }
+  else
+    return FALSE;
+
+  return get_iso8601_int (text, 2, hour) &&
+         get_iso8601_int (text + minute_start, 2, minute) &&
+         get_iso8601_seconds (text + seconds_start, length - seconds_start, seconds);
+}
+
+/**
+ * g_date_time_new_from_iso8601:
+ * @text: an ISO 8601 formatted time string.
+ *
+ * Creates a #GDateTime corresponding to the given ISO 8601 formatted string
+ * @text. Only the following subset of ISO 8601 is supported:
+ *
+ * <date>T<time> or <date>t<time> or <date> <time>
+ *
+ * <date> is in the form:
+ * YYYY-MM-DD - Year/month/day, e.g. 2016-08-24.
+ * YYYYMMDD   - Same as above without dividers.
+ * YYYY-DDD   - Ordinal day where DDD is from 001 to 366, e.g. 2016-237.
+ * YYYYDDD    - Same as above without dividers.
+ * YYYY-Www-D - Week day where ww is from 01 to 52 and D from 1-7, e.g.
+ *              2016-W34-3.
+ * YYYYWwwD   - Same as above without dividers.
+ *
+ * <time> is in the form:
+ * hh:mm:ss(.sss) - Hours, minutes, seconds (subseconds), e.g. 22:10:42.123.
+ * hhmmss(.sss)   - Same as above without dividers.
+ *
+ * The time can have a timezone suffix (local time is used without one):
+ * Z                - UTC.
+ * +hh:mm or -hh:mm - Offset from UTC in hours and minutes, e.g. +12:00 (the colon may be omitted).
+ * +hh or -hh       - Offset from UTC in hours, e.g. +12.
+ *
+ * This call can fail (returning %NULL) if @text is not a valid ISO 8601
+ * formatted string.
+ *
+ * You should release the return value by calling g_date_time_unref()
+ * when you are done with it.
+ *
+ * Returns: a new #GDateTime, or %NULL
+ *
+ * Since: 2.50
+ **/
+GDateTime *
+g_date_time_new_from_iso8601 (const gchar *text)
+{
+  gint length, date_length = -1;
+  gint year = 0, month = 0, day = 0, offset = 0, hour = 0, minute = 0;
+  gdouble seconds = 0.0;
+  GTimeZone *tz = NULL;
+  GDateTime *datetime = NULL;
+
+  g_return_val_if_fail (text != NULL, NULL);
+
+  /* Date and time are separated by the first 'T', 't', or ' ' */
+  for (length = 0; text[length] != '\0'; length++)
+    {
+      if (date_length < 0 && (text[length] == 'T' || text[length] == 't' || text[length] == ' '))
+        date_length = length;
+    }
+
+  if (date_length < 0)
+    return NULL;
+
+  if (!parse_iso8601_date (text, date_length, &year, &month, &day, &offset) ||
+      !parse_iso8601_time (text + date_length + 1, length - (date_length + 1),
+                           &hour, &minute, &seconds, &tz))
+    goto out;
+
+  if (tz == NULL)
+    tz = g_time_zone_new_local ();
+  datetime = g_date_time_new (tz, year, month, day, hour, minute, seconds);
+  if (datetime != NULL && offset != 0)
+    datetime->days += offset; /* week dates: shift from 1 January */
+
+out:
+    if (tz != NULL)
+      g_time_zone_unref (tz);
+    return datetime;
+}
+
 /* full new functions {{{1 */
 
 /**
diff --git a/glib/gdatetime.h b/glib/gdatetime.h
index 63942c8..ce2a2c7 100644
--- a/glib/gdatetime.h
+++ b/glib/gdatetime.h
@@ -120,6 +120,9 @@ GDateTime *             g_date_time_new_from_timeval_local              (const G
 GLIB_AVAILABLE_IN_ALL
 GDateTime *             g_date_time_new_from_timeval_utc                (const GTimeVal *tv);
 
+GLIB_AVAILABLE_IN_2_50
+GDateTime *             g_date_time_new_from_iso8601                    (const gchar         *text);
+
 GLIB_AVAILABLE_IN_ALL
 GDateTime *             g_date_time_new                                 (GTimeZone      *tz,
                                                                          gint            year,
diff --git a/glib/tests/gdatetime.c b/glib/tests/gdatetime.c
index f6c3cf0..a346e56 100644
--- a/glib/tests/gdatetime.c
+++ b/glib/tests/gdatetime.c
@@ -380,6 +380,212 @@ test_GDateTime_new_from_timeval_utc (void)
 }
 
 static void
+test_GDateTime_new_from_iso8601 (void)
+{
+  GDateTime *dt;
+
+  /* Need non-empty string */
+  dt = g_date_time_new_from_iso8601 ("");
+  g_assert (dt == NULL);
+
+  /* Needs to be correctly formatted */
+  dt = g_date_time_new_from_iso8601 ("not a date");
+  g_assert (dt == NULL);
+
+  /* Check common case */
+  dt = g_date_time_new_from_iso8601 ("2016-08-24T22:10:42");
+  ASSERT_DATE (dt, 2016, 8, 24);
+  ASSERT_TIME (dt, 22, 10, 42);
+  g_date_time_unref (dt);
+
+  /* Can't have whitespace */
+  dt = g_date_time_new_from_iso8601 ("2016 08 24T22:10:42");
+  g_assert (dt == NULL);
+  dt = g_date_time_new_from_iso8601 ("2016-08-24T22:10:42 ");
+  g_assert (dt == NULL);
+  dt = g_date_time_new_from_iso8601 (" 2016-08-24T22:10:42");
+  g_assert (dt == NULL);
+
+  /* Check lowercase time separator or space allowed */
+  dt = g_date_time_new_from_iso8601 ("2016-08-24t22:10:42");
+  ASSERT_DATE (dt, 2016, 8, 24);
+  ASSERT_TIME (dt, 22, 10, 42);
+  g_date_time_unref (dt);
+  dt = g_date_time_new_from_iso8601 ("2016-08-24 22:10:42");
+  ASSERT_DATE (dt, 2016, 8, 24);
+  ASSERT_TIME (dt, 22, 10, 42);
+  g_date_time_unref (dt);
+
+  /* Check dates without separators allowed */
+  dt = g_date_time_new_from_iso8601 ("20160824T22:10:42");
+  ASSERT_DATE (dt, 2016, 8, 24);
+  ASSERT_TIME (dt, 22, 10, 42);
+  g_date_time_unref (dt);
+
+  /* Months are two digits */
+  dt = g_date_time_new_from_iso8601 ("2016-1-01T22:10:42");
+  g_assert (dt == NULL);
+
+  /* Days are two digits */
+  dt = g_date_time_new_from_iso8601 ("2016-01-1T22:10:42");
+  g_assert (dt == NULL);
+
+  /* Need consistent usage of separators */
+  dt = g_date_time_new_from_iso8601 ("2016-0824T22:10:42");
+  g_assert (dt == NULL);
+  dt = g_date_time_new_from_iso8601 ("201608-24T22:10:42");
+  g_assert (dt == NULL);
+
+  /* Check month within valid range */
+  dt = g_date_time_new_from_iso8601 ("2016-00-13T22:10:42");
+  g_assert (dt == NULL);
+  dt = g_date_time_new_from_iso8601 ("2016-13-13T22:10:42");
+  g_assert (dt == NULL);
+
+  /* Check day within valid range */
+  dt = g_date_time_new_from_iso8601 ("2016-01-00T22:10:42");
+  g_assert (dt == NULL);
+  dt = g_date_time_new_from_iso8601 ("2016-01-32T22:10:42");
+  g_assert (dt == NULL);
+
+  /* Check ordinal days work */
+  dt = g_date_time_new_from_iso8601 ("2016-237T22:10:42");
+  ASSERT_DATE (dt, 2016, 8, 24);
+  ASSERT_TIME (dt, 22, 10, 42);
+  g_date_time_unref (dt);
+  dt = g_date_time_new_from_iso8601 ("2016237T22:10:42");
+  ASSERT_DATE (dt, 2016, 8, 24);
+  ASSERT_TIME (dt, 22, 10, 42);
+  g_date_time_unref (dt);
+
+  /* Days start at 1 */
+  dt = g_date_time_new_from_iso8601 ("2016-000T22:10:42");
+  g_assert (dt == NULL);
+
+  /* Limited to number of days in the year (2016 is a leap year) */
+  dt = g_date_time_new_from_iso8601 ("2016-367T22:10:42");
+  g_assert (dt == NULL);
+
+  /* Ordinal days are three digits */
+  dt = g_date_time_new_from_iso8601 ("2016-1T22:10:42");
+  g_assert (dt == NULL);
+  dt = g_date_time_new_from_iso8601 ("2016-12T22:10:42");
+  g_assert (dt == NULL);
+
+  /* Check week days work */
+  dt = g_date_time_new_from_iso8601 ("2016-W34-3T22:10:42");
+  ASSERT_DATE (dt, 2016, 8, 24);
+  ASSERT_TIME (dt, 22, 10, 42);
+  g_date_time_unref (dt);
+  dt = g_date_time_new_from_iso8601 ("2016W343T22:10:42");
+  ASSERT_DATE (dt, 2016, 8, 24);
+  ASSERT_TIME (dt, 22, 10, 42);
+  g_date_time_unref (dt);
+
+  /* We don't support weeks without weekdays (valid ISO 8601) */
+  dt = g_date_time_new_from_iso8601 ("2016-W34T22:10:42");
+  g_assert (dt == NULL);
+  dt = g_date_time_new_from_iso8601 ("2016W34T22:10:42");
+  g_assert (dt == NULL);
+
+  /* Weeks are two digits */
+  dt = g_date_time_new_from_iso8601 ("2016-W3-1T22:10:42");
+  g_assert (dt == NULL);
+
+  /* Weeks start at 1 */
+  dt = g_date_time_new_from_iso8601 ("2016-W00-1T22:10:42");
+  g_assert (dt == NULL);
+
+  /* Limited to number of weeks in the year */
+  dt = g_date_time_new_from_iso8601 ("2016-W53-1T22:10:42");
+  g_assert (dt == NULL);
+
+  /* Limited to number of days in the week */
+  dt = g_date_time_new_from_iso8601 ("2016-W34-0T22:10:42");
+  g_assert (dt == NULL);
+  dt = g_date_time_new_from_iso8601 ("2016-W34-8T22:10:42");
+  g_assert (dt == NULL);
+
+  /* Days are one digit */
+  dt = g_date_time_new_from_iso8601 ("2016-W34-99T22:10:42");
+  g_assert (dt == NULL);
+
+  /* Check week day changes depending on year */
+  dt = g_date_time_new_from_iso8601 ("2017-W34-1T22:10:42");
+  ASSERT_DATE (dt, 2017, 8, 21);
+  g_date_time_unref (dt);
+
+  /* Check week day changes depending on leap years */
+  dt = g_date_time_new_from_iso8601 ("1900-W01-1T22:10:42");
+  ASSERT_DATE (dt, 1900, 1, 1);
+  g_date_time_unref (dt);
+
+  /* YYYY-MM not allowed (NOT valid ISO 8601) */
+  dt = g_date_time_new_from_iso8601 ("2016-08T22:10:42");
+  g_assert (dt == NULL);
+
+  /* We don't support omitted year (valid ISO 8601) */
+  dt = g_date_time_new_from_iso8601 ("--08-24T22:10:42");
+  g_assert (dt == NULL);
+  dt = g_date_time_new_from_iso8601 ("--0824T22:10:42");
+  g_assert (dt == NULL);
+
+  /* Check subseconds work */
+  dt = g_date_time_new_from_iso8601 ("2016-08-24T22:10:42.123456");
+  ASSERT_DATE (dt, 2016, 8, 24);
+  ASSERT_TIME (dt, 22, 10, 42.123456);
+  g_date_time_unref (dt);
+
+  /* Check time separators optional */
+  dt = g_date_time_new_from_iso8601 ("2016-08-24T221042.123456");
+  ASSERT_DATE (dt, 2016, 8, 24);
+  ASSERT_TIME (dt, 22, 10, 42.123456);
+  g_date_time_unref (dt);
+
+  /* We don't support times without minutes / seconds (valid ISO 8601) */
+  dt = g_date_time_new_from_iso8601 ("2016-08-24T22");
+  g_assert (dt == NULL);
+  dt = g_date_time_new_from_iso8601 ("2016-08-24T22:10");
+  g_assert (dt == NULL);
+  dt = g_date_time_new_from_iso8601 ("2016-08-24T2210");
+  g_assert (dt == NULL);
+
+  /* UTC time uses 'Z' */
+  dt = g_date_time_new_from_iso8601 ("2016-08-24T22:10:42Z");
+  ASSERT_DATE (dt, 2016, 8, 24);
+  ASSERT_TIME (dt, 22, 10, 42);
+  g_assert_cmpint (g_date_time_get_utc_offset (dt), ==, 0);
+  g_date_time_unref (dt);
+
+  /* Check timezone works */
+  dt = g_date_time_new_from_iso8601 ("2016-08-24T22:10:42+12:00");
+  ASSERT_DATE (dt, 2016, 8, 24);
+  ASSERT_TIME (dt, 22, 10, 42);
+  g_assert_cmpint (g_date_time_get_utc_offset (dt), ==, 12 * G_TIME_SPAN_HOUR);
+  g_date_time_unref (dt);
+  dt = g_date_time_new_from_iso8601 ("2016-08-24T22:10:42+12");
+  ASSERT_DATE (dt, 2016, 8, 24);
+  ASSERT_TIME (dt, 22, 10, 42);
+  g_assert_cmpint (g_date_time_get_utc_offset (dt), ==, 12 * G_TIME_SPAN_HOUR);
+  g_date_time_unref (dt);
+  dt = g_date_time_new_from_iso8601 ("2016-08-24T22:10:42-02");
+  ASSERT_DATE (dt, 2016, 8, 24);
+  ASSERT_TIME (dt, 22, 10, 42);
+  g_assert_cmpint (g_date_time_get_utc_offset (dt), ==, -2 * G_TIME_SPAN_HOUR);
+  g_date_time_unref (dt);
+
+  /* Timezone seconds not allowed */
+  dt = g_date_time_new_from_iso8601 ("2016-08-24T22-12:00:00");
+  g_assert (dt == NULL);
+  dt = g_date_time_new_from_iso8601 ("2016-08-24T22-12:00:00.000");
+  g_assert (dt == NULL);
+
+  /* Timezone hours two digits */
+  dt = g_date_time_new_from_iso8601 ("2016-08-24T22-2");
+  g_assert (dt == NULL);
+}
+
+static void
 test_GDateTime_to_unix (void)
 {
   GDateTime *dt;
@@ -1655,6 +1861,7 @@ main (gint   argc,
   g_test_add_func ("/GDateTime/new_from_unix_utc", test_GDateTime_new_from_unix_utc);
   g_test_add_func ("/GDateTime/new_from_timeval", test_GDateTime_new_from_timeval);
   g_test_add_func ("/GDateTime/new_from_timeval_utc", test_GDateTime_new_from_timeval_utc);
+  g_test_add_func ("/GDateTime/new_from_iso8601", test_GDateTime_new_from_iso8601);
   g_test_add_func ("/GDateTime/new_full", test_GDateTime_new_full);
   g_test_add_func ("/GDateTime/now", test_GDateTime_now);
   g_test_add_func ("/GDateTime/printf", test_GDateTime_printf);


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]