[glib/wip/rancell/iso8601-2] GDateTime: Support parsing ISO 8601 strings.
- From: Robert Ancell <rancell src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [glib/wip/rancell/iso8601-2] GDateTime: Support parsing ISO 8601 strings.
- Date: Mon, 29 Aug 2016 03:36:43 +0000 (UTC)
commit fcef868d676ce4697f10f46238bd9defd37b7606
Author: Robert Ancell <robert ancell canonical com>
Date: Thu Aug 25 11:53:54 2016 +1200
GDateTime: Support parsing ISO 8601 strings.
https://bugzilla.gnome.org/show_bug.cgi?id=753459
glib/gdatetime.c | 298 ++++++++++++++++++++++++++++++++++++++++++++++++
glib/gdatetime.h | 3 +
glib/tests/gdatetime.c | 219 +++++++++++++++++++++++++++++++++++
3 files changed, 520 insertions(+), 0 deletions(-)
---
diff --git a/glib/gdatetime.c b/glib/gdatetime.c
index 8ff0223..9c68c47 100644
--- a/glib/gdatetime.c
+++ b/glib/gdatetime.c
@@ -24,6 +24,7 @@
* Thiago Santos <thiago sousa santos collabora co uk>
* Emmanuele Bassi <ebassi linux intel com>
* Ryan Lortie <desrt desrt ca>
+ * Robert Ancell <robert ancell canonical com>
*/
/* Algorithms within this file are based on the Calendar FAQ by
@@ -887,6 +888,303 @@ g_date_time_new_from_timeval_utc (const GTimeVal *tv)
return datetime;
}
+/* Reads exactly @length characters from @text as an unsigned decimal number.
+ *
+ * Returns TRUE and stores the number in @value when all of them are ASCII
+ * digits. Returns FALSE, leaving @value untouched, as soon as any other
+ * character is seen. No overflow check is made, so callers are expected to
+ * pass short field widths. */
+static gboolean
+get_iso8601_int (const gchar *text, gint length, gint *value)
+{
+  gint pos = 0;
+  gint total = 0;
+
+  while (pos < length)
+    {
+      gint digit = text[pos] - '0';
+
+      if (digit < 0 || digit > 9)
+        return FALSE;
+
+      total = total * 10 + digit;
+      pos++;
+    }
+
+  *value = total;
+  return TRUE;
+}
+
+/* Parses a seconds field of the form "ss" or "ss.fff" / "ss,fff" from the
+ * first @length characters of @text.
+ *
+ * The integral part must be exactly two digits. If anything follows it, it
+ * must be a decimal separator ('.' or ',') followed by at least one digit,
+ * so malformed fields such as "4.2", ".5" or "42." are rejected.
+ *
+ * Returns TRUE and stores the parsed number of seconds in @value on
+ * success; returns FALSE and leaves @value untouched otherwise. No range
+ * check is applied to the value itself. */
+static gboolean
+get_iso8601_seconds (const gchar *text, gint length, gdouble *value)
+{
+  gint i;
+  gdouble whole = 0, fraction = 0, divisor = 1;
+
+  if (length < 2)
+    return FALSE;
+
+  for (i = 0; i < 2; i++)
+    {
+      gchar c = text[i];
+      if (c < '0' || c > '9')
+        return FALSE;
+      whole = whole * 10 + (c - '0');
+    }
+
+  if (i < length)
+    {
+      if (text[i] != '.' && text[i] != ',')
+        return FALSE;
+      i++;
+
+      /* A separator with nothing after it is not a valid fraction */
+      if (i == length)
+        return FALSE;
+
+      for (; i < length; i++)
+        {
+          gchar c = text[i];
+          if (c < '0' || c > '9')
+            return FALSE;
+
+          /* Accumulate the fraction as a whole number and divide once at
+           * the end; this avoids the rounding error of repeatedly scaling
+           * by 0.1. Digits past the 15th decimal place are validated but
+           * ignored: they are beyond what a gdouble can hold here and
+           * would eventually overflow the divisor. */
+          if (divisor < 1e15)
+            {
+              fraction = fraction * 10 + (c - '0');
+              divisor *= 10;
+            }
+        }
+    }
+
+  *value = whole + fraction / divisor;
+  return TRUE;
+}
+
+/* Converts an ordinal day-of-year (1 = 1 January) into a month and a day of
+ * that month for the given @year.
+ *
+ * Returns FALSE, without touching @month or @day, if @ordinal_day is below 1
+ * or larger than the last entry of the days_in_year row for @year.
+ *
+ * NOTE(review): days_in_year is defined elsewhere in this file; this code
+ * relies on days_in_year[leap][m] being the number of days in the year up to
+ * and including month m, with index 0 holding 0 - confirm against the
+ * table. */
+static gboolean
+convert_from_iso8601_ordinal (gint year, gint ordinal_day, gint *month, gint *day)
+{
+  gint leap;
+  gint m = 1;
+
+  if (ordinal_day < 1)
+    return FALSE;
+
+  leap = GREGORIAN_LEAP (year);
+
+  /* Skip every month that ends before the requested day */
+  while (m <= 12 && ordinal_day > days_in_year[leap][m])
+    m++;
+
+  if (m > 12)
+    return FALSE;
+
+  *month = m;
+  *day = ordinal_day - days_in_year[leap][m - 1];
+  return TRUE;
+}
+
+/* Converts an ISO 8601 week date (@week 1-52, @week_day 1 = Monday to
+ * 7 = Sunday) into a number of days to add to 1 January of @year.
+ *
+ * The result stored in @offset may be negative, because ISO week 1 can start
+ * in the last days of the previous calendar year.
+ *
+ * Returns FALSE, leaving @offset untouched, when @week or @week_day is out
+ * of range. Week 53 is always rejected, including in ISO years that have
+ * one.
+ *
+ * NOTE(review): ymd_to_days() is defined elsewhere in this file; the
+ * arithmetic below relies on (days % 7) being 1 for a Monday through 6 for a
+ * Saturday and 0 for a Sunday. */
+static gboolean
+convert_from_iso8601_week (gint year, gint week, gint week_day, gint *offset)
+{
+  gint days, week_offset;
+
+  if (week < 1 || week > 52 || week_day < 1 || week_day > 7)
+    return FALSE;
+
+  /* Work out the day week one starts on.
+   *
+   * ISO week 1 is the week containing the year's first Thursday. So when
+   * 1 January falls on Monday to Thursday it belongs to week 1, which then
+   * starts on or before 1 January (week_offset -1 to -4). When it falls on
+   * Friday or Saturday (week_offset -5 or -6), week 1 starts on the
+   * following Monday, hence the + 7. A Sunday (week_offset 0) needs no
+   * adjustment: week 1 starts the next day.
+   *
+   * The threshold must be -4, not -3, otherwise years starting on a Thursday
+   * are pushed forward by a whole week. */
+  days = ymd_to_days (year, 1, 1);
+  week_offset = -(days % 7);
+  if (week_offset < -4)
+    week_offset += 7;
+
+  *offset = week_offset + ((week - 1) * 7) + week_day;
+  return TRUE;
+}
+
+/* Parses the date part of an ISO 8601 string, given as the first @length
+ * characters of @text. The layout is selected from @length and the
+ * separator characters at fixed positions.
+ *
+ * Calendar and ordinal dates fill in @year, @month and @day and do not touch
+ * @offset. Week dates set @month and @day to 1 and store in @offset the
+ * number of days to add to 1 January of @year.
+ *
+ * Returns TRUE on success. On failure FALSE is returned and the output
+ * arguments may have been partially written. */
+static gboolean
+parse_iso8601_date (const gchar *text, gint length,
+                    gint *year, gint *month, gint *day, gint *offset)
+{
+  gint ordinal_day, week, week_day;
+
+  switch (length)
+    {
+    case 10:
+      /* Both ten character layouts have a divider after the year */
+      if (text[4] != '-')
+        return FALSE;
+
+      /* YYYY-MM-DD */
+      if (text[7] == '-')
+        {
+          if (!get_iso8601_int (text, 4, year) ||
+              !get_iso8601_int (text + 5, 2, month))
+            return FALSE;
+          return get_iso8601_int (text + 8, 2, day);
+        }
+
+      /* YYYY-Www-D */
+      if (text[5] != 'W' || text[8] != '-')
+        return FALSE;
+      *month = 1;
+      *day = 1;
+      if (!get_iso8601_int (text, 4, year) ||
+          !get_iso8601_int (text + 6, 2, &week) ||
+          !get_iso8601_int (text + 9, 1, &week_day))
+        return FALSE;
+      return convert_from_iso8601_week (*year, week, week_day, offset);
+
+    case 8:
+      /* YYYY-DDD */
+      if (text[4] == '-')
+        {
+          if (!get_iso8601_int (text, 4, year) ||
+              !get_iso8601_int (text + 5, 3, &ordinal_day))
+            return FALSE;
+          return convert_from_iso8601_ordinal (*year, ordinal_day, month, day);
+        }
+
+      /* YYYYWwwD */
+      if (text[4] == 'W')
+        {
+          *month = 1;
+          *day = 1;
+          if (!get_iso8601_int (text, 4, year) ||
+              !get_iso8601_int (text + 5, 2, &week) ||
+              !get_iso8601_int (text + 7, 1, &week_day))
+            return FALSE;
+          return convert_from_iso8601_week (*year, week, week_day, offset);
+        }
+
+      /* YYYYMMDD */
+      if (!get_iso8601_int (text, 4, year) ||
+          !get_iso8601_int (text + 4, 2, month))
+        return FALSE;
+      return get_iso8601_int (text + 6, 2, day);
+
+    case 7:
+      /* YYYYDDD */
+      if (!get_iso8601_int (text, 4, year) ||
+          !get_iso8601_int (text + 4, 3, &ordinal_day))
+        return FALSE;
+      return convert_from_iso8601_ordinal (*year, ordinal_day, month, day);
+
+    default:
+      return FALSE;
+    }
+}
+
+/* Checks whether the @length characters at @text form an ISO 8601 timezone
+ * designator ("Z", "+hh:mm", "+hhmm" or "+hh", with '-' allowed in place of
+ * '+') and, if so, creates the matching #GTimeZone.
+ *
+ * @text is handed to g_time_zone_new() as a C string, so the designator has
+ * to run up to the terminating NUL; @length is only used to recognise the
+ * layout. The caller in this file always passes a suffix of the full string.
+ *
+ * Returns TRUE and stores a new reference in @tz on success. Returns FALSE
+ * and leaves @tz untouched if the text is not a designator or the offset is
+ * out of range (hours above 23 or minutes above 59). */
+static gboolean
+parse_iso8601_timezone (const gchar *text, gint length, GTimeZone **tz)
+{
+  gint offset_hours = 0, offset_minutes = 0;
+
+  /* Z */
+  if (length == 1 && text[0] == 'Z')
+    {
+      /* UTC; the offset stays at zero */
+    }
+  /* +hh:mm or -hh:mm */
+  else if (length == 6 && (text[0] == '+' || text[0] == '-') && text[3] == ':')
+    {
+      if (!get_iso8601_int (text + 1, 2, &offset_hours) ||
+          !get_iso8601_int (text + 4, 2, &offset_minutes))
+        return FALSE;
+    }
+  /* +hhmm or -hhmm */
+  else if (length == 5 && (text[0] == '+' || text[0] == '-'))
+    {
+      if (!get_iso8601_int (text + 1, 2, &offset_hours) ||
+          !get_iso8601_int (text + 3, 2, &offset_minutes))
+        return FALSE;
+    }
+  /* +hh or -hh */
+  else if (length == 3 && (text[0] == '+' || text[0] == '-'))
+    {
+      if (!get_iso8601_int (text + 1, 2, &offset_hours))
+        return FALSE;
+    }
+  else
+    return FALSE;
+
+  /* Reject impossible offsets such as "+99:99" here.
+   * NOTE(review): g_time_zone_new() is understood to fall back to UTC for
+   * identifiers it cannot parse, which would silently turn a bad offset
+   * into a valid-looking time - confirm against the GTimeZone docs. */
+  if (offset_hours > 23 || offset_minutes > 59)
+    return FALSE;
+
+  *tz = g_time_zone_new (text);
+
+  return TRUE;
+}
+
+/* Parses the time part of an ISO 8601 string: "hh:mm:ss" or "hhmmss", with
+ * optional fractional seconds and an optional timezone suffix.
+ *
+ * Every suffix of the first @length characters of @text is tried as a
+ * timezone designator, starting with the longest. On the first match a new
+ * #GTimeZone is stored in @tz and the suffix is excluded from the time
+ * fields. @tz is left untouched when there is no such suffix.
+ *
+ * Returns TRUE when @hour, @minute and @seconds were all parsed. On failure
+ * FALSE is returned; @tz may already have been set, so the caller remains
+ * responsible for releasing it. */
+static gboolean
+parse_iso8601_time (const gchar *text, gint length,
+                    gint *hour, gint *minute, gdouble *seconds, GTimeZone **tz)
+{
+  gint tz_start = 0;
+  gint divider;
+
+  /* Find where the timezone suffix starts, if there is one */
+  while (tz_start < length &&
+         !parse_iso8601_timezone (text + tz_start, length - tz_start, tz))
+    tz_start++;
+  length = tz_start;
+
+  /* Too short for even hhmmss */
+  if (length < 6)
+    return FALSE;
+
+  /* hh:mm:ss(.sss) uses one divider character between fields,
+   * hhmmss(.sss) uses none */
+  if (length >= 8 && text[2] == ':' && text[5] == ':')
+    divider = 1;
+  else
+    divider = 0;
+
+  if (!get_iso8601_int (text, 2, hour))
+    return FALSE;
+  if (!get_iso8601_int (text + 2 + divider, 2, minute))
+    return FALSE;
+
+  return get_iso8601_seconds (text + 4 + 2 * divider,
+                              length - (4 + 2 * divider),
+                              seconds);
+}
+
+/**
+ * g_date_time_new_from_iso8601:
+ * @text: an ISO 8601 formatted time string.
+ *
+ * Creates a #GDateTime corresponding to the given ISO 8601 formatted string
+ * @text. Only the following subset of ISO 8601 is supported:
+ *
+ * <date>T<time> or <date>t<time> or <date> <time>
+ *
+ * <date> is in the form:
+ * YYYY-MM-DD - Year/month/day, e.g. 2016-08-24.
+ * YYYYMMDD - Same as above without dividers.
+ * YYYY-DDD - Ordinal day where DDD is from 001 to 366, e.g. 2016-237.
+ * YYYYDDD - Same as above without dividers.
+ * YYYY-Www-D - Week day where ww is from 01 to 52 and D from 1-7, e.g.
+ *              2016-W34-3.
+ * YYYYWwwD - Same as above without dividers.
+ *
+ * <time> is in the form:
+ * hh:mm:ss(.sss) - Hours, minutes, seconds (subseconds), e.g. 22:10:42.123.
+ * hhmmss(.sss) - Same as above without dividers.
+ *
+ * The subseconds may be separated from the seconds by either '.' or ','.
+ *
+ * The time can have a timezone suffix in the form:
+ * Z - UTC.
+ * +hh:mm or -hh:mm - Offset from UTC in hours and minutes, e.g. +12:00.
+ * +hhmm or -hhmm - Same as above without dividers.
+ * +hh or -hh - Offset from UTC in hours, e.g. +12.
+ *
+ * If no timezone suffix is given, the time is interpreted in the local
+ * timezone.
+ *
+ * This call can fail (returning %NULL) if @text is not a valid ISO 8601
+ * formatted string.
+ *
+ * You should release the return value by calling g_date_time_unref()
+ * when you are done with it.
+ *
+ * Returns: a new #GDateTime, or %NULL
+ *
+ * Since: 2.50
+ **/
+GDateTime *
+g_date_time_new_from_iso8601 (const gchar *text)
+{
+  gint length, date_length = -1;
+  gint year = 0, month = 0, day = 0, offset = 0, hour = 0, minute = 0;
+  gdouble seconds = 0.0;
+  GTimeZone *tz = NULL;
+  GDateTime *datetime = NULL;
+
+  g_return_val_if_fail (text != NULL, NULL);
+
+  /* Date and time is separated by 'T', 't', or ' '. Only the first
+   * separator splits the string; the loop carries on to measure the total
+   * length. */
+  for (length = 0; text[length] != '\0'; length++)
+    {
+      if (date_length < 0 && (text[length] == 'T' || text[length] == 't' || text[length] == ' '))
+        date_length = length;
+    }
+
+  if (date_length < 0)
+    return NULL;
+
+  /* The time parser may already have created a timezone when it fails, so
+   * leave through the cleanup path rather than returning directly. */
+  if (!parse_iso8601_date (text, date_length, &year, &month, &day, &offset) ||
+      !parse_iso8601_time (text + date_length + 1, length - (date_length + 1),
+                           &hour, &minute, &seconds, &tz))
+    goto out;
+
+  if (tz == NULL)
+    tz = g_time_zone_new_local ();
+  datetime = g_date_time_new (tz, year, month, day, hour, minute, seconds);
+
+  /* Week dates are parsed as 1 January plus a (possibly negative) number of
+   * days. Apply that shift through g_date_time_add_days() instead of
+   * modifying datetime->days directly, so the shifted date goes through the
+   * public API, which returns %NULL when the shift fails.
+   * NOTE(review): this is expected to cover range checking and re-resolving
+   * the timezone interval for the new date - confirm against
+   * g_date_time_add_days(). */
+  if (datetime != NULL && offset != 0)
+    {
+      GDateTime *shifted = g_date_time_add_days (datetime, offset);
+
+      g_date_time_unref (datetime);
+      datetime = shifted;
+    }
+
+out:
+  if (tz != NULL)
+    g_time_zone_unref (tz);
+  return datetime;
+}
+
/* full new functions {{{1 */
/**
diff --git a/glib/gdatetime.h b/glib/gdatetime.h
index 63942c8..ce2a2c7 100644
--- a/glib/gdatetime.h
+++ b/glib/gdatetime.h
@@ -120,6 +120,9 @@ GDateTime * g_date_time_new_from_timeval_local (const G
GLIB_AVAILABLE_IN_ALL
GDateTime * g_date_time_new_from_timeval_utc (const GTimeVal *tv);
+GLIB_AVAILABLE_IN_2_50
+GDateTime * g_date_time_new_from_iso8601 (const gchar *text);
+
GLIB_AVAILABLE_IN_ALL
GDateTime * g_date_time_new (GTimeZone *tz,
gint year,
diff --git a/glib/tests/gdatetime.c b/glib/tests/gdatetime.c
index f6c3cf0..4ac1754 100644
--- a/glib/tests/gdatetime.c
+++ b/glib/tests/gdatetime.c
@@ -380,6 +380,224 @@ test_GDateTime_new_from_timeval_utc (void)
}
static void
+test_GDateTime_new_from_iso8601 (void)
+{
+  GDateTime *dt;
+
+  /* Exercises g_date_time_new_from_iso8601() with accepted layouts, whose
+   * date and time fields are checked, and with rejected inputs, which must
+   * all yield NULL. */
+
+  /* Need non-empty string */
+  dt = g_date_time_new_from_iso8601 ("");
+  g_assert (dt == NULL);
+
+  /* Needs to be correctly formatted */
+  dt = g_date_time_new_from_iso8601 ("not a date");
+  g_assert (dt == NULL);
+
+  /* Check common case */
+  dt = g_date_time_new_from_iso8601 ("2016-08-24T22:10:42");
+  ASSERT_DATE (dt, 2016, 8, 24);
+  ASSERT_TIME (dt, 22, 10, 42);
+  g_date_time_unref (dt);
+
+  /* Can't have whitespace */
+  dt = g_date_time_new_from_iso8601 ("2016 08 24T22:10:42");
+  g_assert (dt == NULL);
+  dt = g_date_time_new_from_iso8601 ("2016-08-24T22:10:42 ");
+  g_assert (dt == NULL);
+  dt = g_date_time_new_from_iso8601 (" 2016-08-24T22:10:42");
+  g_assert (dt == NULL);
+
+  /* Check lowercase time separator or space allowed */
+  dt = g_date_time_new_from_iso8601 ("2016-08-24t22:10:42");
+  ASSERT_DATE (dt, 2016, 8, 24);
+  ASSERT_TIME (dt, 22, 10, 42);
+  g_date_time_unref (dt);
+  dt = g_date_time_new_from_iso8601 ("2016-08-24 22:10:42");
+  ASSERT_DATE (dt, 2016, 8, 24);
+  ASSERT_TIME (dt, 22, 10, 42);
+  g_date_time_unref (dt);
+
+  /* Check dates without separators allowed */
+  dt = g_date_time_new_from_iso8601 ("20160824T22:10:42");
+  ASSERT_DATE (dt, 2016, 8, 24);
+  ASSERT_TIME (dt, 22, 10, 42);
+  g_date_time_unref (dt);
+
+  /* Months are two digits */
+  dt = g_date_time_new_from_iso8601 ("2016-1-01T22:10:42");
+  g_assert (dt == NULL);
+
+  /* Days are two digits */
+  dt = g_date_time_new_from_iso8601 ("2016-01-1T22:10:42");
+  g_assert (dt == NULL);
+
+  /* Need consistent usage of separators */
+  dt = g_date_time_new_from_iso8601 ("2016-0824T22:10:42");
+  g_assert (dt == NULL);
+  dt = g_date_time_new_from_iso8601 ("201608-24T22:10:42");
+  g_assert (dt == NULL);
+
+  /* Check month within valid range */
+  dt = g_date_time_new_from_iso8601 ("2016-00-13T22:10:42");
+  g_assert (dt == NULL);
+  dt = g_date_time_new_from_iso8601 ("2016-13-13T22:10:42");
+  g_assert (dt == NULL);
+
+  /* Check day within valid range */
+  dt = g_date_time_new_from_iso8601 ("2016-01-00T22:10:42");
+  g_assert (dt == NULL);
+  dt = g_date_time_new_from_iso8601 ("2016-01-32T22:10:42");
+  g_assert (dt == NULL);
+
+  /* Check ordinal days work */
+  dt = g_date_time_new_from_iso8601 ("2016-237T22:10:42");
+  ASSERT_DATE (dt, 2016, 8, 24);
+  ASSERT_TIME (dt, 22, 10, 42);
+  g_date_time_unref (dt);
+  dt = g_date_time_new_from_iso8601 ("2016237T22:10:42");
+  ASSERT_DATE (dt, 2016, 8, 24);
+  ASSERT_TIME (dt, 22, 10, 42);
+  g_date_time_unref (dt);
+
+  /* Check ordinal leap days */
+  dt = g_date_time_new_from_iso8601 ("2016-366T22:10:42");
+  ASSERT_DATE (dt, 2016, 12, 31);
+  ASSERT_TIME (dt, 22, 10, 42);
+  g_date_time_unref (dt);
+  dt = g_date_time_new_from_iso8601 ("2017-365T22:10:42");
+  ASSERT_DATE (dt, 2017, 12, 31);
+  ASSERT_TIME (dt, 22, 10, 42);
+  g_date_time_unref (dt);
+  dt = g_date_time_new_from_iso8601 ("2017-366T22:10:42");
+  g_assert (dt == NULL);
+
+  /* Days start at 1 */
+  dt = g_date_time_new_from_iso8601 ("2016-000T22:10:42");
+  g_assert (dt == NULL);
+
+  /* Limited to number of days in the year (2016 is a leap year) */
+  dt = g_date_time_new_from_iso8601 ("2016-367T22:10:42");
+  g_assert (dt == NULL);
+
+  /* Ordinal days are three digits */
+  dt = g_date_time_new_from_iso8601 ("2016-1T22:10:42");
+  g_assert (dt == NULL);
+  dt = g_date_time_new_from_iso8601 ("2016-12T22:10:42");
+  g_assert (dt == NULL);
+
+  /* Check week days work */
+  dt = g_date_time_new_from_iso8601 ("2016-W34-3T22:10:42");
+  ASSERT_DATE (dt, 2016, 8, 24);
+  ASSERT_TIME (dt, 22, 10, 42);
+  g_date_time_unref (dt);
+  dt = g_date_time_new_from_iso8601 ("2016W343T22:10:42");
+  ASSERT_DATE (dt, 2016, 8, 24);
+  ASSERT_TIME (dt, 22, 10, 42);
+  g_date_time_unref (dt);
+
+  /* We don't support weeks without weekdays (valid ISO 8601) */
+  dt = g_date_time_new_from_iso8601 ("2016-W34T22:10:42");
+  g_assert (dt == NULL);
+  dt = g_date_time_new_from_iso8601 ("2016W34T22:10:42");
+  g_assert (dt == NULL);
+
+  /* Weeks are two digits */
+  dt = g_date_time_new_from_iso8601 ("2016-W3-1T22:10:42");
+  g_assert (dt == NULL);
+
+  /* Weeks start at 1 */
+  dt = g_date_time_new_from_iso8601 ("2016-W00-1T22:10:42");
+  g_assert (dt == NULL);
+
+  /* Limited to number of weeks in the year */
+  dt = g_date_time_new_from_iso8601 ("2016-W53-1T22:10:42");
+  g_assert (dt == NULL);
+
+  /* Limited to number of days in the week */
+  dt = g_date_time_new_from_iso8601 ("2016-W34-0T22:10:42");
+  g_assert (dt == NULL);
+  dt = g_date_time_new_from_iso8601 ("2016-W34-8T22:10:42");
+  g_assert (dt == NULL);
+
+  /* Days are one digit */
+  dt = g_date_time_new_from_iso8601 ("2016-W34-99T22:10:42");
+  g_assert (dt == NULL);
+
+  /* Check week day changes depending on year */
+  dt = g_date_time_new_from_iso8601 ("2017-W34-1T22:10:42");
+  ASSERT_DATE (dt, 2017, 8, 21);
+  g_date_time_unref (dt);
+
+  /* Check week day changes depending on leap years */
+  dt = g_date_time_new_from_iso8601 ("1900-W01-1T22:10:42");
+  ASSERT_DATE (dt, 1900, 1, 1);
+  g_date_time_unref (dt);
+
+  /* YYYY-MM not allowed (NOT valid ISO 8601) */
+  dt = g_date_time_new_from_iso8601 ("2016-08T22:10:42");
+  g_assert (dt == NULL);
+
+  /* We don't support omitted year (valid ISO 8601) */
+  dt = g_date_time_new_from_iso8601 ("--08-24T22:10:42");
+  g_assert (dt == NULL);
+  dt = g_date_time_new_from_iso8601 ("--0824T22:10:42");
+  g_assert (dt == NULL);
+
+  /* Check subseconds work */
+  dt = g_date_time_new_from_iso8601 ("2016-08-24T22:10:42.123456");
+  ASSERT_DATE (dt, 2016, 8, 24);
+  ASSERT_TIME (dt, 22, 10, 42.123456);
+  g_date_time_unref (dt);
+
+  /* Check time separators optional */
+  dt = g_date_time_new_from_iso8601 ("2016-08-24T221042.123456");
+  ASSERT_DATE (dt, 2016, 8, 24);
+  ASSERT_TIME (dt, 22, 10, 42.123456);
+  g_date_time_unref (dt);
+
+  /* We don't support times without minutes / seconds (valid ISO 8601) */
+  dt = g_date_time_new_from_iso8601 ("2016-08-24T22");
+  g_assert (dt == NULL);
+  dt = g_date_time_new_from_iso8601 ("2016-08-24T22:10");
+  g_assert (dt == NULL);
+  dt = g_date_time_new_from_iso8601 ("2016-08-24T2210");
+  g_assert (dt == NULL);
+
+  /* UTC time uses 'Z' */
+  dt = g_date_time_new_from_iso8601 ("2016-08-24T22:10:42Z");
+  ASSERT_DATE (dt, 2016, 8, 24);
+  ASSERT_TIME (dt, 22, 10, 42);
+  g_assert_cmpint (g_date_time_get_utc_offset (dt), ==, 0);
+  g_date_time_unref (dt);
+
+  /* Check timezone works */
+  dt = g_date_time_new_from_iso8601 ("2016-08-24T22:10:42+12:00");
+  ASSERT_DATE (dt, 2016, 8, 24);
+  ASSERT_TIME (dt, 22, 10, 42);
+  g_assert_cmpint (g_date_time_get_utc_offset (dt), ==, 12 * G_TIME_SPAN_HOUR);
+  g_date_time_unref (dt);
+  dt = g_date_time_new_from_iso8601 ("2016-08-24T22:10:42+12");
+  ASSERT_DATE (dt, 2016, 8, 24);
+  ASSERT_TIME (dt, 22, 10, 42);
+  g_assert_cmpint (g_date_time_get_utc_offset (dt), ==, 12 * G_TIME_SPAN_HOUR);
+  g_date_time_unref (dt);
+  dt = g_date_time_new_from_iso8601 ("2016-08-24T22:10:42-02");
+  ASSERT_DATE (dt, 2016, 8, 24);
+  ASSERT_TIME (dt, 22, 10, 42);
+  g_assert_cmpint (g_date_time_get_utc_offset (dt), ==, -2 * G_TIME_SPAN_HOUR);
+  g_date_time_unref (dt);
+
+  /* Timezone seconds not allowed. The time itself is complete, so the
+   * string can only be rejected because of the timezone suffix. */
+  dt = g_date_time_new_from_iso8601 ("2016-08-24T22:10:42-12:00:00");
+  g_assert (dt == NULL);
+  dt = g_date_time_new_from_iso8601 ("2016-08-24T22:10:42-12:00:00.000");
+  g_assert (dt == NULL);
+
+  /* Timezone hours two digits */
+  dt = g_date_time_new_from_iso8601 ("2016-08-24T22:10:42-2");
+  g_assert (dt == NULL);
+}
+
+static void
test_GDateTime_to_unix (void)
{
GDateTime *dt;
@@ -1655,6 +1873,7 @@ main (gint argc,
g_test_add_func ("/GDateTime/new_from_unix_utc", test_GDateTime_new_from_unix_utc);
g_test_add_func ("/GDateTime/new_from_timeval", test_GDateTime_new_from_timeval);
g_test_add_func ("/GDateTime/new_from_timeval_utc", test_GDateTime_new_from_timeval_utc);
+ g_test_add_func ("/GDateTime/new_from_iso8601", test_GDateTime_new_from_iso8601);
g_test_add_func ("/GDateTime/new_full", test_GDateTime_new_full);
g_test_add_func ("/GDateTime/now", test_GDateTime_now);
g_test_add_func ("/GDateTime/printf", test_GDateTime_printf);
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]