[libgdata] [gd] Make string parsing UTF-8–safe



commit afd50ddc91bcd4a9cb1da17181f1f2e6658eb0ec
Author: Philip Withnall <philip tecnocode co uk>
Date:   Mon Jan 25 15:15:08 2010 +0000

    [gd] Make string parsing UTF-8–safe
    
    Make the various small bits of string parsing/handling (which all occur in
    the GData code) UTF-8 safe, and add some test cases.

 gdata/gd/gdata-gd-phone-number.c   |   14 ++------------
 gdata/gd/gdata-gd-postal-address.c |   14 ++------------
 gdata/gdata-parser.c               |   22 ++++++++++++++++++++++
 gdata/gdata-parser.h               |    1 +
 gdata/tests/general.c              |    8 ++++++++
 5 files changed, 35 insertions(+), 24 deletions(-)
---
diff --git a/gdata/gd/gdata-gd-phone-number.c b/gdata/gd/gdata-gd-phone-number.c
index 6fa1a5c..acc0305 100644
--- a/gdata/gd/gdata-gd-phone-number.c
+++ b/gdata/gd/gdata-gd-phone-number.c
@@ -411,23 +411,13 @@ gdata_gd_phone_number_get_number (GDataGDPhoneNumber *self)
 void
 gdata_gd_phone_number_set_number (GDataGDPhoneNumber *self, const gchar *number)
 {
-	gint len;
-
 	g_return_if_fail (GDATA_IS_GD_PHONE_NUMBER (self));
 	g_return_if_fail (number != NULL && *number != '\0');
 
-	g_free (self->priv->number);
-
 	/* Trim leading and trailing whitespace from the number.
 	 * See here: http://code.google.com/apis/gdata/docs/1.0/elements.html#gdPhoneNumber */
-	while (*number != '\0' && g_ascii_isspace (*number))
-		number++;
-
-	len = strlen (number);
-	while (len > 0 && g_ascii_isspace (number[len - 1]))
-		len--;
-
-	self->priv->number = g_strndup (number, len);
+	g_free (self->priv->number);
+	self->priv->number = gdata_parser_utf8_trim_whitespace (number);
 	g_object_notify (G_OBJECT (self), "number");
 }
 
diff --git a/gdata/gd/gdata-gd-postal-address.c b/gdata/gd/gdata-gd-postal-address.c
index 6ec22dd..b46928e 100644
--- a/gdata/gd/gdata-gd-postal-address.c
+++ b/gdata/gd/gdata-gd-postal-address.c
@@ -734,23 +734,13 @@ gdata_gd_postal_address_get_address (GDataGDPostalAddress *self)
 void
 gdata_gd_postal_address_set_address (GDataGDPostalAddress *self, const gchar *address)
 {
-	gint len;
-
 	g_return_if_fail (GDATA_IS_GD_POSTAL_ADDRESS (self));
 	g_return_if_fail (address != NULL && *address != '\0');
 
-	g_free (self->priv->formatted_address);
-
 	/* Trim leading and trailing whitespace from the address.
 	 * See here: http://code.google.com/apis/gdata/docs/1.0/elements.html#gdPostalAddress */
-	while (*address != '\0' && g_ascii_isspace (*address))
-		address++;
-
-	len = strlen (address);
-	while (len > 0 && g_ascii_isspace (address[len - 1]))
-		len--;
-
-	self->priv->formatted_address = g_strndup (address, len);
+	g_free (self->priv->formatted_address);
+	self->priv->formatted_address = gdata_parser_utf8_trim_whitespace (address);
 	g_object_notify (G_OBJECT (self), "address");
 }
 
diff --git a/gdata/gdata-parser.c b/gdata/gdata-parser.c
index c58bcba..7f13761 100644
--- a/gdata/gdata-parser.c
+++ b/gdata/gdata-parser.c
@@ -271,3 +271,25 @@ gdata_parser_string_append_escaped (GString *xml_string, const gchar *pre, const
 	if (post != NULL)
 		g_string_append (xml_string, post);
 }
+
+gchar *
+gdata_parser_utf8_trim_whitespace (const gchar *s)
+{
+	glong len;
+	const gchar *_s;
+
+	/* Skip the leading whitespace */
+	while (*s != '\0' && g_unichar_isspace (g_utf8_get_char (s)))
+		s = g_utf8_next_char (s);
+
+	/* Find the end of the string and backtrack until we've passed all the whitespace */
+	len = g_utf8_strlen (s, -1);
+	_s = g_utf8_offset_to_pointer (s, len - 1);
+	while (len > 0 && g_unichar_isspace (g_utf8_get_char (_s))) {
+		_s = g_utf8_prev_char (_s);
+		len--;
+	}
+	_s = g_utf8_next_char (_s);
+
+	return g_strndup (s, _s - s);
+}
diff --git a/gdata/gdata-parser.h b/gdata/gdata-parser.h
index e7c33fc..1d3474e 100644
--- a/gdata/gdata-parser.h
+++ b/gdata/gdata-parser.h
@@ -34,6 +34,7 @@ gboolean gdata_parser_error_duplicate_element (xmlNode *element, GError **error)
 gboolean gdata_parser_time_val_from_date (const gchar *date, GTimeVal *_time);
 gchar *gdata_parser_date_from_time_val (GTimeVal *_time) G_GNUC_WARN_UNUSED_RESULT;
 void gdata_parser_string_append_escaped (GString *xml_string, const gchar *pre, const gchar *element_content, const gchar *post);
+gchar *gdata_parser_utf8_trim_whitespace (const gchar *s) G_GNUC_WARN_UNUSED_RESULT;
 
 G_END_DECLS
 
diff --git a/gdata/tests/general.c b/gdata/tests/general.c
index 7329d6a..412248e 100644
--- a/gdata/tests/general.c
+++ b/gdata/tests/general.c
@@ -1526,6 +1526,10 @@ test_gd_phone_number (void)
 				"uri='tel:+12065551212' rel='http://schemas.google.com/g/2005#mobile' label='Personal &amp; business calls only' "
 				"primary='false'>+1 206 555 1212</gd:phoneNumber>");
 	g_free (xml);
+
+	/* Check we trim whitespace properly, and respect Unicode characters */
+	gdata_gd_phone_number_set_number (phone, "  	 0123456 (789) ëxt 300  ");
+	g_assert_cmpstr (gdata_gd_phone_number_get_number (phone), ==, "0123456 (789) ëxt 300");
 	g_object_unref (phone);
 
 	/* Now parse a phone number with less information available, but some extraneous whitespace */
@@ -1599,6 +1603,10 @@ test_gd_postal_address (void)
 				"<gd:postcode>NY 10036</gd:postcode>"
 			 "</gd:structuredPostalAddress>");
 	g_free (xml);
+
+	/* Check we trim whitespace properly, and respect Unicode characters */
+	gdata_gd_postal_address_set_address (postal, "  	 Schöne Grüße Straße\nGermany  ");
+	g_assert_cmpstr (gdata_gd_postal_address_get_address (postal), ==, "Schöne Grüße Straße\nGermany");
 	g_object_unref (postal);
 
 	/* Now parse an address with less information available */



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]