[libsocialweb] utils: Add function for unescaping xml entities
- From: Rob Bradford <rbradford src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [libsocialweb] utils: Add function for unescaping xml entities
- Date: Tue, 15 Feb 2011 15:42:11 +0000 (UTC)
commit 9d67459bf724056e8c17fe07a852414f7ffdde7a
Author: Rob Bradford <rob linux intel com>
Date: Tue Feb 15 14:45:27 2011 +0000
utils: Add function for unescaping xml entities
Part of the fix for: https://bugs.meego.com/show_bug.cgi?id=10553
libsocialweb/sw-utils.c | 92 +++++++++++++++++++++++++++++++++++++++++++++++
libsocialweb/sw-utils.h | 1 +
2 files changed, 93 insertions(+), 0 deletions(-)
---
diff --git a/libsocialweb/sw-utils.c b/libsocialweb/sw-utils.c
index 5ed7dc4..d7daf4f 100644
--- a/libsocialweb/sw-utils.c
+++ b/libsocialweb/sw-utils.c
@@ -19,6 +19,7 @@
#include <config.h>
#include "sw-utils.h"
#include <string.h>
+#include <stdio.h>
#include <libsoup/soup.h>
time_t
@@ -92,3 +93,94 @@ sw_next_opid (void)
return g_atomic_int_exchange_and_add (&opid, 1);
}
+
+/*
+ * Map an entity name (the text between '&' and ';') to its code point.
+ * Returns 0 if the name is unknown or is not a valid character reference.
+ */
+static gunichar
+entity_to_unichar (const gchar *name)
+{
+  guint base;
+  guint i;
+  gunichar value = 0;
+
+  /* http://bit.ly/EJujl */
+  if (g_str_equal (name, "quot"))
+    return 0x0022;
+  if (g_str_equal (name, "amp"))
+    return 0x0026;
+  if (g_str_equal (name, "apos"))
+    return 0x0027;
+  if (g_str_equal (name, "lt"))
+    return 0x003c;
+  if (g_str_equal (name, "gt"))
+    return 0x003e;
+  if (name[0] != '#')
+    return 0;
+
+  /* Numeric reference: "#x" introduces hex digits, a bare "#" decimal ones */
+  base = (name[1] == 'x') ? 16 : 10;
+  i = (base == 16) ? 2 : 1;
+  if (name[i] == '\0')
+    return 0;
+
+  for (; name[i]; i++)
+    {
+      gint digit = g_ascii_xdigit_value (name[i]);
+
+      if (digit < 0 || (guint) digit >= base)
+        return 0;
+
+      value = value * base + (gunichar) digit;
+
+      /* Stop early so that an over-long number cannot wrap around */
+      if (value > 0x10FFFF)
+        return 0;
+    }
+
+  /* Rejects surrogates; a value of 0 already means "no replacement" */
+  return g_unichar_validate (value) ? value : 0;
+}
+
+/**
+ * sw_unescape_entities:
+ * @string: a writable, NUL-terminated string
+ *
+ * Replace the xml entities in the given string in place. The five predefined
+ * entities (quot, amp, apos, lt, gt) and numeric character references
+ * (&#NNN; and &#xHHH;) are handled. References that are not terminated by
+ * ';', that are unknown, or that do not name a valid non-NUL code point are
+ * left untouched.
+ *
+ * Returns: the string with the entities replaced
+ */
+gchar *
+sw_unescape_entities (gchar *string)
+{
+  gchar *p = string;
+  gchar bucket[10];
+  size_t length;
+
+  g_return_val_if_fail (string != NULL, NULL);
+
+  /* Current length of the string; kept up to date as entities shrink it */
+  length = strlen (string);
+
+  while (p[0])
+    {
+      const gchar *q;
+      gint bucket_i = 0;
+      gunichar replacement = 0;
+      gint entity_length;
+      gint replacement_length;
+
+      if (p[0] != '&')
+        {
+          p++;
+          continue;
+        }
+
+      /* Fill the bucket with the characters in the entity reference */
+      for (q = p + 1; q[0] != ';' && q[0] && bucket_i < 9; q++)
+        bucket[bucket_i++] = q[0];
+      bucket[bucket_i] = '\0';
+
+      /* Only a reference that is actually closed by ';' is complete */
+      if (q[0] == ';')
+        replacement = entity_to_unichar (bucket);
+
+      if (replacement == 0)
+        {
+          /* Nothing we can replace: keep the '&' as a literal character */
+          p++;
+          continue;
+        }
+
+      entity_length = bucket_i + 2; /* '&' + name + ';' */
+
+      /*
+       * The utf8 representation is always fewer bytes than the entity
+       * string itself, so this only overwrites bytes of the reference.
+       */
+      replacement_length = g_unichar_to_utf8 (replacement, p);
+
+      /* Close the gap: move the rest of the string, including its \0 */
+      memmove (p + replacement_length, p + entity_length,
+               length - (p - string + entity_length) + 1);
+      length -= entity_length - replacement_length;
+
+      /*
+       * Carry on directly after the replacement: the next character must
+       * not be skipped and the replacement itself must not be unescaped
+       * a second time (e.g. "&amp;lt;" becomes "&lt;").
+       */
+      p += replacement_length;
+    }
+
+  if (!g_utf8_validate (string, -1, NULL))
+    g_critical ("Invalid utf-8");
+
+  return string;
+}
diff --git a/libsocialweb/sw-utils.h b/libsocialweb/sw-utils.h
index 376d1c7..107ce29 100644
--- a/libsocialweb/sw-utils.h
+++ b/libsocialweb/sw-utils.h
@@ -24,3 +24,4 @@ char * sw_time_t_to_string (time_t t);
char *sw_hash_string_dict (GHashTable *hash);
int sw_next_opid (void);
+gchar *sw_unescape_entities (gchar *string); /* replaces xml entities in string in place; returns string */
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]