[libsocialweb] utils: Add function for unescaping xml entities



commit 9d67459bf724056e8c17fe07a852414f7ffdde7a
Author: Rob Bradford <rob linux intel com>
Date:   Tue Feb 15 14:45:27 2011 +0000

    utils: Add function for unescaping xml entities
    
    Part of the fix for: https://bugs.meego.com/show_bug.cgi?id=10553

 libsocialweb/sw-utils.c |   92 +++++++++++++++++++++++++++++++++++++++++++++++
 libsocialweb/sw-utils.h |    1 +
 2 files changed, 93 insertions(+), 0 deletions(-)
---
diff --git a/libsocialweb/sw-utils.c b/libsocialweb/sw-utils.c
index 5ed7dc4..d7daf4f 100644
--- a/libsocialweb/sw-utils.c
+++ b/libsocialweb/sw-utils.c
@@ -19,6 +19,7 @@
 #include <config.h>
 #include "sw-utils.h"
 #include <string.h>
+#include <stdio.h>
 #include <libsoup/soup.h>
 
 time_t
@@ -92,3 +93,94 @@ sw_next_opid (void)
 
   return g_atomic_int_exchange_and_add (&opid, 1);
 }
+
+/**
+ * sw_unescape_entities
+ * @string: NUL-terminated, writable UTF-8 string; modified in place
+ *
+ * Replace the xml entities in the given string in place.
+ *
+ * Handles quot, amp, apos, lt, gt and numeric references (&#NNN;, &#xHHH;).
+ * A reference is replaced only if it ends in ';' and yields a valid,
+ * non-zero character; anything else is copied through untouched.
+ *
+ * Returns: @string with the entities replaced, or NULL if @string is NULL
+ */
+gchar *
+sw_unescape_entities (gchar *string)
+{
+  const gchar *src = string; /* next byte to read */
+  gchar *dst = string;       /* next byte to write; never overtakes src */
+  gchar bucket[10];
+
+  g_return_val_if_fail (string != NULL, NULL);
+
+  while (src[0])
+    {
+      const gchar *q;
+      gint bucket_i = 0;
+      gunichar replacement = 0;
+      gboolean known = TRUE;
+
+      if (src[0] != '&')
+        {
+          *dst++ = *src++;
+          continue;
+        }
+
+      /* Fill the bucket with the characters in the entity reference */
+      for (q = src + 1; q[0] != ';' && q[0] && bucket_i < 9; q++)
+        bucket[bucket_i++] = q[0];
+      bucket[bucket_i] = '\0';
+
+      /* http://bit.ly/EJujl */
+      if (q[0] != ';')
+        known = FALSE; /* unterminated, or too long to be a reference */
+      else if (g_str_equal (bucket, "quot"))
+        replacement = 0x0022;
+      else if (g_str_equal (bucket, "amp"))
+        replacement = 0x0026;
+      else if (g_str_equal (bucket, "apos"))
+        replacement = 0x0027;
+      else if (g_str_equal (bucket, "lt"))
+        replacement = 0x003c;
+      else if (g_str_equal (bucket, "gt"))
+        replacement = 0x003e;
+      else if (bucket[0] == '#')
+        {
+          guint base = (bucket[1] == 'x') ? 16 : 10;
+          const gchar *d = &bucket[(base == 16) ? 2 : 1];
+
+          /* At most 8 digits fit in the bucket, so this cannot overflow */
+          known = (d[0] != '\0');
+          for (; known && d[0]; d++)
+            {
+              gint v = (base == 16) ? g_ascii_xdigit_value (d[0])
+                                    : g_ascii_digit_value (d[0]);
+              if (v < 0)
+                known = FALSE; /* not a digit in this base */
+              else
+                replacement = replacement * base + (gunichar) v;
+            }
+        }
+      else
+        known = FALSE;
+
+      /* U+0000 would truncate the string; invalid values break UTF-8 */
+      if (!known || replacement == 0 || !g_unichar_validate (replacement))
+        {
+          *dst++ = *src++; /* keep the '&' and rescan what follows it */
+          continue;
+        }
+
+      /* UTF-8 is shorter than the reference: the write ends before ';' */
+      dst += g_unichar_to_utf8 (replacement, dst);
+      src = q + 1; /* resume right after the ';' */
+    }
+  dst[0] = '\0';
+
+  if (!g_utf8_validate (string, -1, NULL))
+      g_critical ("Invalid utf-8");
+
+  return string;
+}
diff --git a/libsocialweb/sw-utils.h b/libsocialweb/sw-utils.h
index 376d1c7..107ce29 100644
--- a/libsocialweb/sw-utils.h
+++ b/libsocialweb/sw-utils.h
@@ -24,3 +24,4 @@ char * sw_time_t_to_string (time_t t);
 char *sw_hash_string_dict (GHashTable *hash);
 
 int sw_next_opid (void);
+gchar *sw_unescape_entities (gchar *string);



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]