[grilo-plugins] lua-factory: Fix unescaping ANSI HTML entities



commit 60529923dff02d23612ef6ef220ffabc9da24430
Author: Bastien Nocera <hadess hadess net>
Date:   Mon Jun 15 14:01:11 2015 +0200

    lua-factory: Fix unescaping ANSI HTML entities
    
    HTML escape sequences still use Windows-1252/ANSI codes:
    http://www.w3schools.com/charsets/ref_html_ansi.asp
    
    Before this, &#150; would unescape to a non-printable character, when it
    should be unescaping to an ndash character.
    
    This is especially visible in the Guardian Videos source.
    
    https://bugzilla.gnome.org/show_bug.cgi?id=750990

 src/lua-factory/grl-lua-library.c |   23 ++++++++++++++++++++++-
 1 files changed, 22 insertions(+), 1 deletions(-)
---
diff --git a/src/lua-factory/grl-lua-library.c b/src/lua-factory/grl-lua-library.c
index f328b1c..b936370 100644
--- a/src/lua-factory/grl-lua-library.c
+++ b/src/lua-factory/grl-lua-library.c
@@ -65,6 +65,24 @@ char_str (gunichar c,
   return buf;
 }
 
+/* Store the UTF-8 form of the Windows-1252 ("ANSI") byte @c in @buf, as HTML
+ * numeric entities use: http://www.w3schools.com/charsets/ref_html_ansi.asp */
+static gchar *
+ansi_char_str (gunichar c,
+               gchar   *buf)
+{
+  gchar from_c[2] = { (gchar) c, '\0' }, *tmp;
+
+  memset (buf, 0, 8); /* @buf is caller-provided and must hold 8 bytes */
+  tmp = g_convert (from_c, 2, "UTF-8", "Windows-1252", NULL, NULL, NULL);
+  /* g_convert() returns NULL on failure, e.g. for bytes the charset leaves
+   * undefined (0x81, 0x8D, ...): fall back to the plain code point path */
+  if (tmp == NULL)
+    return char_str (c, buf);
+  strcpy (buf, tmp);
+  g_free (tmp);
+  return buf;
+}
+
 /* Adapted from unescape_gstring_inplace() in gmarkup.c in glib */
 static char *
 unescape_string (const char *orig_from)
@@ -116,7 +134,10 @@ unescape_string (const char *orig_from)
             (0xE000 <= l && l <= 0xFFFD) ||
             (0x10000 <= l && l <= 0x10FFFF)) {
           gchar buf[8];
-          char_str (l, buf);
+          if (l >= 128 && l <= 255)
+            ansi_char_str (l, buf);
+          else
+            char_str (l, buf);
           strcpy (to, buf);
           to += strlen (buf) - 1;
           from = end;


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]