[gobject-introspection] Add proper unicode support to the source scanner



commit b6405089448ea588989faf1b4bff3aa96cd5c291
Author: Johan Dahlin <johan gnome org>
Date:   Thu Dec 2 14:10:33 2010 -0200

    Add proper unicode support to the source scanner
    
    The assumption is that the only allowed source encoding
    is utf-8. Always treat strings as unicode and fix up the transformer
    and xml writer to properly output utf-8.

 giscanner/giscannermodule.c |   42 ++++++++++++++++++++++++++++++++++++------
 giscanner/transformer.py    |    2 +-
 giscanner/xmlwriter.py      |    2 +-
 3 files changed, 38 insertions(+), 8 deletions(-)
---
diff --git a/giscanner/giscannermodule.c b/giscanner/giscannermodule.c
index a8061db..0f94240 100644
--- a/giscanner/giscannermodule.c
+++ b/giscanner/giscannermodule.c
@@ -564,7 +564,7 @@ static int calc_attrs_length(PyObject *attributes, int indent,
 
   for (i = 0; i < PyList_Size (attributes); ++i)
     {
-      PyObject *tuple;
+      PyObject *tuple, *pyvalue;
       char *attr, *value;
       char *escaped;
 
@@ -572,9 +572,24 @@ static int calc_attrs_length(PyObject *attributes, int indent,
       if (PyTuple_GetItem(tuple, 1) == Py_None)
 	continue;
 
-      if (!PyArg_ParseTuple(tuple, "ss", &attr, &value))
+      if (!PyArg_ParseTuple(tuple, "sO", &attr, &pyvalue))
         return -1;
 
+      if (PyUnicode_Check(pyvalue)) {
+        PyObject *s = PyUnicode_AsUTF8String(pyvalue);
+        if (!s) {
+          return -1;
+        }
+        value = PyString_AsString(s);
+        Py_DECREF(s);
+      } else if (PyString_Check(pyvalue)) {
+        value = PyString_AsString(pyvalue);
+      } else {
+        PyErr_SetString(PyExc_TypeError,
+                        "value must be string or unicode");
+        return -1;
+      }
+
       escaped = g_markup_escape_text (value, -1);
       attr_length += 2 + strlen(attr) + strlen(escaped) + 2;
       g_free(escaped);
@@ -605,7 +620,7 @@ pygi_collect_attributes (PyObject *self,
     return NULL;
 
   if (attributes == Py_None || !PyList_Size(attributes))
-    return PyString_FromString("");
+    return PyUnicode_FromString("");
 
   len = calc_attrs_length(attributes, indent, self_indent);
   if (len < 0)
@@ -620,7 +635,7 @@ pygi_collect_attributes (PyObject *self,
 
   for (i = 0; i < PyList_Size (attributes); ++i)
     {
-      PyObject *tuple;
+      PyObject *tuple, *pyvalue;
       char *attr, *value, *escaped;
 
       tuple = PyList_GetItem (attributes, i);
@@ -643,9 +658,24 @@ pygi_collect_attributes (PyObject *self,
 	continue;
 
       /* this leaks, but we exit after, so */
-      if (!PyArg_ParseTuple(tuple, "ss", &attr, &value))
+      if (!PyArg_ParseTuple(tuple, "sO", &attr, &pyvalue))
         return NULL;
 
+      if (PyUnicode_Check(pyvalue)) {
+        PyObject *s = PyUnicode_AsUTF8String(pyvalue);
+        if (!s) {
+          return NULL;
+        }
+        value = PyString_AsString(s);
+        Py_DECREF(s);
+      } else if (PyString_Check(pyvalue)) {
+        value = PyString_AsString(pyvalue);
+      } else {
+        PyErr_SetString(PyExc_TypeError,
+                        "value must be string or unicode");
+        return NULL;
+      }
+
       if (indent_len && !first)
 	{
 	  g_string_append_c (attr_value, '\n');
@@ -663,7 +693,7 @@ pygi_collect_attributes (PyObject *self,
 	first = FALSE;
   }
 
-  return PyString_FromString (g_string_free (attr_value, FALSE));
+  return PyUnicode_FromString (g_string_free (attr_value, FALSE));
 }
 
 /* Module */
diff --git a/giscanner/transformer.py b/giscanner/transformer.py
index f07e8d1..4cd2448 100644
--- a/giscanner/transformer.py
+++ b/giscanner/transformer.py
@@ -608,7 +608,7 @@ raise ValueError."""
             return None
         if symbol.const_string is not None:
             typeval = ast.TYPE_STRING
-            value = symbol.const_string
+            value = unicode(symbol.const_string, 'utf-8')
         elif symbol.const_int is not None:
             typeval = ast.TYPE_INT
             value = '%d' % (symbol.const_int, )
diff --git a/giscanner/xmlwriter.py b/giscanner/xmlwriter.py
index a418cc4..84c24c0 100755
--- a/giscanner/xmlwriter.py
+++ b/giscanner/xmlwriter.py
@@ -120,7 +120,7 @@ class XMLWriter(object):
         if indent:
             self._data.write('%s%s%s' % (
                     self._indent_char * self._indent,
-                    line,
+                    line.encode('utf-8'),
                     self._newline_char))
         else:
             self._data.write('%s%s' % (line, self._newline_char))



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]