[perl-Glib] GPerlArgv: correctly handle utf8-encoded strings



commit 24e411443fc9e8d0fccae9a832bf4a2aa4a7fc5e
Author: Torsten Schönfeld <kaffeetisch gmx de>
Date:   Sat Jan 5 23:39:12 2013 +0100

    GPerlArgv: correctly handle utf8-encoded strings
    
    Store and restore the UTF8 flag when going from PV to char* and back.  This
    should fix issues seen with utf8-encoded strings in @ARGV with, for example,
    Gtk2->init.

 Glib.xs     |   15 +++++++++++++++
 NEWS        |    2 ++
 t/options.t |   14 +++++++++++++-
 3 files changed, 30 insertions(+), 1 deletions(-)
---
diff --git a/Glib.xs b/Glib.xs
index 5dfe083..3046dbb 100644
--- a/Glib.xs
+++ b/Glib.xs
@@ -186,6 +186,12 @@ typedef struct {
 	 * Used to free the copied strings reliably even if they are removed
 	 * from argv. */
 	char **shadows;
+	 /* Hash table (pointer (not string) -> utf8 flag) so we can completely
+	  * restore PVs from the strings.  We cannot simply use an array of
+	  * utf8 flags because strings might be removed from argv, in which
+	  * case we wouldn't know which entry in the utf8 flag array
+	  * corresponds to which string. */
+	GHashTable *utf8_flags;
 } GPerlArgvPriv;
 
 =item GPerlArgv * gperl_argv_new ()
@@ -238,6 +244,7 @@ gperl_argv_new ()
 
 	priv = g_new (GPerlArgvPriv, 1);
 	priv->shadows = g_new0 (char*, pargv->argc);
+	priv->utf8_flags = g_hash_table_new (NULL, NULL);
 	pargv->priv = priv;
 
 	pargv->argv[0] = SvPV_nolen (ARGV0);
@@ -246,8 +253,12 @@ gperl_argv_new ()
 		SV ** svp = av_fetch (ARGV, i, 0);
 		if (svp && gperl_sv_is_defined (*svp)) {
 			const char *arg = SvPV_nolen (*svp);
+			gboolean utf8_flag = !!SvUTF8 (*svp);
 			priv->shadows[i] = pargv->argv[i+1]
 			                 = g_strdup (arg);
+			g_hash_table_insert (priv->utf8_flags,
+			                     pargv->argv[i+1],
+			                     GINT_TO_POINTER (utf8_flag));
 		}
 	}
 
@@ -273,7 +284,10 @@ gperl_argv_update (GPerlArgv *pargv)
 	for (i = 1 ; i < pargv->argc ; i++) {
 		SV *sv;
 		const char *arg = pargv->argv[i];
+		gboolean utf8_flag = !!g_hash_table_lookup (priv->utf8_flags, arg);
 		sv = newSVpv (arg, PL_na);
+		if (utf8_flag)
+			SvUTF8_on (sv);
 		av_push (ARGV, sv);
 	}
 }
@@ -288,6 +302,7 @@ gperl_argv_free (GPerlArgv *pargv)
 {
 	GPerlArgvPriv *priv = pargv->priv;
 	g_strfreev (priv->shadows);
+	g_hash_table_destroy (priv->utf8_flags);
 	g_free (pargv->priv);
 	g_free (pargv->argv);
 	g_free (pargv);
diff --git a/NEWS b/NEWS
index ad71da0..374c20b 100644
--- a/NEWS
+++ b/NEWS
@@ -4,6 +4,8 @@ Overview of changes in Glib <next> (unstable)
 * Make Glib::Object subclassing more robust.  This should in particular fix
   issues revealed by the change to hash randomization introduced in perl
   5.17.6.
+* Correctly handle utf8-encoded strings in GPerlArgv.  This should fix issues
+  seen with utf8-encoded strings in @ARGV with, for example, Gtk2->init.
 
 Overview of changes in Glib 1.280 (stable)
 ==========================================
diff --git a/t/options.t b/t/options.t
index 401abd6..dc807e0 100644
--- a/t/options.t
+++ b/t/options.t
@@ -9,7 +9,7 @@ use Glib qw(TRUE FALSE);
 unless (Glib -> CHECK_VERSION (2, 6, 0)) {
   plan skip_all => 'the option stuff is new in 2.6';
 } else {
-  plan tests => 29;
+  plan tests => 33;
 }
 
 # --------------------------------------------------------------------------- #
@@ -154,6 +154,18 @@ my $entries = [
     is_deeply ($string_array, [qw/aaa bbb/]);
     is_deeply ($filename_array, [qw(/usr/bin/bla ./harness)]);
   }
+
+  # Test that there is no double-encoding for utf8-encoded strings.
+  {
+    @ARGV = qw(-s â â);
+    $context -> parse();
+
+    is ($string, 'â');
+    is (length $string, 1);
+
+    is ($ARGV[0], 'â');
+    is (length $ARGV[0], 1);
+  }
 }
 
 # --------------------------------------------------------------------------- #



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]