[goffice] Encoding: improve character encoding detection.



commit 38f7e41fc3f2b1bda1bfb8d689fe24f2e1ed8a34
Author: Morten Welinder <terra gnome org>
Date:   Wed Dec 31 19:01:57 2014 -0500

    Encoding: improve character encoding detection.

 ChangeLog                      |    6 ++++++
 goffice/utils/go-glib-extras.c |    7 ++++---
 2 files changed, 10 insertions(+), 3 deletions(-)
---
diff --git a/ChangeLog b/ChangeLog
index 5c5fbd7..995e82e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2014-12-31  Morten Welinder  <terra gnome org>
+
+       * goffice/utils/go-glib-extras.c (go_guess_encoding): Lean a bit
+       more heavily on libxml's encoding guess than on g_get_charset,
+       which may be a bit random.
+
 2014-12-29  Morten Welinder  <terra gnome org>
 
        * goffice/gtk/goffice-gtk.h (go_gtk_builder_new_internal): Remove.
diff --git a/goffice/utils/go-glib-extras.c b/goffice/utils/go-glib-extras.c
index e129c93..7750f45 100644
--- a/goffice/utils/go-glib-extras.c
+++ b/goffice/utils/go-glib-extras.c
@@ -754,8 +754,7 @@ go_guess_encoding (const char *raw, size_t len, const char *user_guess,
 
                switch (try) {
                case 1: guess = user_guess; break;
-               case 2: g_get_charset (&guess); break;
-               case 3: {
+               case 2: {
                        xmlCharEncoding enc =
                                xmlDetectCharEncoding ((const unsigned char*)raw, len);
                        switch (enc) {
@@ -775,6 +774,7 @@ go_guess_encoding (const char *raw, size_t len, const char *user_guess,
                        }
                        break;
                }
+               case 3: g_get_charset (&guess); break;
                case 4: guess = "ASCII"; break;
                case 5: guess = "ISO-8859-1"; break;
                case 6: guess = "UTF-8"; break;
@@ -785,7 +785,8 @@ go_guess_encoding (const char *raw, size_t len, const char *user_guess,
                        continue;
 
                if (debug)
-                       g_printerr ("Trying %s as encoding.\n", guess);
+                       g_printerr ("Trying %s as encoding using method %d.\n",
+                                   guess, try);
 
                utf8_data = g_convert (raw, len, "UTF-8", guess,
                                       &bytes_read, &bytes_written, &error);


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]