[tepl] FileLoader: ASCII -> UTF-8



commit 5c8800a2d1ac2d502b82b045af05aa39607bd73d
Author: Sébastien Wilmet <swilmet gnome org>
Date:   Fri Oct 20 10:29:10 2017 +0200

    FileLoader: ASCII -> UTF-8
    
    See the comment in the code.

 tepl/tepl-file-loader.c |   44 +++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 43 insertions(+), 1 deletions(-)
---
diff --git a/tepl/tepl-file-loader.c b/tepl/tepl-file-loader.c
index ec5a8a7..c1ad356 100644
--- a/tepl/tepl-file-loader.c
+++ b/tepl/tepl-file-loader.c
@@ -872,6 +872,48 @@ out:
        g_clear_object (&converter);
 }
 
+/* Creates the encoding for @charset (non-NULL); the caller owns the result.
+ * ASCII is replaced by UTF-8 when the locale encoding is UTF-8.
+ *
+ * Rationale: uchardet returns ASCII if only ASCII chars are present. But any
+ * UTF-8 char can be inserted in a GtkTextView, so the user would get a warning
+ * each time the text becomes UTF-8, which would be annoying. Most users with a
+ * UTF-8 locale probably expect UTF-8 files. Some projects may prefer to keep
+ * e.g. their source code ASCII-only, but that is likely a minority of users.
+ *
+ * TODO: have a list of candidate encodings, and if ASCII is before UTF-8,
+ * keep ASCII. This could be configurable if there is a GSetting for the
+ * candidate encodings, with a GUI to configure the list, like in gedit.
+ */
+static TeplEncoding *
+create_encoding_for_charset (const gchar *charset)
+{
+       TeplEncoding *result;
+       TeplEncoding *ascii;
+       TeplEncoding *locale;
+       gboolean prefer_utf8;
+
+       g_assert (charset != NULL);
+
+       result = tepl_encoding_new (charset);
+       ascii = tepl_encoding_new ("ASCII");
+       locale = tepl_encoding_new_from_locale ();
+
+       prefer_utf8 = (tepl_encoding_equals (result, ascii) &&
+                      tepl_encoding_is_utf8 (locale));
+
+       tepl_encoding_free (locale);
+       tepl_encoding_free (ascii);
+
+       if (prefer_utf8)
+       {
+               tepl_encoding_free (result);
+               result = tepl_encoding_new_utf8 ();
+       }
+
+       return result;
+}
+
 static void
 determine_encoding (GTask *task)
 {
@@ -912,7 +954,7 @@ determine_encoding (GTask *task)
        charset = uchardet_get_charset (ud);
        if (charset != NULL && charset[0] != '\0')
        {
-               priv->detected_encoding = tepl_encoding_new (charset);
+               priv->detected_encoding = create_encoding_for_charset (charset);
        }
 
        uchardet_delete (ud);


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]