[tepl] FileLoader: ASCII -> UTF-8
- From: Sébastien Wilmet <swilmet src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tepl] FileLoader: ASCII -> UTF-8
- Date: Fri, 20 Oct 2017 09:04:38 +0000 (UTC)
commit 5c8800a2d1ac2d502b82b045af05aa39607bd73d
Author: Sébastien Wilmet <swilmet gnome org>
Date: Fri Oct 20 10:29:10 2017 +0200
FileLoader: ASCII -> UTF-8
See the comment in the code.
tepl/tepl-file-loader.c | 44 +++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 43 insertions(+), 1 deletions(-)
---
diff --git a/tepl/tepl-file-loader.c b/tepl/tepl-file-loader.c
index ec5a8a7..c1ad356 100644
--- a/tepl/tepl-file-loader.c
+++ b/tepl/tepl-file-loader.c
@@ -872,6 +872,48 @@ out:
g_clear_object (&converter);
}
+static TeplEncoding *
+create_encoding_for_charset (const gchar *charset)
+{
+ TeplEncoding *encoding_for_charset;
+ TeplEncoding *ascii_encoding;
+ TeplEncoding *locale_encoding;
+
+ g_assert (charset != NULL);
+
+ encoding_for_charset = tepl_encoding_new (charset);
+
+ ascii_encoding = tepl_encoding_new ("ASCII");
+ locale_encoding = tepl_encoding_new_from_locale ();
+
+ /* Map a detected charset to the encoding to use: ASCII -> UTF-8 if the
+ * locale is UTF-8, otherwise the encoding for @charset unchanged.
+ *
+ * uchardet returns ASCII if only ASCII chars are present. But since any
+ * UTF-8 char can be inserted in a GtkTextView, it would be annoying for
+ * the user to have a warning each time the text becomes UTF-8. Most
+ * users probably expect their files to be UTF-8 if their locale is
+ * UTF-8. The exception is for example to keep source code ASCII-only;
+ * some projects may prefer that, but that's likely a minority of users.
+ *
+ * TODO: have a list of candidate encodings, and if ASCII is before
+ * UTF-8, keep ASCII. This could be configurable if there is a GSetting
+ * for the candidate encodings, with a GUI to configure the list, like
+ * in gedit.
+ */
+ if (tepl_encoding_equals (encoding_for_charset, ascii_encoding) &&
+ tepl_encoding_is_utf8 (locale_encoding))
+ {
+ tepl_encoding_free (encoding_for_charset);
+ encoding_for_charset = tepl_encoding_new_utf8 ();
+ }
+
+ tepl_encoding_free (ascii_encoding);
+ tepl_encoding_free (locale_encoding);
+
+ return encoding_for_charset;
+}
+
static void
determine_encoding (GTask *task)
{
@@ -912,7 +954,7 @@ determine_encoding (GTask *task)
charset = uchardet_get_charset (ud);
if (charset != NULL && charset[0] != '\0')
{
- priv->detected_encoding = tepl_encoding_new (charset);
+ priv->detected_encoding = create_encoding_for_charset (charset);
}
uchardet_delete (ud);
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]