[gthumb] convert all the metadata to utf-8



commit 05863d0f58def59991d2206f7185a6b56f83d5d3
Author: Paolo Bacchilega <paobac src gnome org>
Date:   Mon Nov 3 11:34:06 2014 +0100

    convert all the metadata to utf-8

 extensions/exiv2_tools/exiv2-utils.cpp |   43 +++++++++++-----------
 gthumb/glib-utils.c                    |   61 +++++++++++++++++++++++++++++--
 gthumb/glib-utils.h                    |    5 +++
 gthumb/test-glib-utils.c               |   55 +++++++++++++++++++++++++++--
 4 files changed, 135 insertions(+), 29 deletions(-)
---
diff --git a/extensions/exiv2_tools/exiv2-utils.cpp b/extensions/exiv2_tools/exiv2-utils.cpp
index 70a1533..b4a8fa5 100644
--- a/extensions/exiv2_tools/exiv2-utils.cpp
+++ b/extensions/exiv2_tools/exiv2-utils.cpp
@@ -244,39 +244,43 @@ create_metadata (const char *key,
                 const char *category,
                 const char *type_name)
 {
+       char            *formatted_value_utf8;
        char            *attribute;
        GthMetadataInfo *metadata_info;
        GthMetadata     *metadata;
        char            *description_utf8;
-       char            *formatted_value_utf8;
 
-       if (_g_utf8_all_spaces (formatted_value))
+       formatted_value_utf8 = _g_utf8_from_any (formatted_value);
+       if (_g_utf8_all_spaces (formatted_value_utf8))
                return NULL;
 
+       description_utf8 = _g_utf8_from_any (description);
+
        attribute = exiv2_key_to_attribute (key);
-       description_utf8 = g_locale_to_utf8 (description, -1, NULL, NULL, NULL);
        if (attribute_is_date (attribute)) {
                GTimeVal time_;
 
+               g_free (formatted_value_utf8);
+               formatted_value_utf8 = NULL;
+
                if (_g_time_val_from_exif_date (raw_value, &time_))
                        formatted_value_utf8 = _g_time_val_strftime (&time_, "%x %X");
                else
                        formatted_value_utf8 = g_locale_to_utf8 (formatted_value, -1, NULL, NULL, NULL);
-               if (formatted_value_utf8 == NULL)
-                       formatted_value_utf8 = g_strdup (INVALID_VALUE);
        }
-       else {
-               const char *formatted_clean;
-
-               if (strncmp (formatted_value, "lang=", 5) == 0)
-                       formatted_clean = strchr (formatted_value, ' ') + 1;
-               else
-                       formatted_clean = formatted_value;
-               formatted_value_utf8 = g_locale_to_utf8 (formatted_clean, -1, NULL, NULL, NULL);
-               if (formatted_value_utf8 == NULL)
-                       formatted_value_utf8 = g_strdup (INVALID_VALUE);
+       else if (_g_utf8_has_prefix (formatted_value_utf8, "lang=")) {
+               int   pos;
+               char *formatted_clean;
+
+               pos = _g_utf8_first_ascii_space (formatted_value_utf8);
+               formatted_clean = _g_utf8_remove_prefix (formatted_value_utf8, pos + 1);
+               g_free (formatted_value_utf8);
+               formatted_value_utf8 = formatted_clean;
        }
 
+       if (formatted_value_utf8 == NULL)
+               formatted_value_utf8 = g_strdup (INVALID_VALUE);
+
        metadata_info = gth_main_get_metadata_info (attribute);
        if ((metadata_info == NULL) && (category != NULL)) {
                GthMetadataInfo info;
@@ -464,19 +468,14 @@ set_attribute_from_metadata (GFileInfo  *info,
                      "value-type", &type_name,
                      NULL);
 
-       formatted_value_utf8 = _g_utf8_try_from_any (formatted_value);
-       raw_value_utf8 = _g_utf8_try_from_any (raw_value);
-
        set_file_info (info,
                       attribute,
                       description,
-                      formatted_value_utf8,
-                      raw_value_utf8,
+                      formatted_value,
+                      raw_value,
                       NULL,
                       type_name);
 
-       g_free (raw_value_utf8);
-       g_free (formatted_value_utf8);
        g_free (description);
        g_free (formatted_value);
        g_free (raw_value);
diff --git a/gthumb/glib-utils.c b/gthumb/glib-utils.c
index 0225dcd..8fb6573 100644
--- a/gthumb/glib-utils.c
+++ b/gthumb/glib-utils.c
@@ -865,6 +865,62 @@ _g_replace_pattern (const char *utf8_text,
 }
 
 
+/* Returns the character offset of the first ASCII white-space character
+ * in str, or -1 if str is NULL or contains none. */
+int
+_g_utf8_first_ascii_space (const char *str)
+{
+       const char *pos;
+
+       for (pos = str; (pos != NULL) && (*pos != 0); pos = g_utf8_next_char (pos)) {
+               gunichar c = g_utf8_get_char (pos);
+               /* g_ascii_isspace() only looks at (guchar) c: without the
+                * range check a code point such as U+2020 or U+0120 would
+                * be mistaken for ' '. */
+               if ((c < 0x80) && g_ascii_isspace ((gchar) c))
+                       return g_utf8_pointer_to_offset (str, pos);
+       }
+       return -1;
+}
+
+
+/* Returns TRUE if the UTF-8 string starts with prefix.
+ *
+ * A NULL string never matches; a NULL prefix matches any non-NULL
+ * string.
+ *
+ * The comparison is done byte-wise with g_str_has_prefix(): UTF-8 is
+ * self-synchronizing, so a byte-level prefix match on valid UTF-8 is
+ * also a character-level one.  This avoids calling g_utf8_substring()
+ * with an end offset past the end of a string shorter than the prefix,
+ * and avoids the locale-dependent g_utf8_collate(). */
+gboolean
+_g_utf8_has_prefix (const char  *string,
+                   const char  *prefix)
+{
+       if (string == NULL)
+               return FALSE;
+       if (prefix == NULL)
+               return TRUE;
+
+       return g_str_has_prefix (string, prefix);
+}
+
+
+/* Returns a newly allocated copy of string without its first
+ * prefix_length characters, or NULL when nothing would be left. */
+char *
+_g_utf8_remove_prefix (const char *string,
+                      int         prefix_length)
+{
+       int n_chars = g_utf8_strlen (string, -1);
+
+       if (prefix_length >= n_chars)
+               return NULL;
+       return g_utf8_substring (string, prefix_length, n_chars);
+}
+
+
 char *
 _g_utf8_replace (const char  *string,
                 const char  *pattern,
@@ -1037,11 +1093,8 @@ _g_utf8_try_from_any (const char *str)
        if (str == NULL)
                return NULL;
 
-       if (! g_utf8_validate (str, -1, NULL)) {
+       if (! g_utf8_validate (str, -1, NULL))
                utf8_str = g_locale_to_utf8 (str, -1, NULL, NULL, NULL);
-               if (utf8_str == NULL)
-                       utf8_str = g_utf16_to_utf8 ((gunichar2 *) str, -1, NULL, NULL, NULL);
-       }
        else
                utf8_str = g_strdup (str);
 
diff --git a/gthumb/glib-utils.h b/gthumb/glib-utils.h
index a7eb2d3..0f0552e 100644
--- a/gthumb/glib-utils.h
+++ b/gthumb/glib-utils.h
@@ -180,6 +180,11 @@ char *          _g_replace                       (const char  *str,
 char *          _g_replace_pattern               (const char  *utf8_text,
                                                  gunichar     pattern,
                                                  const char  *value);
+int             _g_utf8_first_ascii_space        (const char  *string);
+gboolean        _g_utf8_has_prefix               (const char  *string,
+                                                 const char  *prefix);
+char *         _g_utf8_remove_prefix            (const char  *string,
+                                                 int         prefix_length);
 char *          _g_utf8_replace                  (const char  *string,
                                                  const char  *pattern,
                                                  const char  *replacement);
diff --git a/gthumb/test-glib-utils.c b/gthumb/test-glib-utils.c
index 4e8a2c4..aaa05bb 100644
--- a/gthumb/test-glib-utils.c
+++ b/gthumb/test-glib-utils.c
@@ -59,7 +59,8 @@ test_regexp (void)
        }
        b[j] = NULL;
        attributes = g_strjoinv (",", b);
-       g_print ("==> %s\n", attributes);
+
+       g_assert_cmpstr (attributes, ==, "Exif::Image::DateTime,File::Size");
 
        g_free (attributes);
        g_free (b);
@@ -68,6 +69,52 @@ test_regexp (void)
 }
 
 
+static void
+test_g_utf8_has_prefix (void)
+{
+       g_assert_true (_g_utf8_has_prefix ("lang=正體字/繁體字 中华人民共和国", "lang=")); /* ASCII prefix before multi-byte text */
+}
+
+
+static void
+test_g_utf8_first_space (void)
+{
+       g_assert_cmpint (_g_utf8_first_ascii_space (NULL), ==, -1); /* NULL is tolerated */
+       g_assert_cmpint (_g_utf8_first_ascii_space (""), ==, -1); /* empty string */
+       g_assert_cmpint (_g_utf8_first_ascii_space ("lang=FR langue d’oïl"), ==, 7); /* 7 characters precede the space */
+       g_assert_cmpint (_g_utf8_first_ascii_space ("正體字"), ==, -1); /* no space at all */
+       g_assert_cmpint (_g_utf8_first_ascii_space ("lang=正體字/繁體字 中华人民共和国"), ==, 12); /* offset is in characters, not bytes */
+}
+
+
+static void
+test_remove_lang_from_utf8_string (const char *value,
+                                  const char *expected)
+{
+       char *stripped = NULL;
+       int   space_pos;
+
+       /* Drop the "lang=..." tag up to and including the first ASCII space. */
+       if (_g_utf8_has_prefix (value, "lang=")) {
+               space_pos = _g_utf8_first_ascii_space (value);
+               if (space_pos >= 1)
+                       stripped = _g_utf8_remove_prefix (value, space_pos + 1);
+       }
+       g_assert_true (stripped != NULL);
+       g_assert_true (g_utf8_collate (stripped, expected) == 0);
+       g_free (stripped);
+}
+
+
+static void
+test_remove_lang_from_utf8_string_all (void)
+{
+       test_remove_lang_from_utf8_string ("lang=EN hello", "hello"); /* plain ASCII value */
+       test_remove_lang_from_utf8_string ("lang=FR langue d’oïl", "langue d’oïl"); /* value with non-ASCII characters */
+       test_remove_lang_from_utf8_string ("lang=正體字/繁體字 中华人民共和国", "中华人民共和国"); /* non-ASCII language tag and value */
+}
+
+
 int
 main (int   argc,
       char *argv[])
@@ -75,8 +122,10 @@ main (int   argc,
        g_test_init (&argc, &argv, NULL);
 
        g_test_add_func ("/glib-utils/_g_rand_string/1", test_g_rand_string);
-
-       test_regexp ();
+       g_test_add_func ("/glib-utils/regex", test_regexp);
+       g_test_add_func ("/glib-utils/_g_utf8_has_prefix/1", test_g_utf8_has_prefix);
+       g_test_add_func ("/glib-utils/_g_utf8_first_space/1", test_g_utf8_first_space);
+       g_test_add_func ("/glib-utils/remove_lang_from_utf8_string/1", test_remove_lang_from_utf8_string_all);
 
        return g_test_run ();
 }


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]