[glib/wip/gdesktopappinfo] desktop index: Deal with multiple languages



commit 3e3bc2ded66c441458a8394322c12692bfd4cf37
Author: Ryan Lortie <desrt desrt ca>
Date:   Wed Sep 11 22:53:14 2013 -0400

    desktop index: Deal with multiple languages
    
    In the event that the LANGUAGE environment variable is used, search for
    matching applications using the closest-matching text index for *each*
    language.

 gio/gdesktopappinfo.c |  327 +++++++++++++++++++++++++++++++++++++++----------
 1 files changed, 264 insertions(+), 63 deletions(-)
---
diff --git a/gio/gdesktopappinfo.c b/gio/gdesktopappinfo.c
index eec7366..c61db90 100644
--- a/gio/gdesktopappinfo.c
+++ b/gio/gdesktopappinfo.c
@@ -29,6 +29,7 @@
 #include <errno.h>
 #include <string.h>
 #include <unistd.h>
+#include <locale.h>
 
 #ifdef HAVE_CRT_EXTERNS_H
 #include <crt_externs.h>
@@ -147,6 +148,141 @@ G_DEFINE_TYPE_WITH_CODE (GDesktopAppInfo, g_desktop_app_info, G_TYPE_OBJECT,
 G_LOCK_DEFINE_STATIC (g_desktop_env);
 static gchar *g_desktop_env = NULL;
 
+/* locale utilities {{{1 */
+
+/* We need more control over the output of this function than we get
+ * from the public APIs of glib, so roll our own version of this.
+ *
+ * We don't make this a public API of GLib because it's extremely
+ * difficult to get this working correctly on Windows, but we don't have
+ * to worry about that here.
+ *
+ * If LANGUAGE is given (and l10n is enabled), we want the list of
+ * languages from this environment variable.  Otherwise, we want a
+ * single language code.  If l10n is disabled, NULL will do.
+ *
+ * Language codes should have the form xx[_YY][@zzz]
+ */
+static gchar **
+get_locale_list (void)
+{
+  const gchar *lc_messages;
+  const gchar *language;
+
+  /* We really should be checking LC_MESSAGES, but it's not in POSIX.
+   * If we don't have it, use LC_CTYPE instead and manually check the
+   * variables that may have impacted the setting of LC_MESSAGES if we
+   * had it.
+   *
+   * Of course, none of this is particularly threadsafe...
+   */
+#ifdef LC_MESSAGES
+  lc_messages = setlocale (LC_MESSAGES, NULL);
+#else
+  lc_messages = setlocale (LC_CTYPE, NULL);
+
+  /* If LC_CTYPE is the "C" locale then this is a good indication that
+   * l10n has not been enabled for this program.
+   *
+   * Otherwise, do some more checking to determine what should be the
+   * correct LC_MESSAGES value.
+   */
+  if (lc_messages != NULL && !g_str_equal (lc_messages, "C") && !g_str_equal (lc_messages, "POSIX"))
+    {
+      /* We want to consult the value of LC_MESSAGES to get a more
+       * specific value than we saw from LC_CTYPE, but we only want to
+       * do this in the case that LC_ALL was not set.
+       *
+       * We do not consult LANG because this would already have impacted
+       * the setting of LC_CTYPE as well.
+       */
+      if (!g_getenv ("LC_ALL"))
+        {
+          const gchar *var;
+
+          var = g_getenv ("LC_MESSAGES");
+
+          if (var)
+            lc_messages = var;
+        }
+    }
+#endif
+
+  /* Hopefully we now have a reasonable value for 'lc_messages'.
+   *
+   * If it's a C locale, just return NULL now, ignoring LANGUAGE.
+   */
+  if (lc_messages == NULL || g_str_equal (lc_messages, "C") || g_str_equal (lc_messages, "POSIX"))
+    return NULL;
+
+  /* Otherwise, check LANGUAGE. */
+  language = g_getenv ("LANGUAGE");
+
+  /* If LANGUAGE was set to a non-empty value (as gettext requires), split it. */
+  if (language != NULL && language[0] != '\0')
+    return g_strsplit (language, ":", 0);
+
+  /* Otherwise, return the LC_MESSAGES value as a one-item array. */
+  {
+    gchar **locales;
+
+    locales = g_new (gchar *, 1 + 1);
+    locales[0] = g_strdup (lc_messages);
+    locales[1] = NULL;
+
+    return locales;
+  }
+}
+
+/* This function returns an array of arrays of strings such that each
+ * item in the outermost array corresponds to one language that the user
+ * has selected (with the first item being the most-preferred language).
+ *
+ * Each inner array gives a priority-ordered list of locales that would
+ * be a suitable match for that language.
+ *
+ * The idea is that for each item in the outer array, we should have a
+ * corresponding locale in the desktop file index that we perform
+ * searches on, even if it is the C locale (since missing translations
+ * would result in the C locale being used anyway).
+ *
+ * The very first matching locale is the one we use for querying
+ * strings, however.
+ */
+static const gchar * const * const *
+get_exploded_locale_list (void)
+{
+  static gchar ***list;
+
+  if (g_once_init_enter (&list))
+    {
+      gchar ***result;
+      gchar **locales;
+      gint i, n;
+
+      locales = get_locale_list ();
+      if (locales == NULL || locales[0] == NULL)
+        {
+          g_free (locales);  /* NULL or an empty vector: no strings to free */
+          locales = g_new (gchar *, 2);
+          locales[0] = g_strdup ("C");
+          locales[1] = NULL;
+        }
+
+      n = g_strv_length (locales);
+
+      result = g_new (gchar **, n + 1);
+      for (i = 0; i < n; i++)
+        result[i] = g_get_locale_variants (locales[i]);
+      result[i] = NULL;
+
+      g_once_init_leave (&list, result);
+      g_strfreev (locales);  /* variants are copies, so free the strings too */
+    }
+
+  return (const gchar * const * const *) list;
+}
+
 /* DesktopFileDirIndex implementation {{{1 */
 
 /* DesktopFileDir implementation {{{1 */
@@ -160,7 +296,7 @@ typedef struct
   struct dfi_index           *dfi;
   gint8                      *key_id_map;
   guint16                     key_id_map_length;
-  guint16                     this_locale_id;
+  guint16                    *locale_ids;
   guint16                     desktop_entry_id;
 } DesktopFileDir;
 
@@ -431,35 +567,76 @@ desktop_file_dir_indexed_init (DesktopFileDir *dir)
 {
   desktop_key_init ();
 
-  /* Find the current locale */
+  /* Find the locale IDs to use */
   {
+    const gchar * const * const *language_names;
     const struct dfi_string_list *locale_names;
-    const gchar * const *language_names;
-    gint i;
-
-    locale_names = dfi_index_get_locale_names (dir->dfi);
-
-    language_names = g_get_language_names ();
+    gint i, j, k, n;
+
+    /* From get_exploded_locale_list() we will get an array that looks
+     * something like so:
+     *
+     *  [ ['eo', 'C'], ['de_CH', 'de', 'C'], ['fr_CA', 'fr', 'C'] ]
+     *
+     * We want to produce a list of IDs such that each item in the list
+     * corresponds to the 'best match' for each item in the original
+     * list.
+     *
+     * For example, if the index has entries for 'eo', 'fr_CA' and 'de',
+     * but not for 'de_CH' then our created array would have the ID
+     * numbers corresponding to the following locales:
+     *
+     *  [ eo, de, fr_CA ]
+     *
+     * In the event that the best match for a particular item in the
+     * list was the C locale, we simply exclude that item from being
+     * added.  So for example, if 'eo' was not in the index, we would
+     * not add the 'C' locale, but would rather just have this list:
+     *
+     *  [ de, fr_CA ]
+     *
+     * The C locale is always ID 0, so we use that as a terminator.
+     *
+     * As a minor nag, this will cause problems if "C" was explicitly
+     * listed among the items in LANGUAGE= but this would be a pretty
+     * silly thing to do, since lookups in C never fail and it's already
+     * the universal fallback...
+     */
     locale_names = dfi_index_get_locale_names (dir->dfi);
-
-    /* If we don't get anything, the C locale is always zero, so set
-     * it as a default.
+    language_names = get_exploded_locale_list ();
+    for (n = 0; language_names[n]; n++)
+      ;
+
+    /* Allocate an array under the assumption that we will fill it.  If
+     * we don't, it's no big loss.
+     *
+     * Note: the search code assumes that locale_ids will always have at
+     * least two items, and that even if locale_ids[0] is zero,
+     * locale_ids[1] will also be zero.
+     *
+     * Do not modify this allocation in a way that would invalidate that
+     * assumption.
      */
-    dir->this_locale_id = 0;
+    g_assert (n != 0);
+    dir->locale_ids = g_new0 (guint16, n + 1);
+    i = 0;  /* We use 'i' as a moving index into locale_ids, as we add items */
 
-    /* Iterate over our language names, in order of preference */
-    for (i = 0; language_names[i]; i++)
-      {
-        gint result = dfi_string_list_binary_search (locale_names, dir->dfi, language_names[i]);
+    /* Iterate over our locales, looking up the ID for each */
+    for (j = 0; language_names[j]; j++)
+      for (k = 0; language_names[j][k] && !g_str_equal (language_names[j][k], "C"); k++)
+        {
+          gint result = dfi_string_list_binary_search (locale_names, dir->dfi, language_names[j][k]);
 
-        if (result >= 0)
-          {
-            dir->this_locale_id = result;
-            break;
-          }
-      }
+          g_print ("Looking for %s\n", language_names[j][k]);
 
-    g_printerr ("found my locale is %d/%s\n", dir->this_locale_id, language_names[i]);
+          if (result >= 0)
+            {
+              g_print ("Found language %s is ID %d\n", language_names[j][k], result);
+              dir->locale_ids[i++] = result;
+              break;
+            }
+        }
+    g_assert (i <= n);
   }
 
   /* Populate the app names list. */
@@ -674,61 +851,80 @@ desktop_file_dir_indexed_search (DesktopFileDir *dir,
   const struct dfi_text_index_item *start;
   const struct dfi_text_index_item *end;
   const struct dfi_text_index_item *item;
+  gint i;
 
-  text_indexes = dfi_index_get_text_indexes (dir->dfi);
-  text_index = dfi_text_index_from_pointer (dir->dfi,
-                                            dfi_pointer_array_get_pointer (text_indexes, 
dir->this_locale_id));
-  if (text_index == NULL)
-    return;
+  /* We search every locale...
+   *
+   * If the user lists off multiple languages in LANGUAGE then it's
+   * probably because they understand multiple languages.
+   *
+   * Whether or not a particular app is translated for a given locale is
+   * semi-random, so don't let the existence of a translation in a
+   * higher-priority language impact the ability to find an app by
+   * searching in the lower-priority language.
+   *
+   * Note: if the only available locale is C, then we want to allow
+   * searching there...  This will be the case if locale_ids[0] is 0,
+   * so we always permit i = 0 to loop.
+   *
+   * Note: locale_ids always has at least two items in it, and it is
+   * allocated zero-filled, so locale_ids[1] is safe, even if
+   * locale_ids[0] was 0.
+   */
+  for (i = 0; i == 0 || dir->locale_ids[i]; i++)
+    {
+      text_indexes = dfi_index_get_text_indexes (dir->dfi);
+      text_index = dfi_text_index_from_pointer (dir->dfi,
+                                                dfi_pointer_array_get_pointer (text_indexes, 
dir->locale_ids[i]));
+      if (text_index == NULL)
+        return;
 
-  dfi_text_index_prefix_search (text_index, dir->dfi, term, &start, &end);
+      dfi_text_index_prefix_search (text_index, dir->dfi, term, &start, &end);
 
-  app_names = dfi_index_get_app_names (dir->dfi);
+      app_names = dfi_index_get_app_names (dir->dfi);
 
-  for (item = start; item < end; item++)
-    {
-      const dfi_id *ids;
-      guint n_ids;
+      for (item = start; item < end; item++)
+        {
+          const dfi_id *ids;
+          guint n_ids;
 
-      gint j;
+          gint j;
 
-      ids = dfi_text_index_item_get_ids (item, dir->dfi, &n_ids);
-      //n_ids = 0;
-      //ids = dfi_text_index_get_ids_for_exact_match (dir->dfi, text_index, term, (gint *) &n_ids);
-      g_printerr ("n_ids is %u\n", n_ids);
+          ids = dfi_text_index_item_get_ids (item, dir->dfi, &n_ids);
 
-      if (n_ids % 3 != 0)
-        continue;
+          if (n_ids % 3 != 0)
+            continue;
 
-      for (j = 0; j < n_ids; j += 3)
-        {
-          guint16 app_id, group_id, key_id;
-          const gchar *app_name;
-          guint8 match_category;
+          for (j = 0; j < n_ids; j += 3)
+            {
+              guint16 app_id, group_id, key_id;
+              const gchar *app_name;
+              guint8 match_category;
 
-          app_id   = dfi_id_get (ids[j + 0]);
-          group_id = dfi_id_get (ids[j + 1]);
-          key_id   = dfi_id_get (ids[j + 2]);
+              app_id   = dfi_id_get (ids[j + 0]);
+              group_id = dfi_id_get (ids[j + 1]);
+              key_id   = dfi_id_get (ids[j + 2]);
 
-          app_name = dfi_string_list_get_string_at_index (app_names, dir->dfi, app_id);
-          if (!app_name)
-            continue;
+              app_name = dfi_string_list_get_string_at_index (app_names, dir->dfi, app_id);
+              if (!app_name)
+                continue;
 
-          if (desktop_file_dir_app_name_is_masked (dir, app_name))
-            continue;
+              if (desktop_file_dir_app_name_is_masked (dir, app_name))
+                continue;
 
-          if (group_id != dir->desktop_entry_id)
-            continue;
+              if (group_id != dir->desktop_entry_id)
+                continue;
 
-          if (key_id >= dir->key_id_map_length)
-            continue;
+              if (key_id >= dir->key_id_map_length)
+                continue;
 
-          match_category = desktop_key_match_category[dir->key_id_map[key_id]];
+              match_category = desktop_key_match_category[dir->key_id_map[key_id]];
 
-          if (!match_category)
-            continue;
+              if (!match_category)
+                continue;
 
-          insert_into_list (categories, app_name, match_category - 1, max_hits);
+              insert_into_list (categories, app_name, match_category - 1, max_hits);
+            }
         }
     }
 }
@@ -788,9 +984,14 @@ desktop_file_dir_reset (DesktopFileDir *dir)
       dir->key_id_map = NULL;
     }
 
+  if (dir->locale_ids)
+    {
+      g_free (dir->locale_ids);
+      dir->locale_ids = NULL;
+    }
+
   dir->key_id_map_length = 0;
   dir->desktop_entry_id = 0;
-  dir->this_locale_id = 0;
 
   dir->is_setup = FALSE;
 }



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]