[glib] Add g_desktop_app_info_search()



commit 3d32d5359aefc0c287265c85278a31c7e2ea9b3d
Author: Ryan Lortie <desrt desrt ca>
Date:   Tue Nov 5 22:51:48 2013 -0500

    Add g_desktop_app_info_search()
    
    The first time this function is called we load all of the keyfiles in
    the directory, ignoring the 'Hidden' ones and build an index out of the
    interesting fields using g_str_tokenize_and_fold().
    
    We do prefix matching on the tokens to find relevent desktop files.
    
    Right now this is implemented as a hashtable that we iterate over,
    checking prefixes on each token.  This could possibly be sped up by
    creating an array, but it's already pretty fast...
    
    https://bugzilla.gnome.org/show_bug.cgi?id=711557

 gio/gdesktopappinfo.c |  525 +++++++++++++++++++++++++++++++++++++++++++++++++
 gio/gdesktopappinfo.h |    2 +
 2 files changed, 527 insertions(+), 0 deletions(-)
---
diff --git a/gio/gdesktopappinfo.c b/gio/gdesktopappinfo.c
index a76369c..bf8ce89 100644
--- a/gio/gdesktopappinfo.c
+++ b/gio/gdesktopappinfo.c
@@ -150,6 +150,7 @@ typedef struct
   GLocalDirectoryMonitor     *monitor;
   GHashTable                 *app_names;
   gboolean                    is_setup;
+  GHashTable                 *memory_index;
 } DesktopFileDir;
 
 static DesktopFileDir *desktop_file_dirs;
@@ -245,6 +246,276 @@ add_to_table_if_appropriate (GHashTable      *apps,
   g_hash_table_insert (apps, g_strdup (info->desktop_id), info);
 }
 
+enum
+{
+  DESKTOP_KEY_Comment,
+  DESKTOP_KEY_GenericName,
+  DESKTOP_KEY_Keywords,
+  DESKTOP_KEY_Name,
+  DESKTOP_KEY_X_GNOME_FullName,
+
+  N_DESKTOP_KEYS
+};
+
+const gchar desktop_key_match_category[N_DESKTOP_KEYS] = {
+  /* Note: lower numbers are a better match.
+   *
+   * In case we want two keys to match at the same level, we can just
+   * use the same number for the two different keys.
+   */
+  [DESKTOP_KEY_Name]             = 1,
+  [DESKTOP_KEY_GenericName]      = 2,
+  [DESKTOP_KEY_Keywords]         = 3,
+  [DESKTOP_KEY_X_GNOME_FullName] = 4,
+  [DESKTOP_KEY_Comment]          = 5
+};
+
+static gchar *
+desktop_key_get_name (guint key_id)
+{
+  switch (key_id)
+    {
+    case DESKTOP_KEY_Comment:
+      return "Comment";
+    case DESKTOP_KEY_GenericName:
+      return "GenericName";
+    case DESKTOP_KEY_Keywords:
+      return "Keywords";
+    case DESKTOP_KEY_Name:
+      return "Name";
+    case DESKTOP_KEY_X_GNOME_FullName:
+      return "X-GNOME-FullName";
+    default:
+      g_assert_not_reached ();
+    }
+}
+
+/* Search global state {{{2
+ *
+ * We only ever search under a global lock, so we can use (and reuse)
+ * some global data to reduce allocations made while searching.
+ *
+ * In short, we keep around arrays of results that we expand as needed
+ * (and never shrink).
+ *
+ * static_token_results: this is where we append the results for each
+ *     token within a given desktop directory, as we handle it (which is
+ *     a union of all matches for this term)
+ *
+ * static_search_results: this is where we build the complete results
+ *     for a single directory (which is an intersection of the matches
+ *     found for each term)
+ *
+ * static_total_results: this is where we build the complete results
+ *     across all directories (which is a union of the matches found in
+ *     each directory)
+ *
+ * The app_names that enter these tables are always pointer-unique (in
+ * the sense that string equality is the same as pointer equality).
+ * This can be guaranteed for two reasons:
+ *
+ *   - we mask appids so that a given appid will only ever appear within
+ *     the highest-precedence directory that contains it.  We never
+ *     return search results from a lower-level directory if a desktop
+ *     file exists in a higher-level one.
+ *
+ *   - within a given directory, the string is unique because it's the
+ *     key in the hashtable of all app_ids for that directory.
+ *
+ * We perform a merging of the results in merge_token_results().  This
+ * works by ordering the two lists and moving through each of them (at
+ * the same time) looking for common elements, rejecting uncommon ones.
+ * "Order" here need not mean any particular thing, as long as it is
+ * some order.  Because of the uniqueness of our strings, we can use
+ * pointer order.  That's what's going on in compare_results() below.
+ */
+struct search_result
+{
+  const gchar *app_name;
+  gint         category;
+};
+
+static struct search_result *static_token_results;
+static gint                  static_token_results_size;
+static gint                  static_token_results_allocated;
+static struct search_result *static_search_results;
+static gint                  static_search_results_size;
+static gint                  static_search_results_allocated;
+static struct search_result *static_total_results;
+static gint                  static_total_results_size;
+static gint                  static_total_results_allocated;
+
+/* And some functions for performing nice operations against it */
+static gint
+compare_results (gconstpointer a,
+                 gconstpointer b)
+{
+  const struct search_result *ra = a;
+  const struct search_result *rb = b;
+
+  if (ra->app_name < rb->app_name)
+    return -1;
+
+  else if (ra->app_name > rb->app_name)
+    return 1;
+
+  else
+    return ra->category - rb->category;
+}
+
+static gint
+compare_categories (gconstpointer a,
+                    gconstpointer b)
+{
+  const struct search_result *ra = a;
+  const struct search_result *rb = b;
+
+  return ra->category - rb->category;
+}
+
+static void
+add_token_result (const gchar *app_name,
+                  guint16      category)
+{
+  if G_UNLIKELY (static_token_results_size == static_token_results_allocated)
+    {
+      static_token_results_allocated = MAX (16, static_token_results_allocated * 2);
+      static_token_results = g_renew (struct search_result, static_token_results, 
static_token_results_allocated);
+    }
+
+  static_token_results[static_token_results_size].app_name = app_name;
+  static_token_results[static_token_results_size].category = category;
+  static_token_results_size++;
+}
+
+static void
+merge_token_results (gboolean first)
+{
+  qsort (static_token_results, static_token_results_size, sizeof (struct search_result), compare_results);
+
+  /* If this is the first token then we are basically merging a list with
+   * itself -- we only perform de-duplication.
+   *
+   * If this is not the first token then we are doing a real merge.
+   */
+  if (first)
+    {
+      const gchar *last_name = NULL;
+      gint i;
+
+      /* We must de-duplicate, but we do so by taking the best category
+       * in each case.
+       *
+       * The final list can be as large as the input here, so make sure
+       * we have enough room (even if it's too much room).
+       */
+
+      if G_UNLIKELY (static_search_results_allocated < static_token_results_size)
+        {
+          static_search_results_allocated = static_token_results_allocated;
+          static_search_results = g_renew (struct search_result,
+                                           static_search_results,
+                                           static_search_results_allocated);
+        }
+
+      for (i = 0; i < static_token_results_size; i++)
+        {
+          /* The list is sorted so that the best match for a given id
+           * will be at the front, so once we have copied an id, skip
+           * the rest of the entries for the same id.
+           */
+          if (static_token_results[i].app_name == last_name)
+            continue;
+
+          last_name = static_token_results[i].app_name;
+
+          static_search_results[static_search_results_size++] = static_token_results[i];
+        }
+    }
+  else
+    {
+      const gchar *last_name = NULL;
+      gint i, j = 0;
+      gint k = 0;
+
+      /* We only ever remove items from the results list, so no need to
+       * resize to ensure that we have enough room.
+       */
+      for (i = 0; i < static_token_results_size; i++)
+        {
+          if (static_token_results[i].app_name == last_name)
+            continue;
+
+          last_name = static_token_results[i].app_name;
+
+          /* Now we only want to have a result in static_search_results
+           * if we already have it there *and* we have it in
+           * static_token_results as well.  The category will be the
+           * lesser of the two.
+           *
+           * Skip past the results in static_search_results that are not
+           * going to be matches.
+           */
+          while (k < static_search_results_size &&
+                 static_search_results[k].app_name < static_token_results[i].app_name)
+            k++;
+
+          if (k < static_search_results_size &&
+              static_search_results[k].app_name == static_token_results[i].app_name)
+            {
+              /* We have a match.
+               *
+               * Category should be the worse of the two (ie:
+               * numerically larger).
+               */
+              static_search_results[j].app_name = static_search_results[k].app_name;
+              static_search_results[j].category = MAX (static_search_results[k].category,
+                                                       static_token_results[i].category);
+              j++;
+            }
+        }
+
+      static_search_results_size = j;
+    }
+
+  /* Clear it out for next time... */
+  static_token_results_size = 0;
+}
+
+static void
+reset_total_search_results (void)
+{
+  static_total_results_size = 0;
+}
+
+static void
+sort_total_search_results (void)
+{
+  qsort (static_total_results, static_total_results_size, sizeof (struct search_result), compare_categories);
+}
+
+static void
+merge_directory_results (void)
+{
+  if G_UNLIKELY (static_total_results_size + static_search_results_size > static_total_results_allocated)
+    {
+      static_total_results_allocated = MAX (16, static_total_results_allocated);
+      while (static_total_results_allocated < static_total_results_size + static_search_results_size)
+        static_total_results_allocated *= 2;
+      static_total_results = g_renew (struct search_result, static_total_results, 
static_total_results_allocated);
+    }
+
+  memcpy (static_total_results + static_total_results_size,
+          static_search_results,
+          static_search_results_size * sizeof (struct search_result));
+
+  static_total_results_size += static_search_results_size;
+
+  /* Clear it out for next time... */
+  static_search_results_size = 0;
+}
+
+
 /* Support for unindexed DesktopFileDirs {{{2 */
 static void
 get_apps_from_dir (GHashTable **apps,
@@ -332,6 +603,157 @@ desktop_file_dir_unindexed_get_all (DesktopFileDir *dir,
     }
 }
 
+typedef struct _MemoryIndexEntry MemoryIndexEntry;
+typedef GHashTable MemoryIndex;
+
+struct _MemoryIndexEntry
+{
+  const gchar      *app_name; /* pointer to the hashtable key */
+  gint              match_category;
+  MemoryIndexEntry *next;
+};
+
+static void
+memory_index_entry_free (gpointer data)
+{
+  MemoryIndexEntry *mie = data;
+
+  while (mie)
+    {
+      MemoryIndexEntry *next = mie->next;
+
+      g_slice_free (MemoryIndexEntry, mie);
+      mie = next;
+    }
+}
+
+static void
+memory_index_add_token (MemoryIndex *mi,
+                        const gchar *token,
+                        gint         match_category,
+                        const gchar *app_name)
+{
+  MemoryIndexEntry *mie, *first;
+
+  mie = g_slice_new (MemoryIndexEntry);
+  mie->app_name = app_name;
+  mie->match_category = match_category;
+
+  first = g_hash_table_lookup (mi, token);
+
+  if (first)
+    {
+      mie->next = first->next;
+      first->next = mie;
+    }
+  else
+    {
+      mie->next = NULL;
+      g_hash_table_insert (mi, g_strdup (token), mie);
+    }
+}
+
+static void
+memory_index_add_string (MemoryIndex *mi,
+                         const gchar *string,
+                         gint         match_category,
+                         const gchar *app_name)
+{
+  gchar **tokens, **alternates;
+  gint i;
+
+  tokens = g_str_tokenize_and_fold (string, NULL, &alternates);
+
+  for (i = 0; tokens[i]; i++)
+    memory_index_add_token (mi, tokens[i], match_category, app_name);
+
+  for (i = 0; alternates[i]; i++)
+    memory_index_add_token (mi, alternates[i], match_category, app_name);
+
+  g_strfreev (alternates);
+  g_strfreev (tokens);
+}
+
+static MemoryIndex *
+memory_index_new (void)
+{
+  return g_hash_table_new_full (g_str_hash, g_str_equal, g_free, memory_index_entry_free);
+}
+
+static void
+desktop_file_dir_unindexed_setup_search (DesktopFileDir *dir)
+{
+  GHashTableIter iter;
+  gpointer app, path;
+
+  dir->memory_index = memory_index_new ();
+
+  /* Nothing to search? */
+  if (dir->app_names == NULL)
+    return;
+
+  g_hash_table_iter_init (&iter, dir->app_names);
+  while (g_hash_table_iter_next (&iter, &app, &path))
+    {
+      GKeyFile *key_file;
+
+      if (desktop_file_dir_app_name_is_masked (dir, app))
+        continue;
+
+      key_file = g_key_file_new ();
+
+      if (g_key_file_load_from_file (key_file, path, G_KEY_FILE_NONE, NULL) &&
+          !g_key_file_get_boolean (key_file, "Desktop Entry", "Hidden", NULL))
+        {
+          /* Index the interesting keys... */
+          gint i;
+
+          for (i = 0; i < G_N_ELEMENTS (desktop_key_match_category); i++)
+            {
+              gchar *value;
+
+              if (!desktop_key_match_category[i])
+                continue;
+
+              value = g_key_file_get_locale_string (key_file, "Desktop Entry", desktop_key_get_name (i), 
NULL, NULL);
+
+              if (value)
+                memory_index_add_string (dir->memory_index, value, desktop_key_match_category[i], app);
+
+              g_free (value);
+            }
+        }
+
+      g_key_file_free (key_file);
+    }
+}
+
+static void
+desktop_file_dir_unindexed_search (DesktopFileDir  *dir,
+                                   const gchar     *search_token)
+{
+  GHashTableIter iter;
+  gpointer key, value;
+
+  if (!dir->memory_index)
+    desktop_file_dir_unindexed_setup_search (dir);
+
+  g_hash_table_iter_init (&iter, dir->memory_index);
+  while (g_hash_table_iter_next (&iter, &key, &value))
+    {
+      MemoryIndexEntry *mie = value;
+
+      if (!g_str_has_prefix (key, search_token))
+        continue;
+
+      while (mie)
+        {
+          add_token_result (mie->app_name, mie->match_category);
+          mie = mie->next;
+        }
+    }
+}
+
 /* DesktopFileDir "API" {{{2 */
 
 /*< internal >
@@ -375,6 +797,12 @@ desktop_file_dir_reset (DesktopFileDir *dir)
       dir->app_names = NULL;
     }
 
+  if (dir->memory_index)
+    {
+      g_hash_table_unref (dir->memory_index);
+      dir->memory_index = NULL;
+    }
+
   dir->is_setup = FALSE;
 }
 
@@ -442,6 +870,20 @@ desktop_file_dir_get_all (DesktopFileDir *dir,
   desktop_file_dir_unindexed_get_all (dir, apps);
 }
 
+/*< internal >
+ * desktop_file_dir_search:
+ * @dir: a #DesktopFilEDir
+ * @term: a normalised and casefolded search term
+ *
+ * Finds the names of applications in @dir that match @term.
+ */
+static void
+desktop_file_dir_search (DesktopFileDir *dir,
+                         const gchar    *search_token)
+{
+  desktop_file_dir_unindexed_search (dir, search_token);
+}
+
 /* Lock/unlock and global setup API {{{2 */
 
 static void
@@ -3055,6 +3497,89 @@ g_app_info_get_default_for_uri_scheme (const char *uri_scheme)
 /* "Get all" API {{{2 */
 
 /**
+ * g_desktop_app_info_search:
+ * @search_string: the search string to use
+ *
+ * Searches desktop files for ones that match @search_string.
+ *
+ * The return value is an array of strvs.  Each strv contains a list of
+ * applications that matched @search_string with an equal score.  The
+ * outer list is sorted by score so that the first strv contains the
+ * best-matching applications, and so on.
+ *
+ * Returns: (array zero-terminated=1) (element-type GStrv) (transfer full): a
+ *   list of strvs.  Free each item with g_strfreev() and free the outer
+ *   list with g_free().
+ */
+gchar ***
+g_desktop_app_info_search (const gchar *search_string)
+{
+  gchar **search_tokens;
+  gint last_category = -1;
+  gchar ***results;
+  gint n_categories = 0;
+  gint start_of_category;
+  gint i, j;
+
+  search_tokens = g_str_tokenize_and_fold (search_string, NULL, NULL);
+
+  desktop_file_dirs_lock ();
+
+  reset_total_search_results ();
+
+  for (i = 0; i < n_desktop_file_dirs; i++)
+    {
+      for (j = 0; search_tokens[j]; j++)
+        {
+          desktop_file_dir_search (&desktop_file_dirs[i], search_tokens[j]);
+          merge_token_results (j == 0);
+        }
+      merge_directory_results ();
+    }
+
+  sort_total_search_results ();
+
+  /* Count the total number of unique categories */
+  for (i = 0; i < static_total_results_size; i++)
+    if (static_total_results[i].category != last_category)
+      {
+        last_category = static_total_results[i].category;
+        n_categories++;
+      }
+
+  results = g_new (gchar **, n_categories + 1);
+
+  /* Start loading into the results list */
+  start_of_category = 0;
+  for (i = 0; i < n_categories; i++)
+    {
+      gint n_items_in_category = 0;
+      gint this_category;
+      gint j;
+
+      this_category = static_total_results[start_of_category].category;
+
+      while (start_of_category + n_items_in_category < static_total_results_size &&
+             static_total_results[start_of_category + n_items_in_category].category == this_category)
+        n_items_in_category++;
+
+      results[i] = g_new (gchar *, n_items_in_category + 1);
+      for (j = 0; j < n_items_in_category; j++)
+        results[i][j] = g_strdup (static_total_results[start_of_category + j].app_name);
+      results[i][j] = NULL;
+
+      start_of_category += n_items_in_category;
+    }
+  results[i] = NULL;
+
+  desktop_file_dirs_unlock ();
+
+  g_strfreev (search_tokens);
+
+  return results;
+}
+
+/**
  * g_app_info_get_all:
  *
  * Gets a list of all of the applications currently registered
diff --git a/gio/gdesktopappinfo.h b/gio/gdesktopappinfo.h
index 5f7f68a..dbf31db 100644
--- a/gio/gdesktopappinfo.h
+++ b/gio/gdesktopappinfo.h
@@ -164,6 +164,8 @@ gboolean    g_desktop_app_info_launch_uris_as_manager (GDesktopAppInfo
                                                       gpointer                    pid_callback_data,
                                                       GError                    **error);
 
+GLIB_AVAILABLE_IN_2_40
+gchar *** g_desktop_app_info_search (const gchar *search_string);
 
 G_END_DECLS
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]