[yelp] Support for keywords in search results for Mallard and DocBook



commit c9b1ca3ae200b424f0bc67ceb1a06fb598a839c3
Author: Shaun McCance <shaunm gnome org>
Date:   Mon Dec 3 09:28:56 2018 -0500

    Support for keywords in search results for Mallard and DocBook

 libyelp/yelp-docbook-document.c | 82 +++++++++++++++++++++++++++++++++++++++--
 libyelp/yelp-document.c         | 38 +++++++++++++++++++
 libyelp/yelp-document.h         |  6 +++
 libyelp/yelp-mallard-document.c | 35 ++++++++++++++++--
 libyelp/yelp-search-entry.c     | 25 +++++++++----
 5 files changed, 172 insertions(+), 14 deletions(-)
---
diff --git a/libyelp/yelp-docbook-document.c b/libyelp/yelp-docbook-document.c
index 6ecaf316..893732c0 100644
--- a/libyelp/yelp-docbook-document.c
+++ b/libyelp/yelp-docbook-document.c
@@ -81,6 +81,8 @@ static gboolean       docbook_walk_divisionQ    (YelpDocbookDocument  *docbook,
                                                  xmlNodePtr            cur);
 static gchar *        docbook_walk_get_title    (YelpDocbookDocument  *docbook,
                                                  xmlNodePtr            cur);
+static gchar *        docbook_walk_get_keywords (YelpDocbookDocument  *docbook,
+                                                 xmlNodePtr            cur);
 
 static void           transform_chunk_ready     (YelpTransform        *transform,
                                                  gchar                *chunk_id,
@@ -123,6 +125,8 @@ struct _YelpDocbookDocumentPrivate {
 
     GFileMonitor **monitors;
     gint64         reload_time;
+
+    GHashTable   *autoids;
 };
 
 /******************************************************************************/
@@ -161,6 +165,7 @@ yelp_docbook_document_init (YelpDocbookDocument *docbook)
     YelpDocbookDocumentPrivate *priv = GET_PRIV (docbook);
 
     priv->state = DOCBOOK_STATE_BLANK;
+    priv->autoids = NULL;
 
     g_mutex_init (&priv->mutex);
 }
@@ -194,6 +199,8 @@ yelp_docbook_document_finalize (GObject *object)
     g_free (priv->cur_prev_id);
     g_free (priv->root_id);
 
+    g_hash_table_destroy (priv->autoids);
+
     g_mutex_clear (&priv->mutex);
 
     G_OBJECT_CLASS (yelp_docbook_document_parent_class)->finalize (object);
@@ -532,6 +539,7 @@ docbook_walk (YelpDocbookDocument *docbook)
     gchar        autoidstr[20];
     xmlChar     *id = NULL;
     xmlChar     *title = NULL;
+    xmlChar     *keywords = NULL;
     xmlNodePtr   cur, old_cur;
     gboolean chunkQ;
     YelpDocbookDocumentPrivate *priv = GET_PRIV (docbook);
@@ -558,7 +566,7 @@ docbook_walk (YelpDocbookDocument *docbook)
     if (docbook_walk_divisionQ (docbook, priv->xmlcur) && !id) {
         /* If id attribute is not present, autogenerate a
          * unique value, and insert it into the in-memory tree */
-        g_snprintf (autoidstr, 20, "//autoid-%d", ++autoid);
+        g_snprintf (autoidstr, 20, "//yelp-autoid-%d", ++autoid);
         if (priv->xmlcur->ns) {
             xmlNewNsProp (priv->xmlcur,
                           xmlNewNs (priv->xmlcur, XML_XML_NAMESPACE, BAD_CAST "xml"),
@@ -569,15 +577,20 @@ docbook_walk (YelpDocbookDocument *docbook)
             xmlNewProp (priv->xmlcur, BAD_CAST "id", BAD_CAST autoidstr);
             id = xmlGetProp (priv->xmlcur, BAD_CAST "id");
         }
+        if (!priv->autoids)
+            priv->autoids = g_hash_table_new_full (g_str_hash, g_str_equal, xmlFree, xmlFree);
+        g_hash_table_insert (priv->autoids, xmlGetNodePath(priv->xmlcur), xmlStrdup (id));
     }
 
     if (docbook_walk_chunkQ (docbook, priv->xmlcur, priv->cur_depth, priv->max_depth)) {
         title = BAD_CAST docbook_walk_get_title (docbook, priv->xmlcur);
+        keywords = BAD_CAST docbook_walk_get_keywords (docbook, priv->xmlcur);
 
         debug_print (DB_DEBUG, "  id: \"%s\"\n", id);
         debug_print (DB_DEBUG, "  title: \"%s\"\n", title);
 
         yelp_document_set_page_title (document, (gchar *) id, (gchar *) title);
+        yelp_document_set_page_keywords (document, (gchar *) id, (gchar *) keywords);
 
         if (priv->cur_prev_id) {
             yelp_document_set_prev_id (document, (gchar *) id, priv->cur_prev_id);
@@ -626,6 +639,8 @@ docbook_walk (YelpDocbookDocument *docbook)
         xmlFree (id);
     if (title != NULL)
         xmlFree (title);
+    if (keywords != NULL)
+        xmlFree (keywords);
 }
 
 static gboolean
@@ -791,6 +806,42 @@ docbook_walk_get_title (YelpDocbookDocument *docbook,
         return g_strdup (_("Unknown"));
 }
 
+static gchar * /* new ", "-joined keyword string (free with g_free), or NULL if none found */
+docbook_walk_get_keywords (YelpDocbookDocument *docbook, /* not referenced in the body */
+                           xmlNodePtr           cur)     /* element whose info child is scanned */
+{
+    xmlNodePtr info, keywordset, keyword;
+    GString *ret = NULL;
+    /* Walk cur -> first child named "...info" -> keywordset -> keyword; names only, namespaces are not checked. */
+    for (info = cur->children; info; info = info->next) {
+        if (g_str_has_suffix ((const gchar *) info->name, "info")) {
+            for (keywordset = info->children; keywordset; keywordset = keywordset->next) {
+                if (!xmlStrcmp (keywordset->name, BAD_CAST "keywordset")) {
+                    for (keyword = keywordset->children; keyword; keyword = keyword->next) {
+                        if (!xmlStrcmp (keyword->name, BAD_CAST "keyword")) {
+                            xmlChar *content;
+                            if (ret)
+                                g_string_append(ret, ", "); /* separator before every keyword but the first */
+                            else
+                                ret = g_string_new (""); /* created lazily, so ret stays NULL without keywords */
+                            /* FIXME: try this with just ->children->text */
+                            content = xmlNodeGetContent (keyword);
+                            g_string_append (ret, (gchar *) content);
+                            xmlFree (content);
+                        }
+                    }
+                }
+            }
+            break; /* only the first matching info child is searched */
+        }
+    }
+
+    if (ret)
+        return g_string_free (ret, FALSE); /* FALSE: keep the character data and return it */
+    else
+        return NULL;
+}
+
 /******************************************************************************/
 
 static void
@@ -910,6 +961,7 @@ typedef struct {
     GString *str;
     gint depth;
     gint max_depth;
+    gboolean in_info;
 } DocbookIndexData;
 
 static void
@@ -926,10 +978,15 @@ docbook_index_node (DocbookIndexData *index)
         g_string_append (index->str, (const gchar *) index->cur->content);
         return;
     }
-    if (index->cur->type != XML_ELEMENT_NODE ||
-        g_str_has_suffix ((const gchar *) index->cur->name, "info") ||
-        g_str_equal (index->cur->name, "remark"))
+    if (index->cur->type != XML_ELEMENT_NODE) {
+        return;
+    }
+    if (g_str_equal (index->cur->name, "remark")) {
+        return;
+    }
+    if (g_str_has_suffix ((const gchar *) index->cur->name, "info")) {
         return;
+    }
     oldcur = index->cur;
     for (child = index->cur->children; child; child = child->next) {
         index->cur = child;
@@ -944,15 +1001,32 @@ docbook_index_chunk (DocbookIndexData *index)
     xmlChar *id;
     xmlNodePtr child;
     gchar *title = NULL;
+    gchar *keywords;
     GSList *chunks = NULL;
+    YelpDocbookDocumentPrivate *priv = GET_PRIV (index->docbook);
 
     id = xmlGetProp (index->cur, BAD_CAST "id");
+    if (!id)
+        id = xmlGetNsProp (index->cur, XML_XML_NAMESPACE, BAD_CAST "id");
+    if (!id) {
+        xmlChar *path = xmlGetNodePath (index->cur);
+        id = g_hash_table_lookup (priv->autoids, path);
+        if (id)
+            id = xmlStrdup (id);
+        xmlFree (path);
+    }
+
     if (id != NULL) {
         title = docbook_walk_get_title (index->docbook, index->cur);
         if (index->cur->parent->parent == NULL)
             yelp_storage_set_root_title (yelp_storage_get_default (),
                                          index->doc_uri, title);
         index->str = g_string_new ("");
+        keywords = docbook_walk_get_keywords (index->docbook, index->cur);
+        if (keywords) {
+            g_string_append (index->str, keywords);
+            g_free (keywords);
+        }
     }
 
     for (child = index->cur->children; child; child = child->next) {
diff --git a/libyelp/yelp-document.c b/libyelp/yelp-document.c
index c1eec9cc..83582eaf 100644
--- a/libyelp/yelp-document.c
+++ b/libyelp/yelp-document.c
@@ -84,6 +84,7 @@ struct _YelpDocumentPriv {
     Hash   *page_ids;      /* Mapping of fragment IDs to real page IDs */
     Hash   *titles;        /* Mapping of page IDs to titles */
     Hash   *descs;         /* Mapping of page IDs to descs */
+    Hash   *keywords;      /* Mapping of page IDs to keywords */
     Hash   *icons;         /* Mapping of page IDs to icons */
     Hash   *mime_types;    /* Mapping of page IDs to mime types */
     Hash   *contents;      /* Mapping of page IDs to string content */
@@ -313,6 +314,7 @@ yelp_document_init (YelpDocument *document)
     priv->page_ids = hash_new (g_free );
     priv->titles = hash_new (g_free);
     priv->descs = hash_new (g_free);
+    priv->keywords = hash_new (g_free);
     priv->icons = hash_new (g_free);
     priv->mime_types = hash_new (g_free);
     priv->contents = hash_new ((GDestroyNotify) str_unref);
@@ -361,6 +363,7 @@ yelp_document_finalize (GObject *object)
     hash_free (document->priv->page_ids);
     hash_free (document->priv->titles);
     hash_free (document->priv->descs);
+    hash_free (document->priv->keywords);
     hash_free (document->priv->icons);
     hash_free (document->priv->mime_types);
 
@@ -750,6 +753,41 @@ yelp_document_set_page_desc (YelpDocument *document,
     g_mutex_unlock (&document->priv->mutex);
 }
 
+gchar * /* g_strdup'd copy of the stored keywords (free with g_free), or NULL if none */
+yelp_document_get_page_keywords (YelpDocument *document,
+                                 const gchar  *page_id)
+{
+    gchar *real, *ret = NULL;
+
+    g_assert (document != NULL && YELP_IS_DOCUMENT (document));
+
+    if (page_id != NULL && g_str_has_prefix (page_id, "search="))
+        return NULL; /* ids starting with "search=" never yield keywords */
+
+    g_mutex_lock (&document->priv->mutex);
+    real = hash_lookup (document->priv->page_ids, page_id); /* resolve page_id to the real page ID first */
+    if (real) {
+       ret = hash_lookup (document->priv->keywords, real);
+       if (ret)
+           ret = g_strdup (ret); /* copy while the mutex is still held */
+    }
+    g_mutex_unlock (&document->priv->mutex);
+
+    return ret;
+}
+
+void /* Stores a copy of keywords for page_id in the document's keywords table. */
+yelp_document_set_page_keywords (YelpDocument *document,
+                                 const gchar  *page_id,
+                                 const gchar  *keywords)
+{
+    g_assert (document != NULL && YELP_IS_DOCUMENT (document));
+    /* The table receives its own g_strdup'd copy; page_id is used as the key as given. */
+    g_mutex_lock (&document->priv->mutex);
+    hash_replace (document->priv->keywords, page_id, g_strdup (keywords));
+    g_mutex_unlock (&document->priv->mutex);
+}
+
 gchar *
 yelp_document_get_page_icon (YelpDocument *document,
                              const gchar  *page_id)
diff --git a/libyelp/yelp-document.h b/libyelp/yelp-document.h
index 175b281a..8ee9203e 100644
--- a/libyelp/yelp-document.h
+++ b/libyelp/yelp-document.h
@@ -151,6 +151,12 @@ void              yelp_document_set_page_desc       (YelpDocument         *docum
                                                      const gchar          *page_id,
                                                      const gchar          *desc);
 
+gchar *           yelp_document_get_page_keywords   (YelpDocument         *document,
+                                                     const gchar          *page_id);
+void              yelp_document_set_page_keywords   (YelpDocument         *document,
+                                                     const gchar          *page_id,
+                                                     const gchar          *keywords);
+
 gchar *           yelp_document_get_page_icon       (YelpDocument         *document,
                                                      const gchar          *page_id);
 void              yelp_document_set_page_icon       (YelpDocument         *document,
diff --git a/libyelp/yelp-mallard-document.c b/libyelp/yelp-mallard-document.c
index 74fc80ee..b72dc9a5 100644
--- a/libyelp/yelp-mallard-document.c
+++ b/libyelp/yelp-mallard-document.c
@@ -69,6 +69,7 @@ typedef struct {
 
     gchar         *page_title;
     gchar         *page_desc;
+    gchar         *page_keywords;
     gchar         *next_page;
 } MallardPageData;
 
@@ -393,6 +394,10 @@ mallard_think (YelpMallardDocument *mallard)
                 yelp_document_set_page_desc ((YelpDocument *) mallard,
                                              page_data->page_id,
                                              page_data->page_desc);
+                yelp_document_set_page_keywords ((YelpDocument *) mallard,
+                                                 page_data->page_id,
+                                                 page_data->page_keywords);
+
                 if (page_data->next_page != NULL) {
                     yelp_document_set_next_id ((YelpDocument *) mallard,
                                                page_data->page_id,
@@ -662,11 +667,25 @@ mallard_page_data_info (MallardPageData *page_data,
             xmlXPathObjectPtr obj;
             page_data->xpath->node = child;
             obj = xmlXPathCompiledEval (priv->normalize, page_data->xpath);
+            g_free(page_data->page_desc);
             page_data->page_desc = g_strdup ((const gchar *) obj->stringval);
             xmlXPathFreeObject (obj);
 
             xmlAddChild (cache_node, xmlCopyNode (child, 1));
         }
+        else if (xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "keywords")) {
+            /* FIXME: multiple keywords? same for desc/title */
+
+            YelpMallardDocumentPrivate *priv = GET_PRIV (page_data->mallard);
+            xmlXPathObjectPtr obj;
+            page_data->xpath->node = child;
+            obj = xmlXPathCompiledEval (priv->normalize, page_data->xpath);
+            g_free(page_data->page_keywords);
+            page_data->page_keywords = g_strdup ((const gchar *) obj->stringval);
+            xmlXPathFreeObject (obj);
+
+            xmlAddChild (cache_node, xmlCopyNode (child, 1));
+        }
         else if (xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "link")) {
             xmlChar *type, *next;
 
@@ -749,6 +768,7 @@ mallard_page_data_free (MallardPageData *page_data)
         xmlXPathFreeContext (page_data->xpath);
     g_free (page_data->page_title);
     g_free (page_data->page_desc);
+    g_free (page_data->page_keywords);
     g_free (page_data->next_page);
     g_free (page_data);
 }
@@ -896,16 +916,18 @@ typedef struct {
     xmlNodePtr cur;
     GString *str;
     gboolean is_inline;
+    gboolean in_info;
 } MallardIndexData;
 
 static void
 mallard_index_node (MallardIndexData *index)
 {
     xmlNodePtr orig, child;
-    gboolean was_inline;
+    gboolean was_inline, was_info;
 
     orig = index->cur;
     was_inline = index->is_inline;
+    was_info = index->in_info;
 
     for (child = index->cur->children; child; child = child->next) {
         if (index->is_inline) {
@@ -921,18 +943,24 @@ mallard_index_node (MallardIndexData *index)
         }
 
         if (child->type != XML_ELEMENT_NODE ||
-            xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "info") ||
             xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "comment"))
             continue;
 
-        if (xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "p") ||
+        if (xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "info")) {
+            index->in_info = TRUE;
+        }
+        else if (xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "p") ||
             xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "code") ||
             xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "screen") ||
             xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "title") ||
             xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "desc") ||
+            xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "keywords") ||
             xml_node_is_ns_name (child, MALLARD_NS, BAD_CAST "cite")) {
             index->is_inline = TRUE;
         }
+        else if (index->in_info && !index->is_inline) {
+            continue;
+        }
 
         index->cur = child;
         mallard_index_node (index);
@@ -943,6 +971,7 @@ mallard_index_node (MallardIndexData *index)
 
         index->cur = orig;
         index->is_inline = was_inline;
+        index->in_info = was_info;
     }
 }
 
diff --git a/libyelp/yelp-search-entry.c b/libyelp/yelp-search-entry.c
index 55be49cc..2b5a8196 100644
--- a/libyelp/yelp-search-entry.c
+++ b/libyelp/yelp-search-entry.c
@@ -100,7 +100,8 @@ enum {
     COMPLETION_COL_DESC,
     COMPLETION_COL_ICON,
     COMPLETION_COL_PAGE,
-    COMPLETION_COL_FLAGS
+    COMPLETION_COL_FLAGS,
+    COMPLETION_COL_KEYWORDS
 };
 
 enum {
@@ -451,7 +452,7 @@ entry_match_func (GtkEntryCompletion *completion,
                   YelpSearchEntry  *entry)
 {
     gint stri;
-    gchar *title, *desc, *titlecase = NULL, *desccase = NULL;
+    gchar *title, *desc, *keywords, *titlecase = NULL, *desccase = NULL, *keywordscase = NULL;
     gboolean ret = FALSE;
     gchar **strs;
     gint flags;
@@ -470,6 +471,7 @@ entry_match_func (GtkEntryCompletion *completion,
     gtk_tree_model_get (model, iter,
                         COMPLETION_COL_TITLE, &title,
                         COMPLETION_COL_DESC, &desc,
+                        COMPLETION_COL_KEYWORDS, &keywords,
                         -1);
     if (title) {
         titlecase = g_utf8_casefold (title, -1);
@@ -479,14 +481,20 @@ entry_match_func (GtkEntryCompletion *completion,
         desccase = g_utf8_casefold (desc, -1);
         g_free (desc);
     }
+    if (keywords) {
+        keywordscase = g_utf8_casefold (keywords, -1);
+        g_free (keywords);
+    }
 
     strs = g_regex_split (nonword, key, 0);
     ret = TRUE;
     for (stri = 0; strs[stri]; stri++) {
         if (!titlecase || !strstr (titlecase, strs[stri])) {
             if (!desccase || !strstr (desccase, strs[stri])) {
-                ret = FALSE;
-                break;
+                if (!keywordscase || !strstr (keywordscase, strs[stri])) {
+                    ret = FALSE;
+                    break;
+                }
             }
         }
     }
@@ -592,12 +600,13 @@ view_loaded (YelpView          *view,
         !g_str_equal (doc_uri, priv->completion_uri)) {
         completion = (GtkTreeModel *) g_hash_table_lookup (completions, doc_uri);
         if (completion == NULL) {
-            GtkListStore *base = gtk_list_store_new (5,
+            GtkListStore *base = gtk_list_store_new (6,
                                                      G_TYPE_STRING,  /* title */
                                                      G_TYPE_STRING,  /* desc */
                                                      G_TYPE_STRING,  /* icon */
                                                      G_TYPE_STRING,  /* uri */
-                                                     G_TYPE_INT      /* flags */
+                                                     G_TYPE_INT,     /* flags */
+                                                     G_TYPE_STRING   /* keywords */
                                                      );
             completion = gtk_tree_model_sort_new_with_model (GTK_TREE_MODEL (base));
             gtk_tree_sortable_set_default_sort_func (GTK_TREE_SORTABLE (completion),
@@ -607,15 +616,17 @@ view_loaded (YelpView          *view,
             if (document != NULL) {
                 ids = yelp_document_list_page_ids (document);
                 for (i = 0; ids[i]; i++) {
-                    gchar *title, *desc, *icon;
+                    gchar *title, *desc, *icon, *keywords;
                     gtk_list_store_insert (GTK_LIST_STORE (base), &iter, 0);
                     title = yelp_document_get_page_title (document, ids[i]);
                     desc = yelp_document_get_page_desc (document, ids[i]);
                     icon = yelp_document_get_page_icon (document, ids[i]);
+                    keywords = yelp_document_get_page_keywords (document, ids[i]);
                     gtk_list_store_set (base, &iter,
                                         COMPLETION_COL_TITLE, title,
                                         COMPLETION_COL_DESC, desc,
                                         COMPLETION_COL_ICON, icon,
+                                        COMPLETION_COL_KEYWORDS, keywords,
                                         COMPLETION_COL_PAGE, ids[i],
                                         -1);
                     g_free (icon);


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]