[gi-docgen: 1/5] indexing: Don't generate 'terms' in JSON index




commit 8c2f90ecd24c5e3765c6e694bbf57f57720579b2
Author: FeRD (Frank Dana) <ferdnyc gmail com>
Date:   Mon Mar 21 03:58:48 2022 -0400

    indexing: Don't generate 'terms' in JSON index
    
    Since the 'symbols' list in the JSON index is now being sorted, the
    row-based references in the 'terms' list are invalidated. As such,
    there's no reason to generate them at all.
    
    Do away with all index-based symbol lookups, and all parsing and
    stemming previously used to generate the "terms" list.

 gidocgen/gdgenindices.py | 201 ++++-------------------------------------------
 gidocgen/utils.py        |  76 +-----------------
 2 files changed, 18 insertions(+), 259 deletions(-)
---
diff --git a/gidocgen/gdgenindices.py b/gidocgen/gdgenindices.py
index e759c2d..e61f1e4 100644
--- a/gidocgen/gdgenindices.py
+++ b/gidocgen/gdgenindices.py
@@ -6,7 +6,7 @@ import json
 import os
 import sys
 
-from . import config, core, gir, log, porter, utils
+from . import config, core, gir, log, utils
 
 
 HELP_MSG = "Generates the symbol indices for search"
@@ -14,22 +14,13 @@ HELP_MSG = "Generates the symbol indices for search"
 MISSING_DESCRIPTION = "No description available."
 
 
-def add_index_terms(index, terms, docid):
-    for term in terms:
-        docs = index.setdefault(term, [])
-        if docid not in docs:
-            docs.append(docid)
-
-
-def _gen_aliases(config, stemmer, index, repository, symbols):
+def _gen_aliases(config, index, repository, symbols):
     index_symbols = index["symbols"]
-    index_terms = index["terms"]
 
     for alias in symbols:
         if config.is_hidden(alias.name):
             log.debug(f"Skipping hidden type {alias.name}")
             continue
-        idx = len(index_symbols)
         if alias.doc is not None:
             description = alias.doc.content
         else:
@@ -40,20 +31,15 @@ def _gen_aliases(config, stemmer, index, repository, symbols):
             "ctype": alias.base_ctype,
             "summary": utils.preprocess_docs(description, repository.namespace, summary=True, plain=True),
         })
-        add_index_terms(index_terms, [alias.base_ctype.lower()], idx)
-        add_index_terms(index_terms, utils.index_identifier(alias.name, stemmer), idx)
-        add_index_terms(index_terms, utils.index_description(description, stemmer), idx)
 
 
-def _gen_bitfields(config, stemmer, index, repository, symbols):
+def _gen_bitfields(config, index, repository, symbols):
     index_symbols = index["symbols"]
-    index_terms = index["terms"]
 
     for bitfield in symbols:
         if config.is_hidden(bitfield.name):
             log.debug(f"Skipping hidden type {bitfield.name}")
             continue
-        idx = len(index_symbols)
         if bitfield.doc is not None:
             description = bitfield.doc.content
         else:
@@ -64,17 +50,8 @@ def _gen_bitfields(config, stemmer, index, repository, symbols):
             "ctype": bitfield.base_ctype,
             "summary": utils.preprocess_docs(description, repository.namespace, summary=True, plain=True),
         })
-        add_index_terms(index_terms, [bitfield.base_ctype.lower()], idx)
-        add_index_terms(index_terms, utils.index_identifier(bitfield.name, stemmer), idx)
-        add_index_terms(index_terms, utils.index_description(description, stemmer), idx)
-
-        for member in bitfield.members:
-            add_index_terms(index_terms, [member.name], idx)
-            if member.doc is not None:
-                add_index_terms(index_terms, utils.index_description(member.doc.content, stemmer), idx)
 
         for func in bitfield.functions:
-            func_idx = len(index_symbols)
             if func.doc is not None:
                 func_desc = func.doc.content
             else:
@@ -86,20 +63,15 @@ def _gen_bitfields(config, stemmer, index, repository, symbols):
                 "ident": func.identifier,
                 "summary": utils.preprocess_docs(func_desc, repository.namespace, summary=True, plain=True)
             })
-            add_index_terms(index_terms, [func.identifier], func_idx)
-            add_index_terms(index_terms, utils.index_symbol(func.name, stemmer), func_idx)
-            add_index_terms(index_terms, utils.index_description(func_desc, stemmer), func_idx)
 
 
-def _gen_callbacks(config, stemmer, index, repository, symbols):
+def _gen_callbacks(config, index, repository, symbols):
     index_symbols = index["symbols"]
-    index_terms = index["terms"]
 
     for callback in symbols:
         if config.is_hidden(callback.name):
             log.debug(f"Skipping hidden callback {callback.name}")
             continue
-        idx = len(index_symbols)
         if callback.doc is not None:
             cb_desc = callback.doc.content
         else:
@@ -110,22 +82,16 @@ def _gen_callbacks(config, stemmer, index, repository, symbols):
             "ctype": callback.base_ctype,
             "summary": utils.preprocess_docs(cb_desc, repository.namespace, summary=True, plain=True),
         })
-        add_index_terms(index_terms, [callback.base_ctype.lower()], idx)
-        add_index_terms(index_terms, utils.index_identifier(callback.name, stemmer), idx)
-        add_index_terms(index_terms, utils.index_description(cb_desc, stemmer), idx)
 
 
-def _gen_classes(config, stemmer, index, repository, symbols):
+def _gen_classes(config, index, repository, symbols):
     namespace = repository.namespace
-
     index_symbols = index["symbols"]
-    index_terms = index["terms"]
 
     for cls in symbols:
         if config.is_hidden(cls.name):
             log.debug(f"Skipping hidden type {cls.name}")
             continue
-        idx = len(index_symbols)
         if cls.doc is not None:
             cls_desc = cls.doc.content
         else:
@@ -136,12 +102,8 @@ def _gen_classes(config, stemmer, index, repository, symbols):
             "ctype": cls.base_ctype,
             "summary": utils.preprocess_docs(cls_desc, repository.namespace, summary=True, plain=True),
         })
-        add_index_terms(index_terms, [cls.base_ctype.lower()], idx)
-        add_index_terms(index_terms, utils.index_identifier(cls.name, stemmer), idx)
-        add_index_terms(index_terms, utils.index_description(cls_desc, stemmer), idx)
 
         for ctor in cls.constructors:
-            ctor_idx = len(index_symbols)
             if ctor.doc is not None:
                 ctor_desc = ctor.doc.content
             else:
@@ -153,12 +115,8 @@ def _gen_classes(config, stemmer, index, repository, symbols):
                 "ident": ctor.identifier,
                 "summary": utils.preprocess_docs(ctor_desc, repository.namespace, summary=True, plain=True),
             })
-            add_index_terms(index_terms, [ctor.identifier], ctor_idx)
-            add_index_terms(index_terms, utils.index_symbol(ctor.name, stemmer), ctor_idx)
-            add_index_terms(index_terms, utils.index_description(ctor_desc, stemmer), ctor_idx)
 
         for method in cls.methods:
-            method_idx = len(index_symbols)
             if method.doc is not None:
                 method_desc = method.doc.content
             else:
@@ -170,12 +128,8 @@ def _gen_classes(config, stemmer, index, repository, symbols):
                 "ident": method.identifier,
                 "summary": utils.preprocess_docs(method_desc, repository.namespace, summary=True, plain=True),
             })
-            add_index_terms(index_terms, [method.identifier], method_idx)
-            add_index_terms(index_terms, utils.index_symbol(method.name, stemmer), method_idx)
-            add_index_terms(index_terms, utils.index_description(method_desc, stemmer), method_idx)
 
         for func in cls.functions:
-            func_idx = len(index_symbols)
             if func.doc is not None:
                 func_desc = func.doc.content
             else:
@@ -187,15 +141,11 @@ def _gen_classes(config, stemmer, index, repository, symbols):
                 "ident": func.identifier,
                 "summary": utils.preprocess_docs(func_desc, repository.namespace, summary=True, plain=True),
             })
-            add_index_terms(index_terms, [func.identifier], func_idx)
-            add_index_terms(index_terms, utils.index_symbol(func.name, stemmer), func_idx)
-            add_index_terms(index_terms, utils.index_description(func_desc, stemmer), func_idx)
 
         for prop_name, prop in cls.properties.items():
             if config.is_hidden(cls.name, 'property', prop_name):
                 log.debug(f"Skipping hidden property {cls.name}.{prop_name}")
                 continue
-            prop_idx = len(index_symbols)
             if prop.doc is not None:
                 prop_desc = prop.doc.content
             else:
@@ -206,14 +156,11 @@ def _gen_classes(config, stemmer, index, repository, symbols):
                 "type_name": cls.name,
                 "summary": utils.preprocess_docs(prop_desc, repository.namespace, summary=True, plain=True),
             })
-            add_index_terms(index_terms, utils.index_symbol(prop.name, stemmer), prop_idx)
-            add_index_terms(index_terms, utils.index_description(prop_desc, stemmer), prop_idx)
 
         for signal_name, signal in cls.signals.items():
             if config.is_hidden(cls.name, 'signal', signal_name):
                 log.debug(f"Skipping hidden signal {cls.name}.{signal_name}")
                 continue
-            signal_idx = len(index_symbols)
             if signal.doc is not None:
                 signal_desc = signal.doc.content
             else:
@@ -224,11 +171,8 @@ def _gen_classes(config, stemmer, index, repository, symbols):
                 "type_name": cls.name,
                 "summary": utils.preprocess_docs(signal_desc, repository.namespace, summary=True, plain=True),
             })
-            add_index_terms(index_terms, utils.index_symbol(signal.name, stemmer), signal_idx)
-            add_index_terms(index_terms, utils.index_description(signal_desc, stemmer), signal_idx)
 
         for vfunc in cls.virtual_methods:
-            vfunc_idx = len(index_symbols)
             if vfunc.doc is not None:
                 vfunc_desc = vfunc.doc.content
             else:
@@ -239,13 +183,10 @@ def _gen_classes(config, stemmer, index, repository, symbols):
                 "type_name": cls.name,
                 "summary": utils.preprocess_docs(vfunc_desc, repository.namespace, summary=True, plain=True),
             })
-            add_index_terms(index_terms, utils.index_symbol(vfunc.name, stemmer), vfunc_idx)
-            add_index_terms(index_terms, utils.index_description(vfunc_desc, stemmer), vfunc_idx)
 
         if cls.type_struct is not None:
             cls_struct = namespace.find_record(cls.type_struct)
             for cls_method in cls_struct.methods:
-                cls_method_idx = len(index_symbols)
                 if cls_method.doc is not None:
                     cls_method_desc = cls_method.doc.content
                 else:
@@ -258,20 +199,15 @@ def _gen_classes(config, stemmer, index, repository, symbols):
                     "ident": cls_method.identifier,
                     "summary": utils.preprocess_docs(cls_method_desc, repository.namespace, summary=True, plain=True),
                 })
-                add_index_terms(index_terms, [cls_method.identifier], cls_method_idx)
-                add_index_terms(index_terms, utils.index_symbol(cls_method.name, stemmer), cls_method_idx)
-                add_index_terms(index_terms, utils.index_description(cls_method_desc, stemmer), cls_method_idx)
 
 
-def _gen_constants(config, stemmer, index, repository, symbols):
+def _gen_constants(config, index, repository, symbols):
     index_symbols = index["symbols"]
-    index_terms = index["terms"]
 
     for const in symbols:
         if config.is_hidden(const.name):
             log.debug(f"Skipping hidden const {const.name}")
             continue
-        idx = len(index_symbols)
         if const.doc is not None:
             const_desc = const.doc.content
         else:
@@ -282,20 +218,15 @@ def _gen_constants(config, stemmer, index, repository, symbols):
             "ident": const.ctype,
             "summary": utils.preprocess_docs(const_desc, repository.namespace, summary=True, plain=True),
         })
-        add_index_terms(index_terms, [const.ctype.lower()], idx)
-        add_index_terms(index_terms, utils.index_symbol(const.name, stemmer), idx)
-        add_index_terms(index_terms, utils.index_description(const_desc, stemmer), idx)
 
 
-def _gen_domains(config, stemmer, index, repository, symbols):
+def _gen_domains(config, index, repository, symbols):
     index_symbols = index["symbols"]
-    index_terms = index["terms"]
 
     for domain in symbols:
         if config.is_hidden(domain.name):
             log.debug(f"Skipping hidden type {domain.name}")
             continue
-        idx = len(index_symbols)
         if domain.doc is not None:
             domain_desc = domain.doc.content
         else:
@@ -306,17 +237,8 @@ def _gen_domains(config, stemmer, index, repository, symbols):
             "ctype": domain.base_ctype,
             "summary": utils.preprocess_docs(domain_desc, repository.namespace, summary=True, plain=True),
         })
-        add_index_terms(index_terms, [domain.base_ctype.lower()], idx)
-        add_index_terms(index_terms, utils.index_identifier(domain.name, stemmer), idx)
-        add_index_terms(index_terms, utils.index_description(domain_desc, stemmer), idx)
-
-        for member in domain.members:
-            add_index_terms(index_terms, [member.name], idx)
-            if member.doc is not None:
-                add_index_terms(index_terms, utils.index_description(member.doc.content, stemmer), idx)
 
         for func in domain.functions:
-            func_idx = len(index_symbols)
             if func.doc is not None:
                 func_desc = func.doc.content
             else:
@@ -328,20 +250,15 @@ def _gen_domains(config, stemmer, index, repository, symbols):
                 "ident": func.identifier,
                 "summary": utils.preprocess_docs(func_desc, repository.namespace, summary=True, plain=True),
             })
-            add_index_terms(index_terms, [func.identifier], func_idx)
-            add_index_terms(index_terms, utils.index_symbol(func.name, stemmer), func_idx)
-            add_index_terms(index_terms, utils.index_description(func_desc, stemmer), func_idx)
 
 
-def _gen_enums(config, stemmer, index, repository, symbols):
+def _gen_enums(config, index, repository, symbols):
     index_symbols = index["symbols"]
-    index_terms = index["terms"]
 
     for enum in symbols:
         if config.is_hidden(enum.name):
             log.debug(f"Skipping hidden type {enum.name}")
             continue
-        idx = len(index_symbols)
         if enum.doc is not None:
             enum_desc = enum.doc.content
         else:
@@ -352,17 +269,8 @@ def _gen_enums(config, stemmer, index, repository, symbols):
             "ctype": enum.base_ctype,
             "summary": utils.preprocess_docs(enum_desc, repository.namespace, summary=True, plain=True),
         })
-        add_index_terms(index_terms, [enum.base_ctype.lower()], idx)
-        add_index_terms(index_terms, utils.index_identifier(enum.name, stemmer), idx)
-        add_index_terms(index_terms, utils.index_description(enum_desc, stemmer), idx)
-
-        for member in enum.members:
-            add_index_terms(index_terms, [member.name], idx)
-            if member.doc is not None:
-                add_index_terms(index_terms, utils.index_description(member.doc.content, stemmer), idx)
 
         for func in enum.functions:
-            func_idx = len(index_symbols)
             if func.doc is not None:
                 func_desc = func.doc.content
             else:
@@ -374,20 +282,15 @@ def _gen_enums(config, stemmer, index, repository, symbols):
                 "ident": func.identifier,
                 "summary": utils.preprocess_docs(func_desc, repository.namespace, summary=True, plain=True),
             })
-            add_index_terms(index_terms, [func.identifier], func_idx)
-            add_index_terms(index_terms, utils.index_symbol(func.name, stemmer), func_idx)
-            add_index_terms(index_terms, utils.index_description(func_desc, stemmer), func_idx)
 
 
-def _gen_functions(config, stemmer, index, repository, symbols):
+def _gen_functions(config, index, repository, symbols):
     index_symbols = index["symbols"]
-    index_terms = index["terms"]
 
     for func in symbols:
         if config.is_hidden(func.name):
             log.debug(f"Skipping hidden function {func.name}")
             continue
-        idx = len(index_symbols)
         if func.doc is not None:
             func_desc = func.doc.content
         else:
@@ -398,20 +301,15 @@ def _gen_functions(config, stemmer, index, repository, symbols):
             "ident": func.identifier,
             "summary": utils.preprocess_docs(func_desc, repository.namespace, summary=True, plain=True),
         })
-        add_index_terms(index_terms, [func.identifier], idx)
-        add_index_terms(index_terms, utils.index_symbol(func.name, stemmer), idx)
-        add_index_terms(index_terms, utils.index_description(func_desc, stemmer), idx)
 
 
-def _gen_function_macros(config, stemmer, index, repository, symbols):
+def _gen_function_macros(config, index, repository, symbols):
     index_symbols = index["symbols"]
-    index_terms = index["terms"]
 
     for func in symbols:
         if config.is_hidden(func.name):
             log.debug(f"Skipping hidden macro {func.name}")
             continue
-        idx = len(index_symbols)
         if func.doc is not None:
             func_desc = func.doc.content
         else:
@@ -422,20 +320,15 @@ def _gen_function_macros(config, stemmer, index, repository, symbols):
             "ident": func.identifier,
             "summary": utils.preprocess_docs(func_desc, repository.namespace, summary=True, plain=True),
         })
-        add_index_terms(index_terms, [func.identifier], idx)
-        add_index_terms(index_terms, utils.index_symbol(func.name, stemmer), idx)
-        add_index_terms(index_terms, utils.index_description(func_desc, stemmer), idx)
 
 
-def _gen_interfaces(config, stemmer, index, repository, symbols):
+def _gen_interfaces(config, index, repository, symbols):
     index_symbols = index["symbols"]
-    index_terms = index["terms"]
 
     for iface in symbols:
         if config.is_hidden(iface.name):
             log.debug(f"Skipping hidden type {iface.name}")
             continue
-        idx = len(index_symbols)
         if iface.doc is not None:
             iface_desc = iface.doc.content
         else:
@@ -446,12 +339,8 @@ def _gen_interfaces(config, stemmer, index, repository, symbols):
             "ctype": iface.base_ctype,
             "summary": utils.preprocess_docs(iface_desc, repository.namespace, summary=True, plain=True),
         })
-        add_index_terms(index_terms, [iface.base_ctype.lower()], idx)
-        add_index_terms(index_terms, utils.index_identifier(iface.name, stemmer), idx)
-        add_index_terms(index_terms, utils.index_description(iface_desc, stemmer), idx)
 
         for method in iface.methods:
-            method_idx = len(index_symbols)
             if method.doc is not None:
                 method_desc = method.doc.content
             else:
@@ -463,12 +352,8 @@ def _gen_interfaces(config, stemmer, index, repository, symbols):
                 "ident": method.identifier,
                 "summary": utils.preprocess_docs(method_desc, repository.namespace, summary=True, plain=True),
             })
-            add_index_terms(index_terms, [method.identifier], method_idx)
-            add_index_terms(index_terms, utils.index_symbol(method.name, stemmer), method_idx)
-            add_index_terms(index_terms, utils.index_description(method_desc, stemmer), method_idx)
 
         for func in iface.functions:
-            func_idx = len(index_symbols)
             if func.doc is not None:
                 func_desc = func.doc.content
             else:
@@ -480,15 +365,11 @@ def _gen_interfaces(config, stemmer, index, repository, symbols):
                 "ident": func.identifier,
                 "summary": utils.preprocess_docs(func_desc, repository.namespace, summary=True, plain=True),
             })
-            add_index_terms(index_terms, [func.identifier], func_idx)
-            add_index_terms(index_terms, utils.index_symbol(func.name, stemmer), func_idx)
-            add_index_terms(index_terms, utils.index_description(func_desc, stemmer), func_idx)
 
         for prop_name, prop in iface.properties.items():
             if config.is_hidden(iface.name, 'property', prop_name):
                 log.debug(f"Skipping hidden property {iface.name}.{prop_name}")
                 continue
-            prop_idx = len(index_symbols)
             if prop.doc is not None:
                 prop_desc = prop.doc.content
             else:
@@ -499,14 +380,11 @@ def _gen_interfaces(config, stemmer, index, repository, symbols):
                 "type_name": iface.name,
                 "summary": utils.preprocess_docs(prop_desc, repository.namespace, summary=True, plain=True),
             })
-            add_index_terms(index_terms, utils.index_symbol(prop.name, stemmer), prop_idx)
-            add_index_terms(index_terms, utils.index_description(prop_desc, stemmer), prop_idx)
 
         for signal_name, signal in iface.signals.items():
             if config.is_hidden(iface.name, 'signal', signal_name):
                 log.debug(f"Skipping hidden signal {iface.name}.{signal_name}")
                 continue
-            signal_idx = len(index_symbols)
             if signal.doc is not None:
                 signal_desc = signal.doc.content
             else:
@@ -517,11 +395,8 @@ def _gen_interfaces(config, stemmer, index, repository, symbols):
                 "type_name": iface.name,
                 "summary": utils.preprocess_docs(signal_desc, repository.namespace, summary=True, plain=True),
             })
-            add_index_terms(index_terms, utils.index_symbol(signal.name, stemmer), signal_idx)
-            add_index_terms(index_terms, utils.index_description(signal_desc, stemmer), signal_idx)
 
         for vfunc in iface.virtual_methods:
-            vfunc_idx = len(index_symbols)
             if vfunc.doc is not None:
                 vfunc_desc = vfunc.doc.content
             else:
@@ -532,19 +407,15 @@ def _gen_interfaces(config, stemmer, index, repository, symbols):
                 "type_name": iface.name,
                 "summary": utils.preprocess_docs(vfunc_desc, repository.namespace, summary=True, plain=True),
             })
-            add_index_terms(index_terms, utils.index_symbol(vfunc.name, stemmer), vfunc_idx)
-            add_index_terms(index_terms, utils.index_description(vfunc_desc, stemmer), vfunc_idx)
 
 
-def _gen_records(config, stemmer, index, repository, symbols):
+def _gen_records(config, index, repository, symbols):
     index_symbols = index["symbols"]
-    index_terms = index["terms"]
 
     for record in symbols:
         if config.is_hidden(record.name):
             log.debug(f"Skipping hidden type {record.name}")
             continue
-        idx = len(index_symbols)
         if record.doc is not None:
             desc = record.doc.content
         else:
@@ -555,12 +426,8 @@ def _gen_records(config, stemmer, index, repository, symbols):
             "ctype": record.base_ctype,
             "summary": utils.preprocess_docs(desc, repository.namespace, summary=True, plain=True),
         })
-        add_index_terms(index_terms, [record.base_ctype.lower()], idx)
-        add_index_terms(index_terms, utils.index_identifier(record.name, stemmer), idx)
-        add_index_terms(index_terms, utils.index_description(desc, stemmer), idx)
 
         for ctor in record.constructors:
-            ctor_idx = len(index_symbols)
             if ctor.doc is not None:
                 ctor_desc = ctor.doc.content
             else:
@@ -572,12 +439,8 @@ def _gen_records(config, stemmer, index, repository, symbols):
                 "ident": ctor.identifier,
                 "summary": utils.preprocess_docs(ctor_desc, repository.namespace, summary=True, plain=True),
             })
-            add_index_terms(index_terms, [ctor.identifier], ctor_idx)
-            add_index_terms(index_terms, utils.index_symbol(ctor.name, stemmer), ctor_idx)
-            add_index_terms(index_terms, utils.index_description(ctor_desc, stemmer), ctor_idx)
 
         for method in record.methods:
-            method_idx = len(index_symbols)
             if method.doc is not None:
                 method_desc = method.doc.content
             else:
@@ -589,12 +452,8 @@ def _gen_records(config, stemmer, index, repository, symbols):
                 "ident": method.identifier,
                 "summary": utils.preprocess_docs(method_desc, repository.namespace, summary=True, plain=True),
             })
-            add_index_terms(index_terms, [method.identifier], method_idx)
-            add_index_terms(index_terms, utils.index_symbol(method.name, stemmer), method_idx)
-            add_index_terms(index_terms, utils.index_description(method_desc, stemmer), method_idx)
 
         for func in record.functions:
-            func_idx = len(index_symbols)
             if func.doc is not None:
                 func_desc = func.doc.content
             else:
@@ -606,20 +465,15 @@ def _gen_records(config, stemmer, index, repository, symbols):
                 "ident": func.identifier,
                 "summary": utils.preprocess_docs(func_desc, repository.namespace, summary=True, plain=True),
             })
-            add_index_terms(index_terms, [func.identifier], func_idx)
-            add_index_terms(index_terms, utils.index_symbol(func.name, stemmer), func_idx)
-            add_index_terms(index_terms, utils.index_description(func_desc, stemmer), func_idx)
 
 
-def _gen_unions(config, stemmer, index, repository, symbols):
+def _gen_unions(config, index, repository, symbols):
     index_symbols = index["symbols"]
-    index_terms = index["terms"]
 
     for union in symbols:
         if config.is_hidden(union.name):
             log.debug(f"Skipping hidden type {union.name}")
             continue
-        idx = len(index_symbols)
         if union.doc is not None:
             desc = union.doc.content
         else:
@@ -630,12 +484,8 @@ def _gen_unions(config, stemmer, index, repository, symbols):
             "ctype": union.base_ctype,
             "summary": utils.preprocess_docs(desc, repository.namespace, summary=True, plain=True),
         })
-        add_index_terms(index_terms, [union.base_ctype.lower()], idx)
-        add_index_terms(index_terms, utils.index_identifier(union.name, stemmer), idx)
-        add_index_terms(index_terms, utils.index_description(desc, stemmer), idx)
 
         for ctor in union.constructors:
-            ctor_idx = len(index_symbols)
             if ctor.doc is not None:
                 ctor_desc = ctor.doc.content
             else:
@@ -647,12 +497,8 @@ def _gen_unions(config, stemmer, index, repository, symbols):
                 "ident": ctor.identifier,
                 "summary": utils.preprocess_docs(ctor_desc, repository.namespace, summary=True, plain=True),
             })
-            add_index_terms(index_terms, [ctor.identifier], ctor_idx)
-            add_index_terms(index_terms, utils.index_symbol(ctor.name, stemmer), ctor_idx)
-            add_index_terms(index_terms, utils.index_description(ctor_desc, stemmer), ctor_idx)
 
         for method in union.methods:
-            method_idx = len(index_symbols)
             if method.doc is not None:
                 method_desc = method.doc.content
             else:
@@ -664,12 +510,8 @@ def _gen_unions(config, stemmer, index, repository, symbols):
                 "ident": method.identifier,
                 "summary": utils.preprocess_docs(method_desc, repository.namespace, summary=True, plain=True),
             })
-            add_index_terms(index_terms, [method.identifier], method_idx)
-            add_index_terms(index_terms, utils.index_symbol(method.name, stemmer), method_idx)
-            add_index_terms(index_terms, utils.index_description(method_desc, stemmer), method_idx)
 
         for func in union.functions:
-            func_idx = len(index_symbols)
             if func.doc is not None:
                 func_desc = func.doc.content
             else:
@@ -681,14 +523,10 @@ def _gen_unions(config, stemmer, index, repository, symbols):
                 "ident": func.identifier,
                 "summary": utils.preprocess_docs(func_desc, repository.namespace, summary=True, plain=True),
             })
-            add_index_terms(index_terms, [func.identifier], func_idx)
-            add_index_terms(index_terms, utils.index_symbol(func.name, stemmer), func_idx)
-            add_index_terms(index_terms, utils.index_description(func_desc, stemmer), func_idx)
 
 
-def _gen_content_files(config, stemmer, index, repository, content_dirs):
+def _gen_content_files(config, index, repository, content_dirs):
     index_symbols = index["symbols"]
-    index_terms = index["terms"]
 
     for file_name in config.content_files:
         src_file = utils.find_extra_content_file(content_dirs, file_name)
@@ -719,9 +557,6 @@ def _gen_content_files(config, stemmer, index, repository, content_dirs):
             "summary": utils.preprocess_docs(src_data, repository.namespace, summary=True, plain=True),
         })
 
-        content_idx = len(index_symbols)
-        add_index_terms(index_terms, utils.index_description(src_data, stemmer), content_idx)
-
 
 def gen_indices(config, repository, content_dirs, output_dir):
     namespace = repository.namespace
@@ -767,8 +602,6 @@ def gen_indices(config, repository, content_dirs, output_dir):
         "terms": {},
     }
 
-    stemmer = porter.PorterStemmer()
-
     # Each section is isolated, so we run it into a thread pool
     for section in all_indices:
         generator = all_indices.get(section, None)
@@ -782,14 +615,14 @@ def gen_indices(config, repository, content_dirs, output_dir):
             continue
 
         log.debug(f"Generating symbols for section {section}")
-        generator(config, stemmer, index, repository, s)
+        generator(config, index, repository, s)
 
-    _gen_content_files(config, stemmer, index, repository, content_dirs)
+    _gen_content_files(config, index, repository, content_dirs)
 
     # Ensure iteration order is reproducible by sorting symbols by type/name,
     # and terms by key. This has no overhead since values are not copied.
     index["symbols"].sort(key=lambda s: (s["type"], s["name"]))
-    index["terms"] = dict(sorted(index["terms"].items()))
+    index["terms"] = {}
 
     data = json.dumps(index, separators=(',', ':'))
     index_file = os.path.join(output_dir, "index.json")
diff --git a/gidocgen/utils.py b/gidocgen/utils.py
index dd2544f..5fb4552 100644
--- a/gidocgen/utils.py
+++ b/gidocgen/utils.py
@@ -13,7 +13,7 @@ from pygments.lexers import get_lexer_by_name
 from pygments.formatters import HtmlFormatter
 from typogrify.filters import typogrify
 
-from . import gir, log, mdext, porter
+from . import gir, log, mdext
 
 
 # The beginning of a gtk-doc code block:
@@ -727,80 +727,6 @@ def preprocess_docs(text, namespace, summary=False, md=None, extensions=[], plai
     return Markup(typogrify(text, ignore_tags=['h1', 'h2', 'h3', 'h4']))
 
 
-def stem(word, stemmer=None):
-    if stemmer is None:
-        stemmer = porter.PorterStemmer()
-    return stemmer.stem(word, 0, len(word) - 1)
-
-
-def index_description(text, stemmer=None):
-    processed_text = []
-
-    inside_code_block = False
-    for line in text.split("\n"):
-        if not inside_code_block and (line.startswith('```') or line.startswith('|[')):
-            inside_code_block = True
-            continue
-
-        if inside_code_block and (line.startswith('```') or line.startswith(']|')):
-            inside_code_block = False
-            continue
-
-        if not inside_code_block:
-            processed_text.append(line)
-
-    data = " ".join(processed_text)
-    terms = set()
-    for chunk in data.split(" "):
-        chunk = chunk.lower()
-        if chunk in ["\n", "\r", "\r\n"]:
-            continue
-        # Skip gtk-doc sygils
-        if chunk.startswith('%') or chunk.startswith('#') or chunk.startswith('@') or chunk.endswith('()'):
-            continue
-        # Skip gi-docgen links
-        if chunk.startswith('[') and chunk.endswith(']') and '@' in chunk:
-            continue
-        # Skip images
-        if chunk.startswith('!['):
-            continue
-        if chunk in EN_STOPWORDS:
-            continue
-        chunk = re.sub(r"`(\w+)`", r"\g<1>", chunk)
-        chunk = re.sub(r"[,\.:;`]$", '', chunk)
-        chunk = re.sub(r"[\(\)]+", '', chunk)
-        terms.add(stem(chunk, stemmer))
-    return terms
-
-
-def canonicalize(symbol):
-    return symbol.replace('-', '_')
-
-
-def index_identifier(symbol, stemmer=None):
-    """Chunks an identifier (e.g. EventControllerClik) into terms useful for indexing."""
-    symbol = re.sub(CAMEL_CASE_START_RE, r"\g<1>_\g<2>", symbol)
-    symbol = re.sub(CAMEL_CASE_CHUNK_RE, r"\g<1>_\g<2>", symbol)
-    symbol = symbol.replace('-', '_')
-    symbol = symbol.lower()
-    terms = set()
-    for chunk in symbol.split('_'):
-        if chunk in EN_STOPWORDS:
-            continue
-        terms.add(stem(chunk, stemmer))
-    return terms
-
-
-def index_symbol(symbol, stemmer=None):
-    """Chunks a symbol (e.g. set_layout_manager) into terms useful for indexing."""
-    terms = set()
-    for chunk in canonicalize(symbol).split('_'):
-        if chunk in EN_STOPWORDS:
-            continue
-        terms.add(stem(chunk, stemmer))
-    return terms
-
-
 def code_highlight(text, language='c'):
     lexer = get_lexer_by_name(language)
     formatter = HtmlFormatter()


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]