[extensions-web] extensions: Start hacking on search



commit 40e281a8dd42d74ca1d8040a605e7cd626cacf2d
Author: Jasper St. Pierre <jstpierre mecheye net>
Date:   Wed Mar 7 12:14:18 2012 -0500

    extensions: Start hacking on search
    
    Add a new search capability, powered by Xapian. Don't expose it in
    the UI just yet -- when we deploy it, we want to have everything
    working and indexed on the server, so split this up into two commits.

 .gitignore                                         |    1 +
 .../management/commands/indexextensions.py         |   19 +++++
 sweettooth/extensions/search.py                    |   70 +++++++++++++++++++
 .../extensions/templates/extensions/list_bare.html |    8 +--
 .../extensions/templates/extensions/list_item.html |    7 ++
 sweettooth/extensions/views.py                     |   71 +++++++++++++++-----
 sweettooth/settings.py                             |    2 +
 7 files changed, 154 insertions(+), 24 deletions(-)
---
diff --git a/.gitignore b/.gitignore
index f981122..b651822 100644
--- a/.gitignore
+++ b/.gitignore
@@ -41,6 +41,7 @@ venv
 # sweettooth-specific
 Demos
 sweettooth/uploaded-files
+xapian.db
 sweettooth.wsgi
 local_settings.py
 *.crt
diff --git a/sweettooth/extensions/management/commands/indexextensions.py b/sweettooth/extensions/management/commands/indexextensions.py
new file mode 100644
index 0000000..5d59f83
--- /dev/null
+++ b/sweettooth/extensions/management/commands/indexextensions.py
@@ -0,0 +1,19 @@
+
+from django.core.management.base import BaseCommand
+from extensions.models import Extension
+from extensions.search import index_extension
+
+class Command(BaseCommand):
+    args = ''
+    help = 'Regenerates all metadata.json files and replaces them in the zipfile'
+
+    def handle(self, *args, **options):
+        count = Extension.objects.count()
+        message_length = 0
+        for i, ext in enumerate(Extension.objects.all()):
+            index_extension(ext)
+            message = ("Indexed (%d / %d) %s" % (i + 1, count, ext.uuid))
+            message_length = max(message_length, len(message))
+            self.stdout.write(message.ljust(message_length) + "\r")
+            self.stdout.flush()
+        self.stdout.write('\nSuccessfully indexed all extensions\n')
diff --git a/sweettooth/extensions/search.py b/sweettooth/extensions/search.py
new file mode 100644
index 0000000..1bc92b0
--- /dev/null
+++ b/sweettooth/extensions/search.py
@@ -0,0 +1,70 @@
+
+import xapian
+
+from django.conf import settings
+from django.db.models import signals
+
+from extensions.models import Extension, ExtensionVersion
+
+def index_extension(extension):
+    if extension.latest_version is None:
+        return
+
+    db = xapian.WritableDatabase(settings.XAPIAN_DB_PATH, xapian.DB_CREATE_OR_OPEN)
+
+    termgen = xapian.TermGenerator()
+    termgen.set_stemmer(xapian.Stem("en"))
+
+    doc = xapian.Document()
+    termgen.set_document(doc)
+
+    name = extension.name.lower()
+    uuid = extension.uuid.lower()
+    description = extension.description.lower()
+
+    termgen.index_text(name, 10)
+    termgen.index_text(uuid)
+    termgen.index_text(description)
+
+    doc.set_data(str(extension.pk))
+
+    idterm = "Q%s" % (extension.pk,)
+    doc.add_boolean_term(idterm)
+    db.replace_document(idterm, doc)
+
+def delete_extension(extension):
+    db = xapian.WritableDatabase(settings.XAPIAN_DB_PATH, xapian.DB_CREATE_OR_OPEN)
+    idterm = "Q%s" % (extension.pk,)
+    db.delete_document(idterm)
+
+
+def post_extension_save_handler(instance, **kwargs):
+    index_extension(instance)
+signals.post_save.connect(post_extension_save_handler, sender=Extension)
+
+def post_extension_delete_handler(instance, **kwargs):
+    delete_extension(instance)
+signals.post_delete.connect(post_extension_delete_handler, sender=Extension)
+
+def post_version_save_handler(instance, **kwargs):
+    index_extension(instance.extension)
+signals.post_save.connect(post_version_save_handler, sender=ExtensionVersion)
+
+def post_version_delete_handler(instance, **kwargs):
+    delete_extension(instance.extension)
+signals.post_delete.connect(post_version_delete_handler, sender=ExtensionVersion)
+
+def enquire(querystring):
+    try:
+        db = xapian.Database(settings.XAPIAN_DB_PATH)
+    except xapian.DatabaseOpeningError:
+        return None
+
+    qp = xapian.QueryParser()
+    qp.set_stemmer(xapian.Stem("en"))
+    qp.set_database(db)
+
+    enquire = xapian.Enquire(db)
+    enquire.set_query(qp.parse_query(querystring))
+
+    return enquire
diff --git a/sweettooth/extensions/templates/extensions/list_bare.html b/sweettooth/extensions/templates/extensions/list_bare.html
index 11dd32a..f6bea29 100644
--- a/sweettooth/extensions/templates/extensions/list_bare.html
+++ b/sweettooth/extensions/templates/extensions/list_bare.html
@@ -1,13 +1,7 @@
 {% if extension_list %}
 <ul class="extensions">
   {% for extension in extension_list %}
-  <li class="extension" data-svm="{{ extension.visible_shell_version_map_json }}">
-    <h3 class="extension-name"><a href="{% url extensions-detail pk=extension.pk %}" class="title-link"><img src="{{ extension.icon.url }}" class="icon">{{ extension.name }}</a></h3>
-    <span class="author">by <a href="{% url auth-profile user=extension.creator.username %}">{{ extension.creator }}</a></span>
-    <p class="description">
-      {{ extension.first_line_of_description }}
-    </p>
-  </li>
+    {% include "extensions/list_item.html" %}
   {% endfor %}
 </ul>
 {% else %}
diff --git a/sweettooth/extensions/templates/extensions/list_item.html b/sweettooth/extensions/templates/extensions/list_item.html
new file mode 100644
index 0000000..faac16c
--- /dev/null
+++ b/sweettooth/extensions/templates/extensions/list_item.html
@@ -0,0 +1,7 @@
+<li class="extension" data-svm="{{ extension.visible_shell_version_map_json }}">
+  <h3 class="extension-name"><a href="{% url extensions-detail pk=extension.pk %}" class="title-link"><img src="{{ extension.icon.url }}" class="icon">{{ extension.name }}</a></h3>
+  <span class="author">by <a href="{% url auth-profile user=extension.creator.username %}">{{ extension.creator }}</a></span>
+  <p class="description">
+    {{ extension.first_line_of_description }}
+  </p>
+</li>
diff --git a/sweettooth/extensions/views.py b/sweettooth/extensions/views.py
index 8c8a01a..ed8fba6 100644
--- a/sweettooth/extensions/views.py
+++ b/sweettooth/extensions/views.py
@@ -1,4 +1,6 @@
 
+from math import ceil
+
 from django.core.exceptions import ValidationError
 from django.core.paginator import Paginator, InvalidPage
 from django.core.urlresolvers import reverse
@@ -13,7 +15,7 @@ from django.utils import simplejson as json
 from django.views.decorators.http import require_POST
 from sorl.thumbnail.shortcuts import get_thumbnail
 
-from extensions import models
+from extensions import models, search
 from extensions.forms import UploadForm
 
 from decorators import ajax_view, model_view
@@ -98,7 +100,7 @@ def get_versions_for_version_strings(version_strings):
         if base_version:
             yield base_version
 
-def ajax_query_params_query(request):
+def ajax_query_params_query(request, n_per_page=10):
     version_qs = models.ExtensionVersion.objects.visible()
 
     version_strings = request.GET.getlist('shell_version')
@@ -128,29 +130,60 @@ def ajax_query_params_query(request):
     order = request.GET.get('order', default_order)
     queryset.query.standard_ordering = (order == 'asc')
 
-    return queryset
-
-@ajax_view
-def ajax_extensions_list(request):
-    queryset = ajax_query_params_query(request)
-
-    paginator = Paginator(queryset, 10)
+    # Paginate the query
+    paginator = Paginator(queryset, n_per_page)
     page = request.GET.get('page', 1)
     try:
         page_number = int(page)
     except ValueError:
-        if page == 'last':
-            page_number = paginator.num_pages
-        else:
-            # Page is not 'last', nor can it be converted to an int.
-            raise Http404()
+        raise Http404()
+
     try:
         page_obj = paginator.page(page_number)
     except InvalidPage:
         raise Http404()
 
-    return dict(html=render_to_string('extensions/list_bare.html', dict(extension_list=page_obj.object_list)),
-                numpages=paginator.num_pages)
+    return page_obj.object_list, paginator.num_pages
+
+def ajax_query_search_query(request, n_per_page=10):
+    querystring = request.GET.get('search', '')
+
+    enquire = search.enquire(querystring)
+
+    page = request.GET.get('page', 1)
+    try:
+        offset = (int(page) - 1) * n_per_page
+    except ValueError:
+        raise Http404()
+
+    mset = enquire.get_mset(offset, n_per_page)
+    pks = [match.document.get_data() for match in mset]
+
+    num_pages = int(ceil(float(mset.get_matches_estimated()) / n_per_page))
+
+    # filter doesn't guarantee an order, so we need to get all the
+    # possible models then look them up to get the ordering
+    # returned by xapian. This hits the database all at once, rather
+    # than pagesize times.
+    extension_lookup = {}
+    for extension in models.Extension.objects.filter(pk__in=pks):
+        extension_lookup[str(extension.pk)] = extension
+
+    extensions = [extension_lookup[pk] for pk in pks]
+
+    return extensions, num_pages
+
+@ajax_view
+def ajax_extensions_list(request):
+    if request.GET.get('search',  ''):
+        func = ajax_query_search_query
+    else:
+        func = ajax_query_params_query
+
+    object_list, num_pages = func(request)
+
+    return dict(html=render_to_string('extensions/list_bare.html', dict(extension_list=object_list)),
+                numpages=num_pages)
 
 @model_view(models.Extension)
 def extension_view(request, obj, **kwargs):
@@ -327,7 +360,11 @@ def ajax_details_view(request):
 
 @ajax_view
 def ajax_query_view(request):
-    return [ajax_details(e) for e in ajax_query_params_query(request)]
+    if 'search' in request.GET:
+        query = ajax_query_search_query(request)
+    else:
+        query = ajax_query_params_query(request)
+    return [ajax_details(e) for e in query]
 
 @ajax_view
 def ajax_set_status_view(request, newstatus):
diff --git a/sweettooth/settings.py b/sweettooth/settings.py
index 084e164..cd45ae9 100644
--- a/sweettooth/settings.py
+++ b/sweettooth/settings.py
@@ -170,6 +170,8 @@ LOGGING = {
 
 DEFAULT_FROM_EMAIL = "noreply@gnome.org"
 
+XAPIAN_DB_PATH = os.path.join(SITE_ROOT, "xapian.db")
+
 try:
     from local_settings import *
 except ImportError:



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]