[hamster-applet] moved from house-grown search to sqlite's full text search, updating index on demand (in get_facts i



commit fee859e098023f6a83f090970436bfde1764ccbc
Author: Toms Bauģis <toms baugis gmail com>
Date:   Thu Jul 22 13:15:04 2010 +0100

    moved from house-grown search to sqlite's full text search, updating index on demand (in get_facts if search terms are specified and taking date span into account), thus resulting in just a slight delay upon first shot. fixes bugs 621181, 618009

 src/hamster/db.py |  156 ++++++++++++++++++++++++++++++++++++----------------
 1 files changed, 108 insertions(+), 48 deletions(-)
---
diff --git a/src/hamster/db.py b/src/hamster/db.py
index aac0532..c410a86 100644
--- a/src/hamster/db.py
+++ b/src/hamster/db.py
@@ -213,6 +213,14 @@ class Storage(storage.Storage):
             """
             self.execute(update, (name, name.lower(), id))
 
+        affected_query = """
+            SELECT id
+              FROM facts
+             WHERE activity_id in (SELECT id FROM activities where category_id=?)
+        """
+        affected_ids = [res[0] for res in self.fetchall(affected_query, (id,))]
+        self.__remove_index(affected_ids)
+
 
     def __get_activity_by_name(self, name, category_id = None, resurrect = True):
         """get most recent, preferably not deleted activity by it's name"""
@@ -569,6 +577,7 @@ class Storage(storage.Storage):
         params = [(fact_id, tag["id"]) for tag in tags]
         self.execute(insert, params)
 
+        self.__remove_index([fact_id])
         return fact_id
 
     def __last_insert_rowid(self):
@@ -585,6 +594,14 @@ class Storage(storage.Storage):
 
 
     def __get_facts(self, date, end_date = None, search_terms = ""):
+        from configuration import conf
+        day_start = conf.get("day_start_minutes")
+        day_start = dt.time(day_start / 60, day_start % 60)
+
+        split_time = day_start
+        datetime_from = dt.datetime.combine(date, split_time)
+        datetime_to = dt.datetime.combine(end_date, split_time) + dt.timedelta(days = 1)
+
         query = """
                    SELECT a.id AS id,
                           a.start_time AS start_time,
@@ -601,55 +618,20 @@ class Storage(storage.Storage):
                     WHERE (a.end_time >= ? OR a.end_time IS NULL) AND a.start_time <= ?
         """
 
-        # let's see what we can do with search terms
-        # we will be looking in activity names, descriptions, categories and tags
-        # comma will be treated as OR
-        # space will be treated as AND or possible join
+        if search_terms:
+            # check if we need changes to the index
+            self.__check_index(datetime_from, datetime_to)
 
-
-        # split by comma and then by space and remove all extra spaces
-        or_bits = [[term.strip().lower().replace("'", "''") #striping removing case sensitivity and escaping quotes in term
-                          for term in terms.strip().split(" ") if term.strip()]
-                          for terms in search_terms.split(",") if terms.strip()]
-
-        def all_fields(term):
-            return """(lower(a.description) like '%%%(term)s%%'
-                       or b.search_name = '%(term)s'
-                       or c.search_name = '%(term)s'
-                       or lower(e.name) = '%(term)s' )""" % dict(term = term)
-
-        if or_bits:
-            search_query = "1<>1 " # will be building OR chain, so start with a false
-
-            for and_bits in or_bits:
-                if len(and_bits) == 1:
-                    and_query = all_fields(and_bits[0])
-                else:
-                    and_query = "1=1 "  # will be building AND chain, so start with a true
-                    # if we have more than one word, go for "(a and b) or ab"
-                    # to match two word tags
-                    for bit1, bit2 in zip(and_bits, and_bits[1:]):
-                        and_query += "and (%s and %s) or %s" % (all_fields(bit1),
-                                                                all_fields(bit2),
-                                                                all_fields("%s %s" % (bit1, bit2)))
-
-                search_query = "%s or (%s) " % (search_query, and_query)
-
-            query = "%s and (%s)" % (query, search_query)
+            search_terms = search_terms.replace('\\', '\\\\').replace('%', '\\%').replace('_', '\\_')
+            query += """ AND a.id in (SELECT id
+                                        FROM fact_index
+                                       WHERE fact_index MATCH '%s')""" % search_terms
 
 
 
         query += " ORDER BY a.start_time, e.name"
         end_date = end_date or date
 
-        from configuration import conf
-        day_start = conf.get("day_start_minutes")
-        day_start = dt.time(day_start / 60, day_start % 60)
-
-        split_time = day_start
-        datetime_from = dt.datetime.combine(date, split_time)
-        datetime_to = dt.datetime.combine(end_date, split_time) + dt.timedelta(days = 1)
-
         facts = self.fetchall(query, (_("Unsorted"),
                                       datetime_from,
                                       datetime_to))
@@ -706,6 +688,8 @@ class Storage(storage.Storage):
                       "DELETE FROM facts where id = ?"]
         self.execute(statements, [(fact_id,)] * 2)
 
+        self.__remove_index([fact_id])
+
     def __get_category_activities(self, category_id):
         """returns list of activities, if category is specified, order by name
            otherwise - by activity_order"""
@@ -758,11 +742,20 @@ class Storage(storage.Storage):
     def __remove_category(self, id):
         """move all activities to unsorted and remove category"""
 
+        affected_query = """
+            SELECT id
+              FROM facts
+             WHERE activity_id in (SELECT id FROM activities where category_id=?)
+        """
+        affected_ids = [res[0] for res in self.fetchall(affected_query, (id,))]
+
         update = "update activities set category_id = -1 where category_id = ?"
         self.execute(update, (id, ))
 
         self.execute("delete from categories where id = ?", (id, ))
 
+        self.__remove_index(affected_ids)
+
 
     def __add_activity(self, name, category_id = None, temporary = False):
         # first check that we don't have anything like that yet
@@ -795,6 +788,56 @@ class Storage(storage.Storage):
         """
         self.execute(query, (name, name.lower(), category_id, id))
 
+        affected_ids = [res[0] for res in self.fetchall("select id from facts where activity_id = ?", (id,))]
+        self.__remove_index(affected_ids)
+
+
+    def __remove_index(self, ids):
+        """remove affected ids from the index"""
+        if not ids:
+            return
+
+        ids = ",".join((str(id) for id in ids))
+        self.execute("DELETE FROM fact_index where id in (%s)" % ids)
+
+
+    def __check_index(self, start_date, end_date):
+        """check if maybe index needs rebuilding in the time span"""
+        index_query = """SELECT id
+                           FROM facts
+                          WHERE (end_time >= ? OR end_time IS NULL)
+                            AND start_time <= ?
+                            AND id not in(select id from fact_index)"""
+
+        rebuild_ids = ",".join([str(res[0]) for res in self.fetchall(index_query, (start_date, end_date))])
+
+        if rebuild_ids:
+            query = """
+                       SELECT a.id AS id,
+                              a.start_time AS start_time,
+                              a.end_time AS end_time,
+                              a.description as description,
+                              b.name AS name, b.id as activity_id,
+                              coalesce(c.name, ?) as category,
+                              e.name as tag
+                         FROM facts a
+                    LEFT JOIN activities b ON a.activity_id = b.id
+                    LEFT JOIN categories c ON b.category_id = c.id
+                    LEFT JOIN fact_tags d ON d.fact_id = a.id
+                    LEFT JOIN tags e ON e.id = d.tag_id
+                        WHERE a.id in (%s)
+                     ORDER BY a.id
+            """ % rebuild_ids
+
+            facts = self.__group_tags(self.fetchall(query, (_("Unsorted"), )))
+
+            insert = """INSERT INTO fact_index (id, name, category, description, tag)
+                             VALUES (?, ?, ?, ?, ?)"""
+            params = [(fact['id'], fact['name'], fact['category'], fact['description'], " ".join(fact['tags'])) for fact in facts]
+
+            self.executemany(insert, params)
+
+
     """ Here be dragons (lame connection/cursor wrappers) """
     def get_connection(self):
         if self.con is None:
@@ -836,15 +879,25 @@ class Storage(storage.Storage):
         con = self.__con or self.connection
         cur = self.__cur or con.cursor()
 
-        if isinstance(statement, list) == False: #we kind of think that we will get list of instructions
+        if isinstance(statement, list) == False: # we expect to receive instructions in list
             statement = [statement]
             params = [params]
 
-        if isinstance(statement, list):
-            for i in range(len(statement)):
-                logging.debug("%s %s" % (statement[i], params[i]))
+        for state, param in zip(statement, params):
+            logging.debug("%s %s" % (state, param))
+            cur.execute(state, param)
+
+        if not self.__con:
+            con.commit()
+            cur.close()
+            self.register_modification()
+
+    def executemany(self, statement, params = []):
+        con = self.__con or self.connection
+        cur = self.__cur or con.cursor()
 
-                res = cur.execute(statement[i], params[i])
+        logging.debug("%s %s" % (statement, params))
+        cur.executemany(statement, params)
 
         if not self.__con:
             con.commit()
@@ -852,6 +905,7 @@ class Storage(storage.Storage):
             self.register_modification()
 
 
+
     def start_transaction(self):
         # will give some hints to execute not to close or commit anything
         self.__con = self.connection
@@ -880,7 +934,7 @@ class Storage(storage.Storage):
 
         """upgrade DB to hamster version"""
         version = self.fetchone("SELECT version FROM version")["version"]
-        current_version = 8
+        current_version = 9
 
         if version < 2:
             """moving from fact_date, fact_time to start_time, end_time"""
@@ -1089,11 +1143,17 @@ class Storage(storage.Storage):
             for category in categories:
                 self.execute(statement, (category['name'].lower(), category['id']))
 
+        if version < 9:
+            # adding full text search
+            self.execute("""CREATE VIRTUAL TABLE fact_index
+                                           USING fts3(id, name, category, description, tag)""")
+
 
         # at the happy end, update version number
         if version < current_version:
             #lock down current version
             self.execute("UPDATE version SET version = %d" % current_version)
+            print "updated database from version %d to %d" % (version, current_version)
 
         """we start with an empty database and then populate with default
            values. This way defaults can be localized!"""



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]