[evolution-ews] Optimise EWS GAL update not to rewrite unchanged records



commit 0c38f32e127ac1ddad81ad4787f53ba27edbc989
Author: David Woodhouse <David Woodhouse intel com>
Date:   Thu Sep 4 14:24:15 2014 +0100

    Optimise EWS GAL update not to rewrite unchanged records
    
    Store the SHA1 of each record's binary data in bdata in the SQLite
    database. When processing the new OAB file, if the bdata already matches
    the SHA1 of the incoming record then do nothing to the existing record.
    
    Before starting to process the OAB file, build a GHashTable with all the
    UIDs which already exist in the database. Remove them from the GHashTable
    as they are seen while processing the OAB file. At the end, any which are
    still left in the GHashTable are deleted.
    
    The value in the GHashTable is the SHA1. It is freed either when a
    record with the same UID is decoded, or at the end by the
    append_to_list() helper function, when we walk the hash table to
    collect all remaining entries for removal.
    
    This should *drastically* improve the performance of the GAL update.

 src/addressbook/e-book-backend-ews.c |  138 +++++++++++++++++++++------------
 src/addressbook/ews-oab-decoder.c    |    2 +-
 src/addressbook/ews-oab-decoder.h    |    1 +
 src/addressbook/oab-decode-test.c    |   13 ++-
 4 files changed, 99 insertions(+), 55 deletions(-)
---
diff --git a/src/addressbook/e-book-backend-ews.c b/src/addressbook/e-book-backend-ews.c
index d000ce5..7b2e00c 100644
--- a/src/addressbook/e-book-backend-ews.c
+++ b/src/addressbook/e-book-backend-ews.c
@@ -231,34 +231,6 @@ book_backend_ews_ensure_connected (EBookBackendEws *bbews,
        return FALSE;
 }
 
-static gboolean
-ews_remove_attachments (const gchar *attachment_dir)
-{
-       GDir *dir;
-
-       dir = g_dir_open (attachment_dir, 0, NULL);
-       if (dir) {
-               const gchar *fname;
-               gchar *full_path;
-
-               while ((fname = g_dir_read_name (dir))) {
-                       full_path = g_build_filename (attachment_dir, fname, NULL);
-                       if (g_unlink (full_path) != 0) {
-                               g_free (full_path);
-                               g_dir_close (dir);
-
-                               return FALSE;
-                       }
-
-                       g_free (full_path);
-               }
-
-               g_dir_close (dir);
-       }
-
-       return TRUE;
-}
-
 static const struct phone_field_mapping {
        EContactField field;
        const gchar *element;
@@ -2370,51 +2342,84 @@ ews_remove_old_gal_file (EBookBackendEws *cbews,
 }
 
 struct _db_data {
+       GHashTable *uids;
        GSList *contact_collector;
+       GSList *sha1_collector;
        guint collected_length;
        EBookBackendEws *cbews;
        GCancellable *cancellable;
+       gint unchanged;
+       gint changed;
+       gint added;
+       gint percent;
 };
 
 static void
 ews_gal_store_contact (EContact *contact,
                        goffset offset,
+                      const gchar *sha1,
                        guint percent,
                        gpointer user_data,
                        GError **error)
 {
        struct _db_data *data = (struct _db_data *) user_data;
        EBookBackendEwsPrivate *priv = data->cbews->priv;
+       const gchar *uid = e_contact_get_const (contact, E_CONTACT_UID);
+       gchar *db_sha1 = NULL;
 
        g_return_if_fail (priv->summary != NULL);
 
+       /* Hm, can we not do these two at once? */
+       db_sha1 = g_hash_table_lookup (data->uids, uid);
+       if (g_hash_table_remove (data->uids, uid)) {
+               if (!g_strcmp0 (db_sha1, sha1)) {
+                       data->unchanged++;
+                       goto out;
+               }
+               data->changed++;
+       } else
+               data->added++;
+
        data->contact_collector = g_slist_prepend (data->contact_collector, g_object_ref (contact));
+       data->sha1_collector = g_slist_prepend (data->sha1_collector, g_strdup (sha1));
        data->collected_length += 1;
 
        if (data->collected_length == 1000 || percent >= 100) {
                GSList *l;
-               GList *list, *link;
-               gchar *status_message = NULL;
-
-               d (g_print ("GAL adding contacts, percent complete : %d \n", percent);)
-
-               status_message = g_strdup_printf (_("Downloading contacts in %s %d%% completed... "), 
priv->folder_name, percent);
-               list = e_book_backend_list_views (E_BOOK_BACKEND (data->cbews));
-               for (link = list; link != NULL; link = g_list_next (link))
-                       e_data_book_view_notify_progress (E_DATA_BOOK_VIEW (link->data), -1, status_message);
-               g_list_free_full (list, g_object_unref);
-               g_free (status_message);
 
                data->contact_collector = g_slist_reverse (data->contact_collector);
-               e_book_sqlite_add_contacts (priv->summary, data->contact_collector, NULL, TRUE, 
data->cancellable, error);
+               data->sha1_collector = g_slist_reverse (data->sha1_collector);
+               e_book_sqlite_add_contacts (priv->summary, data->contact_collector, data->sha1_collector,
+                                           TRUE, data->cancellable, error);
 
                for (l = data->contact_collector; l != NULL; l = g_slist_next (l))
                        e_book_backend_notify_update (E_BOOK_BACKEND (data->cbews), E_CONTACT (l->data));
 
                g_slist_free_full (data->contact_collector, g_object_unref);
+               g_slist_free_full (data->sha1_collector, g_free);
                data->contact_collector = NULL;
+               data->sha1_collector = NULL;
                data->collected_length = 0;
        }
+ out:
+       g_free (db_sha1);
+       if (data->percent != percent) {
+               gchar *status_message = NULL;
+               GList *list, *link;
+
+               data->percent = percent;
+
+               d (g_print ("GAL processing contacts, %d%% complete (%d added, %d changed, %d unchanged\n",
+                           percent, data->added, data->changed, data->unchanged);)
+
+               status_message = g_strdup_printf (_("Processing contacts in %s %d%% completed... "),
+                                                 priv->folder_name, percent);
+               list = e_book_backend_list_views (E_BOOK_BACKEND (data->cbews));
+               for (link = list; link != NULL; link = g_list_next (link))
+                       e_data_book_view_notify_progress (E_DATA_BOOK_VIEW (link->data), -1, status_message);
+               g_list_free_full (list, g_object_unref);
+               g_free (status_message);
+       }
 }
 
 static gint det_sort_func (gconstpointer _a, gconstpointer _b)
@@ -2424,6 +2429,14 @@ static gint det_sort_func (gconstpointer _a, gconstpointer _b)
        return a->seq - b->seq;
 }
 
+static void append_to_list (gpointer key, gpointer val, gpointer user_data)
+{
+       GSList **list = user_data;
+
+       *list = g_slist_prepend (*list, key);
+       g_free (val);
+}
+
 static gboolean
 ews_replace_gal_in_db (EBookBackendEws *cbews,
                        const gchar *filename,
@@ -2434,38 +2447,63 @@ ews_replace_gal_in_db (EBookBackendEws *cbews,
        EwsOabDecoder *eod;
        gboolean ret = TRUE;
        gint populated = 0;
+       GSList *stale_uids = NULL;
        struct _db_data data;
 
        g_return_val_if_fail (priv->summary != NULL, FALSE);
 
+       data.unchanged = data.changed = data.added = 0;
+       data.percent = 0;
+       data.uids = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL);
+
        /* remove the old address-book and create a new one in db */
        e_book_sqlite_get_key_value_int (priv->summary, E_BOOK_SQL_IS_POPULATED_KEY, &populated, NULL);
        if (populated) {
-               GSList *uids = NULL;
+               GSList *slist = NULL, *l;
 
-               e_book_sqlite_search_uids (priv->summary, NULL, &uids, cancellable, NULL);
-               if (uids) {
-                       e_book_sqlite_remove_contacts (priv->summary, uids, cancellable, NULL);
-                       g_slist_free_full (uids, g_free);
-               }
+               e_book_sqlite_search (priv->summary, NULL, TRUE, &slist, cancellable, NULL);
 
-               ews_remove_attachments (priv->attachment_dir);
-       }
+               while (slist) {
+                       EbSqlSearchData *search_data = slist->data;
 
-       if (!ret)
-               return FALSE;
+                       l = slist;
+                       slist = slist->next;
+                       g_slist_free_1 (l);
+
+                       g_hash_table_insert (data.uids, search_data->uid, search_data->extra);
+
+                       /* We steal these */
+                       search_data->extra = search_data->uid = NULL;
+                       e_book_sqlite_search_data_free (search_data);
+               }
+       }
 
        eod = ews_oab_decoder_new (filename, priv->attachment_dir, error);
        if (*error)
                return FALSE;
 
        data.contact_collector = NULL;
+       data.sha1_collector = NULL;
        data.collected_length = 0;
        data.cbews = cbews;
        data.cancellable = cancellable;
 
        ret = ews_oab_decoder_decode (eod, ews_gal_store_contact, &data, cancellable, error);
 
+       /* Remove any items which were not present in the new OAB */
+       g_hash_table_foreach (data.uids, append_to_list, &stale_uids);
+       d (g_print ("GAL removing %d contacts\n", g_slist_length (stale_uids)));
+
+       /* Remove attachments. This will be easier once we add cursor support. */
+       if (stale_uids && !e_book_sqlite_remove_contacts (priv->summary, stale_uids, cancellable, error))
+               ret = FALSE;
+
+       d (g_print("GAL update completed %ssuccessfully. Changed: %d, Unchanged: %d, Added %d, Removed: %d\n",
+                  ret ? "" : "un",
+                  data.changed, data.unchanged, data.added, g_slist_length(stale_uids)));
+
+       g_slist_free (stale_uids);
+       g_hash_table_destroy (data.uids);
        /* always notify views as complete, to not left anything behind,
           if the decode was cancelled before full completion */
        e_book_backend_notify_complete (E_BOOK_BACKEND (cbews));
diff --git a/src/addressbook/ews-oab-decoder.c b/src/addressbook/ews-oab-decoder.c
index d77a6a7..c2eb2c2 100644
--- a/src/addressbook/ews-oab-decoder.c
+++ b/src/addressbook/ews-oab-decoder.c
@@ -1041,7 +1041,7 @@ ews_decode_and_store_oab_records (EwsOabDecoder *eod,
                if (ews_decode_addressbook_record (eod, memstream,
                                                   contact, priv->oab_props,
                                                   cancellable, error))
-                       cb (contact, offset,
+                       cb (contact, offset, sum_str,
                            ((gfloat) (i + 1) / priv->total_records) * 100,
                            user_data, error);
 
diff --git a/src/addressbook/ews-oab-decoder.h b/src/addressbook/ews-oab-decoder.h
index e4f034d..5e92c36 100644
--- a/src/addressbook/ews-oab-decoder.h
+++ b/src/addressbook/ews-oab-decoder.h
@@ -58,6 +58,7 @@ struct _EwsOabDecoderClass {
 
 typedef void   (*EwsOabContactAddedCb)         (EContact *contact,
                                                 goffset offset,
+                                                const gchar *sha1,
                                                 guint percent_complete,
                                                 gpointer user_data,
                                                 GError **error);
diff --git a/src/addressbook/oab-decode-test.c b/src/addressbook/oab-decode-test.c
index 23fbd5b..8854e21 100644
--- a/src/addressbook/oab-decode-test.c
+++ b/src/addressbook/oab-decode-test.c
@@ -13,6 +13,7 @@
 
 struct _db_data {
        GSList *contact_collector;
+       GSList *sha1_collector;
        guint collected_length;
        EBookSqlite *summary;
        const gchar *folderid;
@@ -21,6 +22,7 @@ struct _db_data {
 static void
 ews_test_store_contact (EContact *contact,
                         goffset offset,
+                       const gchar *sha1,
                         guint percent,
                         gpointer user_data,
                         GError **error)
@@ -28,17 +30,20 @@ ews_test_store_contact (EContact *contact,
        struct _db_data *data = (struct _db_data *) user_data;
 
        data->contact_collector = g_slist_prepend (data->contact_collector, g_object_ref (contact));
+       data->sha1_collector = g_slist_prepend (data->sha1_collector, g_strdup (sha1));
        data->collected_length += 1;
 
        if (data->collected_length == 1000 || percent >= 100) {
                data->contact_collector = g_slist_reverse (data->contact_collector);
-               e_book_sqlite_add_contacts (data->summary, data->contact_collector, NULL,
+               e_book_sqlite_add_contacts (data->summary, data->contact_collector,
+                                           data->sha1_collector,
                                            FALSE, NULL, error);
                g_print ("percent complete %d \n", percent);
 
-               g_slist_foreach (data->contact_collector, (GFunc) g_object_unref, NULL);
-               g_slist_free (data->contact_collector);
+               g_slist_free_full (data->contact_collector, g_object_unref);
+               g_slist_free_full (data->sha1_collector, g_free);
                data->contact_collector = NULL;
+               data->sha1_collector = NULL;
                data->collected_length = 0;
        }
 }
@@ -71,7 +76,7 @@ main (gint argc,
                exit(1);
        }
 
-       data.contact_collector = NULL;
+       data.sha1_collector = data.contact_collector = NULL;
        data.collected_length = 0;
        data.summary = summary;
        data.folderid = "de";


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]