[geary/wip/composer-folks: 9/22] Implement new contact harvester for the engine



commit 65ab37938ff9d5fb98864f4a16f350d63e8274c3
Author: Michael Gratton <mike vee net>
Date:   Mon Jun 10 08:43:40 2019 +1000

    Implement new contact harvester for the engine
    
    Replaces harvesting code in ImapDB code so as to be stand-alone and
    reusable. Implement harvesting policy to only harvest from a set of
    whitelisted special folder types (so junk and trash is not harvested)
    and only harvest valid, non-spoofed, addresses.

 po/POTFILES.in                                     |   1 +
 src/engine/common/common-contact-harvester.vala    | 106 ++++++++++++
 src/engine/meson.build                             |   1 +
 test/engine/api/geary-contact-store-mock.vala      |  26 +++
 .../common/common-contact-harvester-test.vala      | 186 +++++++++++++++++++++
 test/meson.build                                   |   2 +
 test/test-engine.vala                              |   1 +
 7 files changed, 323 insertions(+)
---
diff --git a/po/POTFILES.in b/po/POTFILES.in
index 84efca9a..20ac4461 100644
--- a/po/POTFILES.in
+++ b/po/POTFILES.in
@@ -164,6 +164,7 @@ src/engine/app/email-store/app-copy-operation.vala
 src/engine/app/email-store/app-fetch-operation.vala
 src/engine/app/email-store/app-list-operation.vala
 src/engine/app/email-store/app-mark-operation.vala
+src/engine/common/common-contact-harvester.vala
 src/engine/common/common-contact-store-impl.vala
 src/engine/common/common-message-data.vala
 src/engine/db/db.vala
diff --git a/src/engine/common/common-contact-harvester.vala b/src/engine/common/common-contact-harvester.vala
new file mode 100644
index 00000000..ec48917d
--- /dev/null
+++ b/src/engine/common/common-contact-harvester.vala
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2016 Software Freedom Conservancy Inc.
+ * Copyright 2019 Michael Gratton <mike vee net>
+ *
+ * This software is licensed under the GNU Lesser General Public License
+ * (version 2.1 or later). See the COPYING file in this distribution.
+ */
+
+/** Denotes objects that extract contacts from email messages. */
+internal interface Geary.ContactHarvester : GLib.Object {
+
+    /**
+     * Extracts contacts from the given collection of email messages.
+     *
+     * @param messages the email messages to extract contacts from
+     * @param cancellable an optional cancellable for the operation
+     * @throws GLib.Error if harvesting fails
+     */
+    public abstract async void harvest_from_email(Gee.Collection<Email> messages,
+                                                  GLib.Cancellable? cancellable)
+        throws GLib.Error;
+
+}
+
+/**
+ * Default harvester that saves contacts in the contact store.
+ *
+ * Harvesting only takes place when the harvester was constructed for
+ * a whitelisted special folder type, and only valid, non-spoofed
+ * addresses are turned into contacts.
+ */
+internal class Geary.ContactHarvesterImpl : BaseObject, ContactHarvester {
+
+    // Messages that do not fulfil both of these fields are skipped
+    private const Email.Field REQUIRED_FIELDS = ORIGINATORS | RECEIVERS;
+
+    // Special folder types that contacts may be harvested from
+    private const SpecialFolderType[] FOLDER_WHITELIST = {
+        INBOX,
+        ARCHIVE,
+        SENT,
+        NONE
+    };
+
+
+    private ContactStore store;
+    private Gee.Collection<RFC822.MailboxAddress> owner_mailboxes;
+    private SpecialFolderType location;
+    private bool is_whitelisted;
+
+    /**
+     * Constructs a harvester for messages found in a single location.
+     *
+     * @param store the store that harvested contacts are saved to
+     * @param location the special folder type being harvested from,
+     * checked once against the folder whitelist
+     * @param owners the owner mailboxes; receiver addresses found in
+     * this collection are given RECEIVED_FROM importance
+     */
+    public ContactHarvesterImpl(ContactStore store,
+                                SpecialFolderType location,
+                                Gee.Collection<RFC822.MailboxAddress> owners) {
+        this.store = store;
+        this.owner_mailboxes = owners;
+        this.location = location;
+        this.is_whitelisted = (location in FOLDER_WHITELIST);
+    }
+
+    /**
+     * Saves contacts for the addresses found on the given messages.
+     *
+     * Does nothing if this harvester's location is not whitelisted or
+     * if no messages were given. Messages that do not fulfil
+     * REQUIRED_FIELDS are ignored. At most one contact per normalised
+     * email address is passed to the store, in a single update.
+     *
+     * @param messages the email messages to extract contacts from
+     * @param cancellable an optional cancellable, passed to the store
+     * @throws GLib.Error if updating the contact store fails
+     */
+    public async void harvest_from_email(Gee.Collection<Email> messages,
+                                         GLib.Cancellable? cancellable)
+        throws GLib.Error {
+        if (!this.is_whitelisted || messages.is_empty) {
+            return;
+        }
+
+        // Keyed by normalised email address so that each address
+        // results in a single contact
+        Gee.Map<string,Contact> contacts = new Gee.HashMap<string,Contact>();
+        int importance = Contact.Importance.SEEN;
+        if (this.location == SENT) {
+            importance = Contact.Importance.SENT_TO;
+        }
+
+        foreach (Email message in messages) {
+            if (!message.fields.fulfills(REQUIRED_FIELDS)) {
+                continue;
+            }
+
+            Email.Field type = Email.Field.ORIGINATORS;
+            add_contacts(contacts, message.from, type, importance);
+            if (message.sender != null) {
+                add_contact(contacts, message.sender, type, importance);
+            }
+            // Bcc is deliberately not harvested here: it is a receiver
+            // field and is handled with the other receivers below, so
+            // that the owner mailbox check is applied to it.
+            // NOTE(review): a third originator field (presumably
+            // reply-to) may have been intended at this point - confirm
+            // before adding it.
+
+            type = Email.Field.RECEIVERS;
+            add_contacts(contacts, message.to, type, importance);
+            add_contacts(contacts, message.cc, type, importance);
+            add_contacts(contacts, message.bcc, type, importance);
+        }
+
+        yield this.store.update_contacts(contacts.values, cancellable);
+    }
+
+    /**
+     * Adds a contact for each address in a possibly-null address list.
+     *
+     * @param contacts the harvested contacts, keyed by normalised email
+     * @param addresses the addresses to add, ignored if null
+     * @param type the email field the addresses were taken from
+     * @param importance the importance to give new contacts
+     */
+    private void add_contacts(Gee.Map<string, Contact> contacts,
+                              RFC822.MailboxAddresses? addresses,
+                              Email.Field type,
+                              int importance) {
+        if (addresses == null) {
+            return;
+        }
+        foreach (RFC822.MailboxAddress address in addresses) {
+            add_contact(contacts, address, type, importance);
+        }
+    }
+
+    /**
+     * Adds a contact for a single address, if it passes the policy.
+     *
+     * Invalid or spoofed addresses are ignored. A contact already
+     * harvested for the same normalised email address is only replaced
+     * by one of strictly higher importance.
+     *
+     * @param contacts the harvested contacts, keyed by normalised email
+     * @param address the address to create a contact from
+     * @param type the email field the address was taken from
+     * @param importance the importance to give the new contact
+     */
+    private inline void add_contact(Gee.Map<string, Contact> contacts,
+                                    RFC822.MailboxAddress address,
+                                    Email.Field type,
+                                    int importance) {
+        if (!address.is_valid() || address.is_spoofed()) {
+            return;
+        }
+
+        // An owner mailbox appearing as a receiver overrides the
+        // importance derived from the folder type
+        if (type == RECEIVERS && address in this.owner_mailboxes) {
+            importance = Contact.Importance.RECEIVED_FROM;
+        }
+
+        Contact contact = new Contact.from_rfc822_address(
+            address, importance
+        );
+        Contact? existing = contacts[contact.normalized_email];
+        if (existing == null ||
+            existing.highest_importance < contact.highest_importance) {
+            contacts[contact.normalized_email] = contact;
+        }
+    }
+
+}
diff --git a/src/engine/meson.build b/src/engine/meson.build
index 816a8696..d413e509 100644
--- a/src/engine/meson.build
+++ b/src/engine/meson.build
@@ -66,6 +66,7 @@ geary_engine_vala_sources = files(
   'app/email-store/app-list-operation.vala',
   'app/email-store/app-mark-operation.vala',
 
+  'common/common-contact-harvester.vala',
   'common/common-contact-store-impl.vala',
   'common/common-message-data.vala',
 
diff --git a/test/engine/api/geary-contact-store-mock.vala b/test/engine/api/geary-contact-store-mock.vala
new file mode 100644
index 00000000..dfffcd7e
--- /dev/null
+++ b/test/engine/api/geary-contact-store-mock.vala
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2019 Michael Gratton <mike vee net>
+ *
+ * This software is licensed under the GNU Lesser General Public License
+ * (version 2.1 or later). See the COPYING file in this distribution.
+ */
+
+/**
+ * Mock contact store for use in unit tests.
+ *
+ * Both store methods hand their name and arguments to the call
+ * helpers that are presumably provided by MockObject, rather than
+ * storing anything.
+ */
+internal class Geary.ContactStoreMock : ContactStore, MockObject, GLib.Object {
+
+    // Queue of calls the test expects to be made on this mock
+    protected Gee.Queue<ExpectedCall> expected {
+        get; set; default = new Gee.LinkedList<ExpectedCall>();
+    }
+
+    /**
+     * Hands a contact lookup to object_call().
+     *
+     * Only the address is passed as a call argument, the cancellable
+     * is not.
+     */
+    public async Contact? get_by_rfc822(Geary.RFC822.MailboxAddress address,
+                                        GLib.Cancellable? cancellable)
+        throws GLib.Error {
+        // NOTE(review): the trailing null is presumably the default
+        // return value when the test supplies none - confirm against
+        // MockObject
+        return object_call<Contact?>("get_by_rfc822", { address }, null);
+    }
+
+    /**
+     * Hands a contact update to void_call().
+     *
+     * The updated contacts are call argument 0 and the cancellable is
+     * call argument 1.
+     */
+    public async void update_contacts(Gee.Collection<Contact> updated,
+                                      GLib.Cancellable? cancellable)
+        throws GLib.Error {
+        void_call("update_contacts", { updated, cancellable });
+    }
+
+}
diff --git a/test/engine/common/common-contact-harvester-test.vala b/test/engine/common/common-contact-harvester-test.vala
new file mode 100644
index 00000000..9db3f947
--- /dev/null
+++ b/test/engine/common/common-contact-harvester-test.vala
@@ -0,0 +1,186 @@
+/*
+ * Copyright 2019 Michael Gratton <mike vee net>
+ *
+ * This software is licensed under the GNU Lesser General Public License
+ * (version 2.1 or later). See the COPYING file in this distribution.
+ */
+
+
+/** Unit tests for the harvesting policy of ContactHarvesterImpl. */
+class Geary.ContactHarvesterImplTest : TestCase {
+
+
+    // Fixtures, re-created by set_up() and released by tear_down()
+    private ContactStoreMock? store = null;
+    private Email? email = null;
+    private RFC822.MailboxAddress? test_address = null;
+    private RFC822.MailboxAddress? sender_address = null;
+    private Gee.Collection<RFC822.MailboxAddress>? senders = null;
+
+
+    public ContactHarvesterImplTest() {
+        base("Geary.ContactHarvesterImplTest");
+        add_test("whitelisted_folder_type", whitelisted_folder_type);
+        add_test("blacklisted_folder_type", blacklisted_folder_type);
+        add_test("seen_priority", seen_priority);
+        add_test("sent_priority", sent_priority);
+        add_test("received_priority", received_priority);
+    }
+
+    public override void set_up() throws GLib.Error {
+        this.store = new ContactStoreMock();
+        this.email = new Email(
+            new ImapDB.EmailIdentifier.no_message_id(new Imap.UID(1))
+        );
+        // Ensure the minimum required email flags are set
+        this.email.set_originators(null, null, null);
+        this.email.set_receivers(null, null, null);
+
+        // These must be well-formed addresses, since the harvester
+        // ignores addresses that are not valid
+        this.test_address = new RFC822.MailboxAddress(
+            "Test", "test@example.com"
+        );
+        this.sender_address = new RFC822.MailboxAddress(
+            "Sender", "sender@example.com"
+        );
+        this.senders = Collection.single(this.sender_address);
+    }
+
+    public override void tear_down() throws GLib.Error {
+        this.store = null;
+        this.email = null;
+        this.test_address = null;
+        this.sender_address = null;
+        this.senders = null;
+    }
+
+    /** Harvesting from a whitelisted folder saves the receiver. */
+    public void whitelisted_folder_type() throws GLib.Error {
+        ContactHarvesterImpl whitelisted = new ContactHarvesterImpl(
+            this.store,
+            SpecialFolderType.INBOX,
+            this.senders
+        );
+        ExpectedCall call = this.store.expect_call("update_contacts");
+        set_receiver(this.test_address);
+
+        harvest(whitelisted);
+
+        this.store.assert_expectations();
+
+        Gee.Collection<Contact> contacts = call.called_arg<Gee.Collection<Contact>>(0);
+        assert_int(1, contacts.size, "contacts length");
+        Contact? created = Collection.get_first<Contact>(contacts);
+        assert_non_null(created, "contacts contents");
+
+        assert_string("Test", created.real_name);
+        assert_string("test@example.com", created.email);
+        assert_string("test@example.com", created.normalized_email);
+    }
+
+    /** Harvesting from a non-whitelisted folder makes no store calls. */
+    public void blacklisted_folder_type() throws GLib.Error {
+        ContactHarvesterImpl blacklisted = new ContactHarvesterImpl(
+            this.store,
+            SpecialFolderType.SPAM,
+            this.senders
+        );
+        set_receiver(this.test_address);
+
+        harvest(blacklisted);
+
+        // No call was expected above, so none may have been made
+        this.store.assert_expectations();
+    }
+
+    /** A receiver harvested from the inbox is merely seen. */
+    public void seen_priority() throws GLib.Error {
+        assert_harvested_importance(
+            SpecialFolderType.INBOX,
+            this.test_address,
+            Contact.Importance.SEEN
+        );
+    }
+
+    /** A receiver harvested from the sent folder was sent to. */
+    public void sent_priority() throws GLib.Error {
+        assert_harvested_importance(
+            SpecialFolderType.SENT,
+            this.test_address,
+            Contact.Importance.SENT_TO
+        );
+    }
+
+    /** An owner mailbox harvested as a receiver was received from. */
+    public void received_priority() throws GLib.Error {
+        assert_harvested_importance(
+            SpecialFolderType.SENT,
+            this.sender_address,
+            Contact.Importance.RECEIVED_FROM
+        );
+    }
+
+    // Makes the given address the sole To receiver of the fixture email
+    private void set_receiver(RFC822.MailboxAddress receiver) {
+        this.email.set_receivers(
+            new RFC822.MailboxAddresses.single(receiver), null, null
+        );
+    }
+
+    // Runs the harvester over the fixture email and collects the
+    // result of the async call, propagating any error it threw
+    private void harvest(ContactHarvesterImpl harvester)
+        throws GLib.Error {
+        harvester.harvest_from_email.begin(
+            Collection.single(this.email), null,
+            (obj, ret) => { async_complete(ret); }
+        );
+        harvester.harvest_from_email.end(async_result());
+    }
+
+    // Harvests a single receiver from a folder of the given type and
+    // checks the importance of the first contact passed to the store
+    private void assert_harvested_importance(SpecialFolderType location,
+                                             RFC822.MailboxAddress receiver,
+                                             int expected)
+        throws GLib.Error {
+        ContactHarvesterImpl harvester = new ContactHarvesterImpl(
+            this.store,
+            location,
+            this.senders
+        );
+        ExpectedCall call = this.store.expect_call("update_contacts");
+        set_receiver(receiver);
+
+        harvest(harvester);
+
+        this.store.assert_expectations();
+
+        Gee.Collection<Contact> contacts = call.called_arg<Gee.Collection<Contact>>(0);
+        Contact? created = Collection.get_first<Contact>(contacts);
+        // Fail with a message rather than dereferencing null below
+        assert_non_null(created, "contacts contents");
+        assert_int(
+            expected,
+            created.highest_importance,
+            "call contact importance"
+        );
+    }
+
+}
diff --git a/test/meson.build b/test/meson.build
index a3e78998..1563a5fe 100644
--- a/test/meson.build
+++ b/test/meson.build
@@ -14,6 +14,7 @@ geary_test_engine_sources = [
   # geary-engine_internal.vapi, which leads to duplicate symbols when
   # linking
   'engine/api/geary-account-mock.vala',
+  'engine/api/geary-contact-store-mock.vala',
   'engine/api/geary-credentials-mediator-mock.vala',
   'engine/api/geary-email-identifier-mock.vala',
   'engine/api/geary-email-properties-mock.vala',
@@ -29,6 +30,7 @@ geary_test_engine_sources = [
   'engine/app/app-conversation-monitor-test.vala',
   'engine/app/app-conversation-set-test.vala',
   'engine/common/common-contact-store-impl-test.vala',
+  'engine/common/common-contact-harvester-test.vala',
   'engine/db/db-database-test.vala',
   'engine/db/db-versioned-database-test.vala',
   'engine/imap/command/imap-create-command-test.vala',
diff --git a/test/test-engine.vala b/test/test-engine.vala
index 3c725cdd..95068156 100644
--- a/test/test-engine.vala
+++ b/test/test-engine.vala
@@ -24,6 +24,7 @@ int main(string[] args) {
 
     engine.add_suite(new Geary.AccountInformationTest().get_suite());
     engine.add_suite(new Geary.AttachmentTest().get_suite());
+    engine.add_suite(new Geary.ContactHarvesterImplTest().get_suite());
     engine.add_suite(new Geary.EngineTest().get_suite());
     engine.add_suite(new Geary.FolderPathTest().get_suite());
     engine.add_suite(new Geary.IdleManagerTest().get_suite());


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]