[geary/mjog/mail-merge-plugin: 40/71] Plugin.MailMerge: Add simple CSV reader with unit tests




commit 593f3e2dc3b8f3dda97a8d91a68bdba4b6d761aa
Author: Michael Gratton <mike vee net>
Date:   Thu May 21 13:49:56 2020 +1000

    Plugin.MailMerge: Add simple CSV reader with unit tests

 src/client/plugin/mail-merge/meson.build    |  37 ++++-
 src/client/plugin/mail-merge/util-csv.vala  | 228 ++++++++++++++++++++++++++++
 src/client/plugin/mail-merge/util-test.vala | 181 ++++++++++++++++++++++
 3 files changed, 444 insertions(+), 2 deletions(-)
---
diff --git a/src/client/plugin/mail-merge/meson.build b/src/client/plugin/mail-merge/meson.build
index 218989125..e3d5e555a 100644
--- a/src/client/plugin/mail-merge/meson.build
+++ b/src/client/plugin/mail-merge/meson.build
@@ -1,14 +1,35 @@
 
 plugin_name = 'mail-merge'
 
-plugin_src = files(plugin_name + '.vala')
+plugin_src = files(
+  plugin_name + '.vala'
+)
 plugin_data = plugin_name + plugin_data_suffix
 plugin_dest = plugins_dir / plugin_name
 
-shared_module(
+# Sources for the helper library shared by the plugin and its tests
+lib_src = files('util-csv.vala')
+
+# Sources for the unit test executable
+test_src = files('util-test.vala')
+
+# Helper code is compiled once into a static library that is not
+# installed, it exists only to be linked into other targets
+lib = static_library(
+  'util',
+  sources: lib_src,
+  install: false,
+  dependencies: plugin_dependencies,
+  include_directories: config_h_dir,
+  vala_args: geary_vala_args,
+  c_args: plugin_c_args,
+)
+
+plugin = shared_module(
   plugin_name,
   sources: plugin_src,
   dependencies: plugin_dependencies,
+  link_with: lib,
   include_directories: config_h_dir,
   vala_args: geary_vala_args,
   c_args: plugin_c_args,
@@ -24,4 +45,16 @@ i18n.merge_file(
   install: true,
   install_dir: plugin_dest,
   install_rpath: client_lib_dir,
+
+plugin_test = executable(
+  'plugin-test',
+  test_src,
+  dependencies: plugin_dependencies + [ vala_unit_dep ],
+  link_with: lib,
+  include_directories: config_h_dir,
+  vala_args: geary_vala_args,
+  c_args: plugin_c_args,
+  install: false
 )
+
+test(plugin_name + '-test', plugin_test)
diff --git a/src/client/plugin/mail-merge/util-csv.vala b/src/client/plugin/mail-merge/util-csv.vala
new file mode 100644
index 000000000..037e10e58
--- /dev/null
+++ b/src/client/plugin/mail-merge/util-csv.vala
@@ -0,0 +1,228 @@
+/*
+ * Copyright © 2020 Michael Gratton <mike vee net>.
+ *
+ * This software is licensed under the GNU Lesser General Public License
+ * (version 2.1 or later). See the COPYING file in this distribution.
+ */
+
+
+/** Denotes error conditions raised while reading CSV data. */
+public errordomain Plugin.Util.Csv.DataError {
+
+    /** The input contained a null byte or an invalid UTF-8 lead byte. */
+    NON_TEXT_DATA,
+
+    /** No line ending was known and the record was not followed by one. */
+    UNKNOWN_EOL,
+
+    /** The record was not followed by the already-known line ending. */
+    EOL_NOT_FOUND;
+}
+
+
+/**
+ * A simple comma-separated value (CSV) reader.
+ *
+ * To use this class, simply construct an instance and start calling
+ * {@link read_record} until it returns null.
+ */
+public class Plugin.Util.Csv.Reader : Geary.BaseObject {
+
+    // UTF-8 byte prefixes indicating multi-byte codepoints
+    private const uint8 UTF8_DOUBLE = 0x06;    // `110`
+    private const uint8 UTF8_TRIPLE = 0x0E;    // `1110`
+    private const uint8 UTF8_QUADRUPLE = 0x1E; // `11110`
+    private const uint8 UTF8_TRAILER = 0x02;   // `10`
+    private const unichar UNICODE_REPLACEMENT_CHAR = 0xFFFD;
+
+    // Determines if a char may appear in an unquoted field: printable
+    // ASCII other than a double quote, or any non-ASCII codepoint. The
+    // separator is configurable, so read_field checks for it instead.
+    private static inline bool is_text_char(unichar c) {
+        return (c >= 0x20 && c <= 0x7E && c != '"') || c >= 0x80;
+    }
+
+    /** Line terminator of the input, guessed on first use if unset. */
+    public string? line_ending { get; set; default = null; }
+
+    /** The character that separates fields within a record. */
+    public char field_separator { get; set; default = ','; }
+
+    private GLib.InputStream input;
+    private GLib.Cancellable? cancellable;
+
+    // Single character look-ahead, decoded but not yet consumed
+    private unichar next_char = '\0';
+
+    /**
+     * Constructs a new CSV file reader.
+     *
+     * The reader is primed during construction, so the given stream
+     * will be read from. As such, an IOError or other error may occur
+     * during construction.
+     *
+     * If the given cancellable is not null, it will be used when
+     * performing I/O operations on the given input stream.
+     */
+    public async Reader(GLib.InputStream input,
+                        GLib.Cancellable? cancellable = null)
+        throws GLib.Error {
+        this.input = new GLib.BufferedInputStream(input);
+        this.cancellable = cancellable ?? new GLib.Cancellable();
+
+        // prime the look-ahead
+        yield read_char();
+    }
+
+    /**
+     * Reads the next record from the input.
+     *
+     * Returns one string per field, or null once the end of the input
+     * has been reached. Records need not all have the same length.
+     */
+    public async string[]? read_record() throws GLib.Error {
+        if (this.input.is_closed()) {
+            return null;
+        }
+
+        // Grown as needed rather than pre-sized from the previous
+        // record, so short records are never padded with null fields
+        string[] record = {};
+        while (true) {
+            string field = yield read_field();
+            record += field;
+            if (this.next_char != this.field_separator) {
+                break;
+            }
+            // skip the field sep
+            yield read_char();
+        }
+        if (!this.input.is_closed()) {
+            yield read_eol();
+        }
+        return record;
+    }
+
+    // Reads one field, stopping before the separator or EOL. A quoted
+    // field runs to its closing quote, un-escaping doubled quotes.
+    private async string read_field() throws GLib.Error {
+        bool quoted = (this.next_char == '"');
+        if (quoted) {
+            // skip the quote marker
+            yield read_char();
+        }
+
+        GLib.StringBuilder buf = new GLib.StringBuilder();
+        while (!this.input.is_closed() &&
+               (quoted || (
+                   this.next_char != this.field_separator &&
+                   is_text_char(this.next_char)))) {
+            unichar c = yield read_char();
+            if (!quoted || c != '"') {
+                buf.append_unichar(c);
+            } else if (this.next_char == '"') {
+                // Doubled quote, so an escaped literal quote
+                buf.append_c('"');
+                yield read_char();
+            } else {
+                // Closing quote, the rest is read as unquoted
+                quoted = false;
+            }
+        }
+        return buf.str;
+    }
+
+    // Consumes the EOL following a record: detected as LF, CRLF or CR
+    // and remembered if not yet known, else the known EOL is required.
+    private async void read_eol() throws GLib.Error {
+        if (this.line_ending == null || this.line_ending == "") {
+            // Don't know what the line ending currently is, so guess it
+            unichar c = yield read_char();
+            if (c == '\n') {
+                this.line_ending = "\n";
+            } else if (c == '\r') {
+                if (this.next_char == '\n') {
+                    // consume it
+                    yield read_char();
+                    this.line_ending = "\r\n";
+                } else {
+                    this.line_ending = "\r";
+                }
+            } else {
+                throw new DataError.UNKNOWN_EOL(
+                    "Unable to determine end of line character 0x%02x", c
+                );
+            }
+        } else {
+            // Known line ending, so check for it
+            unichar c;
+            for (int i = 0; i < this.line_ending.length; i++) {
+                c = yield read_char();
+                if (this.line_ending[i] != c) {
+                    throw new DataError.EOL_NOT_FOUND(
+                        "Unexpected end of line character: 0x%02X", c
+                    );
+                }
+            }
+        }
+    }
+
+    // Returns the current look-ahead char and decodes the next UTF-8
+    // codepoint to replace it, closing the input once it is exhausted.
+    private async unichar read_char() throws GLib.Error {
+        unichar c = this.next_char;
+
+        // allocated on the stack
+        uint8 buf[1];
+        size_t bytes_read = 0;
+        yield this.input.read_all_async(
+            buf, GLib.Priority.DEFAULT, this.cancellable, out bytes_read
+        );
+        if (bytes_read > 0) {
+            uint8 next = buf[0];
+            if (next == 0x00) {
+                throw new DataError.NON_TEXT_DATA("Read null byte");
+            }
+            if (next <= 0x7F) {
+                this.next_char = (unichar) next;
+            } else {
+                uint to_read = 0;
+                if (next >> 5 == UTF8_DOUBLE) {
+                    to_read = 1;
+                } else if (next >> 4 == UTF8_TRIPLE) {
+                    to_read = 2;
+                } else if (next >> 3 == UTF8_QUADRUPLE) {
+                    to_read = 3;
+                } else {
+                    throw new DataError.NON_TEXT_DATA("Invalid UTF-8 data");
+                }
+
+                uint8 utf[5];
+                utf[0] = next;
+                utf[to_read + 1] = 0x00;
+                for (int i = 0; i < to_read; i++) {
+                    yield this.input.read_all_async(
+                        buf, GLib.Priority.DEFAULT, this.cancellable,
+                        out bytes_read
+                    );
+                    if (bytes_read == 1 && buf[0] >> 6 == UTF8_TRAILER) {
+                        utf[i + 1] = buf[0];
+                    } else {
+                        utf[i + 1] = 0x00;
+                        break;
+                    }
+                }
+
+                // Validating decoder, since the bytes may be malformed
+                this.next_char = ((string) utf).get_char_validated();
+                if (!this.next_char.validate()) {
+                    this.next_char = UNICODE_REPLACEMENT_CHAR;
+                }
+            }
+        } else {
+            this.next_char = '\0';
+            yield this.input.close_async();
+        }
+        return c;
+    }
+}
\ No newline at end of file
diff --git a/src/client/plugin/mail-merge/util-test.vala b/src/client/plugin/mail-merge/util-test.vala
new file mode 100644
index 000000000..5be404ffc
--- /dev/null
+++ b/src/client/plugin/mail-merge/util-test.vala
@@ -0,0 +1,181 @@
+/*
+ * Copyright © 2020 Michael Gratton <mike vee net>
+ *
+ * This software is licensed under the GNU Lesser General Public License
+ * (version 2.1 or later). See the COPYING file in this distribution.
+ */
+
+public class ReaderTests : ValaUnit.TestCase {
+
+    /** Registers each of the CSV reader test cases. */
+    public ReaderTests() {
+        base("ReaderTests");
+        add_test("read_simple_lf", read_simple_lf);
+        add_test("read_simple_crlf", read_simple_crlf);
+        add_test("read_no_trailing_new_line", read_no_trailing_new_line);
+        add_test("read_empty_records", read_empty_records);
+        add_test("read_multi_byte_chars", read_multi_byte_chars);
+        add_test("read_quoted", read_quoted);
+    }
+
+    /** Reads two LF-terminated records, then expects end of input. */
+    public void read_simple_lf() throws GLib.Error {
+        const string CSV = "foo,bar,baz\n1,2,3\n";
+        var csv = open_reader(CSV.data);
+
+        var header_row = next_record(csv);
+        assert_array(header_row)
+            .size(3)
+            .first_is("foo")
+            .at_index_is(1, "bar")
+            .at_index_is(2, "baz");
+
+        var data_row = next_record(csv);
+        assert_array(data_row)
+            .size(3)
+            .first_is("1")
+            .at_index_is(1, "2")
+            .at_index_is(2, "3");
+
+        // Ensure both EOF and subsequent calls also return null
+        var first_eof = next_record(csv);
+        assert_array_is_null(first_eof);
+        var second_eof = next_record(csv);
+        assert_array_is_null(second_eof);
+    }
+
+    /** Reads two CRLF-terminated records, then expects end of input. */
+    public void read_simple_crlf() throws GLib.Error {
+        const string CSV = "foo,bar,baz\r\n1,2,3\r\n";
+        var csv = open_reader(CSV.data);
+
+        var header_row = next_record(csv);
+        assert_array(header_row)
+            .size(3)
+            .first_is("foo")
+            .at_index_is(1, "bar")
+            .at_index_is(2, "baz");
+
+        var data_row = next_record(csv);
+        assert_array(data_row)
+            .size(3)
+            .first_is("1")
+            .at_index_is(1, "2")
+            .at_index_is(2, "3");
+
+        // Ensure both EOF and subsequent calls also return null
+        var first_eof = next_record(csv);
+        assert_array_is_null(first_eof);
+        var second_eof = next_record(csv);
+        assert_array_is_null(second_eof);
+    }
+
+    /** Reads a single record that is ended by the end of input. */
+    public void read_no_trailing_new_line() throws GLib.Error {
+        const string CSV = "foo,bar,baz";
+        var csv = open_reader(CSV.data);
+
+        var header_row = next_record(csv);
+        assert_array(header_row)
+            .size(3)
+            .first_is("foo")
+            .at_index_is(1, "bar")
+            .at_index_is(2, "baz");
+
+        var eof = next_record(csv);
+        assert_array_is_null(eof);
+    }
+
+    /** Reads a record made up of three empty fields. */
+    public void read_empty_records() throws GLib.Error {
+        const string CSV = ",,";
+        var csv = open_reader(CSV.data);
+
+        var header_row = next_record(csv);
+        assert_array(header_row)
+            .size(3)
+            .first_is("")
+            .at_index_is(1, "")
+            .at_index_is(2, "");
+
+        var eof = next_record(csv);
+        assert_array_is_null(eof);
+    }
+
+    /** Reads fields of two, three and four byte UTF-8 characters. */
+    public void read_multi_byte_chars() throws GLib.Error {
+        const string CSV = "á,☃,🤘";
+        var csv = open_reader(CSV.data);
+
+        var header_row = next_record(csv);
+        assert_array(header_row)
+            .size(3)
+            .first_is("á")
+            .at_index_is(1, "☃")
+            .at_index_is(2, "🤘");
+
+        var eof = next_record(csv);
+        assert_array_is_null(eof);
+    }
+
+    /** Reads quoted fields with escapes, separators and new lines. */
+    public void read_quoted() throws GLib.Error {
+        const string CSV = """"simple","foo""bar","foo,bar","foo
+bar",""""""";
+        var csv = open_reader(CSV.data);
+
+        var header_row = next_record(csv);
+        assert_array(header_row)
+            .size(5)
+            .first_is("simple")
+            .at_index_is(1, "foo\"bar")
+            .at_index_is(2, "foo,bar")
+            .at_index_is(3, "foo\nbar")
+            .at_index_is(4, "\"");
+
+        var eof = next_record(csv);
+        assert_array_is_null(eof);
+    }
+
+    // Asynchronously constructs a reader over the given bytes
+    private async Plugin.Util.Csv.Reader new_reader(uint8[] data)
+        throws GLib.Error {
+        var stream = new GLib.MemoryInputStream.from_data(data, null);
+        return yield new Plugin.Util.Csv.Reader(stream);
+    }
+
+    // Constructs a reader, blocking until construction completes
+    private Plugin.Util.Csv.Reader open_reader(uint8[] data)
+        throws GLib.Error {
+        new_reader.begin(data, this.async_completion);
+        return new_reader.end(async_result());
+    }
+
+    // Reads the next record, blocking until the read completes
+    private string[]? next_record(Plugin.Util.Csv.Reader reader)
+        throws GLib.Error {
+        reader.read_record.begin(this.async_completion);
+        return reader.read_record.end(async_result());
+    }
+}
+
+
+// Runs the reader unit tests from within a GLib main loop
+int main(string[] args) {
+    GLib.Test.init(ref args);
+
+    // NOTE(review): presumably forces the type to be registered
+    typeof(ReaderTests).name();
+
+    GLib.TestSuite.get_root().add_suite(new ReaderTests().suite);
+
+    int exit_status = -1;
+    var main_loop = new GLib.MainLoop();
+    GLib.Idle.add(() => {
+            exit_status = GLib.Test.run();
+            main_loop.quit();
+            return GLib.Source.REMOVE;
+        });
+    main_loop.run();
+    return exit_status;
+}


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]