[geary/mjog/mail-merge-plugin: 40/71] Plugin.MailMerge: Add simple CSV reader with unit tests
- From: Michael Gratton <mjog src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [geary/mjog/mail-merge-plugin: 40/71] Plugin.MailMerge: Add simple CSV reader with unit tests
- Date: Wed, 5 Aug 2020 05:00:48 +0000 (UTC)
commit 593f3e2dc3b8f3dda97a8d91a68bdba4b6d761aa
Author: Michael Gratton <mike vee net>
Date: Thu May 21 13:49:56 2020 +1000
Plugin.MailMerge: Add simple CSV reader with unit tests
src/client/plugin/mail-merge/meson.build | 37 ++++-
src/client/plugin/mail-merge/util-csv.vala | 228 ++++++++++++++++++++++++++++
src/client/plugin/mail-merge/util-test.vala | 181 ++++++++++++++++++++++
3 files changed, 444 insertions(+), 2 deletions(-)
---
diff --git a/src/client/plugin/mail-merge/meson.build b/src/client/plugin/mail-merge/meson.build
index 218989125..e3d5e555a 100644
--- a/src/client/plugin/mail-merge/meson.build
+++ b/src/client/plugin/mail-merge/meson.build
@@ -1,14 +1,35 @@
plugin_name = 'mail-merge'
-plugin_src = files(plugin_name + '.vala')
+plugin_src = files(
+ plugin_name + '.vala'
+)
plugin_data = plugin_name + plugin_data_suffix
plugin_dest = plugins_dir / plugin_name
-shared_module(
+lib_src = files(
+ 'util-csv.vala'
+)
+
+test_src = files(
+ 'util-test.vala'
+)
+
+lib = static_library(
+ 'util',
+ sources: lib_src,
+ dependencies: plugin_dependencies,
+ include_directories: config_h_dir,
+ vala_args: geary_vala_args,
+ c_args: plugin_c_args,
+ install: false
+)
+
+plugin = shared_module(
plugin_name,
sources: plugin_src,
dependencies: plugin_dependencies,
+ link_with: lib,
include_directories: config_h_dir,
vala_args: geary_vala_args,
c_args: plugin_c_args,
@@ -24,4 +45,16 @@ i18n.merge_file(
install: true,
install_dir: plugin_dest,
install_rpath: client_lib_dir,
+
+plugin_test = executable(
+ 'plugin-test',
+ test_src,
+ dependencies: plugin_dependencies + [ vala_unit_dep ],
+ link_with: lib,
+ include_directories: config_h_dir,
+ vala_args: geary_vala_args,
+ c_args: plugin_c_args,
+ install: false
)
+
+test(plugin_name + '-test', plugin_test)
diff --git a/src/client/plugin/mail-merge/util-csv.vala b/src/client/plugin/mail-merge/util-csv.vala
new file mode 100644
index 000000000..037e10e58
--- /dev/null
+++ b/src/client/plugin/mail-merge/util-csv.vala
@@ -0,0 +1,228 @@
+/*
+ * Copyright © 2020 Michael Gratton <mike vee net>.
+ *
+ * This software is licensed under the GNU Lesser General Public License
+ * (version 2.1 or later). See the COPYING file in this distribution.
+ */
+
+
+/**
+ * Denotes CSV-specific error conditions.
+ *
+ * These are raised by the CSV reader while decoding its input stream.
+ */
+public errordomain Plugin.Util.Csv.DataError {
+
+ /**
+  * The input stream contained non-text data.
+  *
+  * Raised when a null byte is read, or when a byte above 0x7F does
+  * not match any of the two, three or four byte UTF-8 lead patterns.
+  */
+ NON_TEXT_DATA,
+
+ /**
+  * The end of line terminator could not be determined.
+  *
+  * Raised when no line ending is known yet and the character found
+  * after a record is neither a line feed nor a carriage return.
+  */
+ UNKNOWN_EOL,
+
+ /**
+  * The end of line terminator was not found.
+  *
+  * Raised when a line ending is known but the characters found
+  * after a record do not match it.
+  */
+ EOL_NOT_FOUND;
+}
+
+
+/**
+ * A simple comma-separated value (CSV) reader.
+ *
+ * To use this class, simply construct an instance and start calling
+ * {@link read_record}.
+ */
+public class Plugin.Util.Csv.Reader : Geary.BaseObject {
+
+
+ // High-order bit patterns used to classify UTF-8 bytes. Each is the
+ // value expected once a byte has been shifted right to discard its
+ // payload bits: by 5 for a two-byte lead, by 4 for a three-byte
+ // lead, by 3 for a four-byte lead and by 6 for a trailing
+ // (continuation) byte.
+ private const uint8 UTF8_DOUBLE = 0x06; // `110`
+ private const uint8 UTF8_TRIPLE = 0x0E; // `1110`
+ private const uint8 UTF8_QUADRUPLE = 0x1E; // `11110`
+ private const uint8 UTF8_TRAILER = 0x02; // `10`
+ // Substituted for a decoded character that fails validation
+ private const unichar UNICODE_REPLACEMENT_CHAR = 0xFFFD;
+
+
+    /**
+     * Determines if a character counts as plain field text.
+     *
+     * Everything from the space character (0x20) upwards is accepted,
+     * apart from the double quote (0x22), the comma (0x2C) and DEL
+     * (0x7F). Control characters below 0x20 are always rejected.
+     */
+    private static inline bool is_text_char(unichar c) {
+        // C0 control characters and DEL are never text
+        if (c < 0x20 || c == 0x7F) {
+            return false;
+        }
+        // Neither are the quote marker nor the comma
+        if (c == 0x22 || c == 0x2C) {
+            return false;
+        }
+        return true;
+    }
+
+
+ /**
+  * The line terminator expected after each record.
+  *
+  * While this is null or empty, the terminator is detected from the
+  * characters following the next record read and then stored here.
+  */
+ public string? line_ending { get; set; default = null; }
+ /** The character separating the fields of a record. */
+ public char field_separator { get; set; default = ','; }
+
+ // The stream being decoded and the cancellable passed to reads on it
+ private GLib.InputStream input;
+ private GLib.Cancellable? cancellable;
+
+ // Single character of look-ahead: decoded, but not yet consumed
+ private unichar next_char = '\0';
+ // Length of the last record returned, used to pre-size the next one
+ private uint last_record_length = 0;
+
+
+    /**
+     * Constructs a new CSV file reader.
+     *
+     * The given stream is wrapped in a buffered stream and the first
+     * character is read from it straight away to fill the
+     * look-ahead, so an IOError or other error may be thrown during
+     * construction.
+     *
+     * The given cancellable, if not null, is passed to the read
+     * operations performed on the stream; otherwise a new
+     * cancellable is created for that purpose.
+     */
+    public async Reader(GLib.InputStream input,
+                        GLib.Cancellable? cancellable = null)
+        throws GLib.Error {
+        // Buffered, since the stream is read a single byte at a time
+        this.input = new GLib.BufferedInputStream(input);
+
+        if (cancellable != null) {
+            this.cancellable = cancellable;
+        } else {
+            this.cancellable = new GLib.Cancellable();
+        }
+
+        // Fill the look-ahead with the first character
+        yield read_char();
+    }
+
+    /**
+     * Reads the next record from the input stream.
+     *
+     * Fields are read until one is not followed by the field
+     * separator. The line ending is then consumed, unless the end of
+     * the stream was reached while reading the fields.
+     *
+     * The returned array holds exactly the fields that were read: a
+     * record shorter than the previous one is not padded.
+     *
+     * @return the fields of the record, or null if the stream has
+     * already been closed, which happens once its end is reached
+     * @throws GLib.Error if reading from the stream fails, or if the
+     * data following the record is not a valid line ending
+     */
+    public async string[]? read_record() throws GLib.Error {
+        if (this.input.is_closed()) {
+            this.last_record_length = 0;
+            return null;
+        }
+
+        // Pre-size the array using the previous record's length so
+        // that uniform files do not need to grow it field by field
+        string[] record = new string[this.last_record_length];
+        int next_field = 0;
+        while (true) {
+            string field = yield read_field();
+            if (next_field < record.length) {
+                record[next_field] = field;
+            } else {
+                record += field;
+            }
+            ++next_field;
+
+            if (this.next_char != this.field_separator) {
+                break;
+            }
+            // skip the field sep
+            yield read_char();
+        }
+
+        if (next_field < record.length) {
+            // Fewer fields than the previous record: drop the unused
+            // slots rather than handing back trailing null entries
+            record = record[0:next_field];
+        }
+
+        if (!this.input.is_closed()) {
+            yield read_eol();
+        }
+
+        this.last_record_length = record.length;
+        return record;
+    }
+
+    /**
+     * Reads a single field, starting at the look-ahead character.
+     *
+     * If the field starts with a double quote it is read in quoted
+     * mode: every character is accepted, a doubled quote stands for
+     * one literal quote, and a lone quote ends quoted mode. Outside
+     * of quoted mode, reading stops at the field separator or at the
+     * first character that is not text. Reading always stops once
+     * the stream has been closed.
+     *
+     * @return the text of the field, possibly empty
+     */
+    private async string read_field() throws GLib.Error {
+        bool in_quotes = false;
+        if (this.next_char == '"') {
+            in_quotes = true;
+            // drop the opening quote
+            yield read_char();
+        }
+
+        var text = new GLib.StringBuilder();
+        while (!this.input.is_closed()) {
+            if (!in_quotes &&
+                (this.next_char == this.field_separator ||
+                 !is_text_char(this.next_char))) {
+                break;
+            }
+
+            unichar current = yield read_char();
+            if (!in_quotes || current != '"') {
+                text.append_unichar(current);
+            } else if (this.next_char != '"') {
+                // a lone quote ends the quoted section
+                in_quotes = false;
+            } else {
+                // a doubled quote: emit one, consume the other
+                text.append_c('"');
+                yield read_char();
+            }
+        }
+        return text.str;
+    }
+
+    /**
+     * Consumes the line ending that follows a record.
+     *
+     * If no line ending is known yet, it is detected from the input
+     * as one of LF, CR LF or CR and remembered in {@link line_ending}.
+     * Otherwise the input must match the known line ending exactly.
+     *
+     * @throws GLib.Error a DataError.UNKNOWN_EOL if detection finds
+     * neither LF nor CR, a DataError.EOL_NOT_FOUND if the input does
+     * not match the known line ending or ends part way through it,
+     * or any error raised while reading from the stream
+     */
+    private async void read_eol() throws GLib.Error {
+        string? expected = this.line_ending;
+        if (expected == null || expected == "") {
+            // Don't know what the line ending currently is, so guess
+            // it
+            unichar c = yield read_char();
+            if (c == '\n') {
+                this.line_ending = "\n";
+            } else if (c == '\r') {
+                if (this.next_char == '\n') {
+                    // consume it
+                    yield read_char();
+                    this.line_ending = "\r\n";
+                } else {
+                    this.line_ending = "\r";
+                }
+            } else {
+                throw new DataError.UNKNOWN_EOL(
+                    "Unable to determine end of line character 0x%02x", c
+                );
+            }
+            return;
+        }
+
+        // Known line ending, so check for it
+        for (int i = 0; i < expected.length; i++) {
+            if (this.input.is_closed()) {
+                // The previous read hit the end of the input, so
+                // reading again would fail on the closed stream;
+                // report the truncated line ending instead
+                throw new DataError.EOL_NOT_FOUND(
+                    "Unexpected end of input within end of line sequence"
+                );
+            }
+            unichar c = yield read_char();
+            if (expected[i] != c) {
+                throw new DataError.EOL_NOT_FOUND(
+                    "Unexpected end of line character: 0x%02X", c
+                );
+            }
+        }
+    }
+
+    /**
+     * Advances the look-ahead by one character.
+     *
+     * The next UTF-8 character is decoded from the stream and
+     * becomes the new look-ahead. A multi-byte sequence that is
+     * truncated or otherwise malformed is replaced by U+FFFD. When
+     * no more bytes can be read, the look-ahead is reset to NUL and
+     * the stream is closed.
+     *
+     * @return the character that was the look-ahead before the call
+     * @throws GLib.Error a DataError.NON_TEXT_DATA if a null byte or
+     * a byte that cannot start a UTF-8 sequence is read, or any
+     * error raised while reading from or closing the stream
+     */
+    private async unichar read_char() throws GLib.Error {
+        unichar c = this.next_char;
+
+        // fixed-size, single byte read buffer
+        uint8 buf[1];
+        size_t bytes_read = 0;
+        yield this.input.read_all_async(
+            buf, GLib.Priority.DEFAULT, this.cancellable, out bytes_read
+        );
+        if (bytes_read > 0) {
+            uint8 next = buf[0];
+            if (next == 0x00) {
+                throw new DataError.NON_TEXT_DATA("Read null byte");
+            }
+            if (next <= 0x7F) {
+                // Plain ASCII, no further decoding needed
+                this.next_char = (unichar) next;
+            } else {
+                // Work out from the lead byte how many trailing
+                // bytes make up the rest of the sequence
+                uint to_read = 0;
+                if (next >> 5 == UTF8_DOUBLE) {
+                    to_read = 1;
+                } else if (next >> 4 == UTF8_TRIPLE) {
+                    to_read = 2;
+                } else if (next >> 3 == UTF8_QUADRUPLE) {
+                    to_read = 3;
+                } else {
+                    throw new DataError.NON_TEXT_DATA("Invalid UTF-8 data");
+                }
+
+                // Assemble the sequence as a NUL-terminated string
+                uint8 utf[5];
+                utf[0] = next;
+                utf[to_read + 1] = 0x00;
+                for (int i = 0; i < to_read; i++) {
+                    yield this.input.read_all_async(
+                        buf,
+                        GLib.Priority.DEFAULT,
+                        this.cancellable,
+                        out bytes_read
+                    );
+                    if (bytes_read == 1 && buf[0] >> 6 == UTF8_TRAILER) {
+                        utf[i + 1] = buf[0];
+                    } else {
+                        // Sequence cut short, so terminate it here.
+                        // NOTE: a byte that was read but failed the
+                        // trailer check has been consumed and is
+                        // discarded.
+                        utf[i + 1] = 0x00;
+                        break;
+                    }
+                }
+
+                // Decode with validation: a truncated or malformed
+                // sequence then yields an out-of-range value that
+                // fails the check below, rather than whatever an
+                // unchecked decode of bad input happens to produce
+                this.next_char = ((string) utf).get_char_validated();
+                if (!this.next_char.validate()) {
+                    this.next_char = UNICODE_REPLACEMENT_CHAR;
+                }
+            }
+        } else {
+            // End of input: clear the look-ahead and close the stream
+            this.next_char = '\0';
+            yield this.input.close_async();
+        }
+        return c;
+    }
+
+}
\ No newline at end of file
diff --git a/src/client/plugin/mail-merge/util-test.vala b/src/client/plugin/mail-merge/util-test.vala
new file mode 100644
index 000000000..5be404ffc
--- /dev/null
+++ b/src/client/plugin/mail-merge/util-test.vala
@@ -0,0 +1,181 @@
+/*
+ * Copyright © 2020 Michael Gratton <mike vee net>
+ *
+ * This software is licensed under the GNU Lesser General Public License
+ * (version 2.1 or later). See the COPYING file in this distribution.
+ */
+
+/** Unit tests for the CSV reader. */
+public class ReaderTests : ValaUnit.TestCase {
+
+
+    public ReaderTests() {
+        base("ReaderTests");
+        add_test("read_simple_lf", read_simple_lf);
+        add_test("read_simple_crlf", read_simple_crlf);
+        add_test("read_no_trailing_new_line", read_no_trailing_new_line);
+        add_test("read_empty_records", read_empty_records);
+        add_test("read_multi_byte_chars", read_multi_byte_chars);
+        add_test("read_quoted", read_quoted);
+    }
+
+    /** Two LF-terminated records, followed by end of input. */
+    public void read_simple_lf() throws GLib.Error {
+        var reader = open_reader("foo,bar,baz\n1,2,3\n");
+
+        var header_row = next_record(reader);
+        assert_array(header_row)
+            .size(3)
+            .first_is("foo")
+            .at_index_is(1, "bar")
+            .at_index_is(2, "baz");
+
+        var value_row = next_record(reader);
+        assert_array(value_row)
+            .size(3)
+            .first_is("1")
+            .at_index_is(1, "2")
+            .at_index_is(2, "3");
+
+        // Both the read hitting EOF and the one after it give null
+        assert_array_is_null(next_record(reader));
+        assert_array_is_null(next_record(reader));
+    }
+
+    /** Two CR LF-terminated records, followed by end of input. */
+    public void read_simple_crlf() throws GLib.Error {
+        var reader = open_reader("foo,bar,baz\r\n1,2,3\r\n");
+
+        var header_row = next_record(reader);
+        assert_array(header_row)
+            .size(3)
+            .first_is("foo")
+            .at_index_is(1, "bar")
+            .at_index_is(2, "baz");
+
+        var value_row = next_record(reader);
+        assert_array(value_row)
+            .size(3)
+            .first_is("1")
+            .at_index_is(1, "2")
+            .at_index_is(2, "3");
+
+        // Both the read hitting EOF and the one after it give null
+        assert_array_is_null(next_record(reader));
+        assert_array_is_null(next_record(reader));
+    }
+
+    /** A single record that is not terminated by a line ending. */
+    public void read_no_trailing_new_line() throws GLib.Error {
+        var reader = open_reader("foo,bar,baz");
+
+        var header_row = next_record(reader);
+        assert_array(header_row)
+            .size(3)
+            .first_is("foo")
+            .at_index_is(1, "bar")
+            .at_index_is(2, "baz");
+
+        assert_array_is_null(next_record(reader));
+    }
+
+    /** A single record made up of three empty fields. */
+    public void read_empty_records() throws GLib.Error {
+        var reader = open_reader(",,");
+
+        var header_row = next_record(reader);
+        assert_array(header_row)
+            .size(3)
+            .first_is("")
+            .at_index_is(1, "")
+            .at_index_is(2, "");
+
+        assert_array_is_null(next_record(reader));
+    }
+
+    /** Fields holding two, three and four byte UTF-8 characters. */
+    public void read_multi_byte_chars() throws GLib.Error {
+        var reader = open_reader("á,☃,🤘");
+
+        var header_row = next_record(reader);
+        assert_array(header_row)
+            .size(3)
+            .first_is("á")
+            .at_index_is(1, "☃")
+            .at_index_is(2, "🤘");
+
+        assert_array_is_null(next_record(reader));
+    }
+
+    /**
+     * Quoted fields: plain, with an escaped quote, with an embedded
+     * separator, with an embedded new line, and a lone escaped quote.
+     */
+    public void read_quoted() throws GLib.Error {
+        const string CSV = """"simple","foo""bar","foo,bar","foo
bar",""""""";
+
+        var reader = open_reader(CSV);
+
+        var header_row = next_record(reader);
+        assert_array(header_row)
+            .size(5)
+            .first_is("simple")
+            .at_index_is(1, "foo\"bar")
+            .at_index_is(2, "foo,bar")
+            .at_index_is(3, "foo\nbar")
+            .at_index_is(4, "\"");
+
+        assert_array_is_null(next_record(reader));
+    }
+
+    // Synchronously constructs a reader over the bytes of the given text
+    private Plugin.Util.Csv.Reader open_reader(string csv)
+        throws GLib.Error {
+        new_reader.begin(csv.data, this.async_completion);
+        return new_reader.end(async_result());
+    }
+
+    // Synchronously reads the next record from the given reader
+    private string[]? next_record(Plugin.Util.Csv.Reader reader)
+        throws GLib.Error {
+        reader.read_record.begin(this.async_completion);
+        return reader.read_record.end(async_result());
+    }
+
+    private async Plugin.Util.Csv.Reader new_reader(uint8[] data)
+        throws GLib.Error {
+        var source = new GLib.MemoryInputStream.from_data(data, null);
+        return yield new Plugin.Util.Csv.Reader(source);
+    }
+
+}
+
+
+/**
+ * Test runner entry point.
+ *
+ * Adds the reader tests to the root suite, then runs all tests from
+ * an idle callback so that a main loop is running while they
+ * execute.
+ *
+ * @return the result of running the tests, or -1 if they never ran
+ */
+int main(string[] args) {
+    Test.init(ref args);
+
+    // NOTE(review): the result is unused; presumably this is only
+    // here to make sure the test case type gets registered — confirm
+    typeof(ReaderTests).name();
+
+    TestSuite.get_root().add_suite(new ReaderTests().suite);
+
+    int exit_status = -1;
+    var main_loop = new MainLoop();
+    Idle.add(() => {
+        exit_status = Test.run();
+        main_loop.quit();
+        return Source.REMOVE;
+    });
+    main_loop.run();
+
+    return exit_status;
+}
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]