[gnome-commander/googletest] Adds unit test for searching an UTF-8 string in internal viewer

From: Uwe Scholz <uwescholz src gnome org>
To: commits-list gnome org
Cc:
Subject: [gnome-commander/googletest] Adds unit test for searching an UTF-8 string in internal viewer
Date: Sun, 1 Nov 2015 11:27:00 +0000 (UTC)
commit 99fcb297b6f8b72c4d6e7a3d13109638cb53186f
Author: Uwe Scholz <uwescholz src gnome org>
Date:   Sun Nov 1 12:23:51 2015 +0100

    Adds unit test for searching an UTF-8 string in internal viewer

 tests/Makefile.am          |    2 +-
 tests/iv_bm_search_test.cc |   78 +++++++++++++++++++++++++++++++++++++++++++-
 tests/iv_bm_search_test.h  |    2 +
 3 files changed, 80 insertions(+), 2 deletions(-)
---
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 05d8bd8..3dc8ac0 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -23,7 +23,7 @@ intviewer_fileops_SOURCES = iv_fileops_test.cc gcmd_tests_main.cc
 intviewer_fileops_CXXFLAGS = $(INTVLIBS)
 intviewer_fileops_LDFLAGS = $(INTVLIBS) -lgtest
 
-intviewer_bm_byte_SOURCES = iv_bm_byte_test.cc gcmd_tests_main.cc 
+intviewer_bm_byte_SOURCES = iv_bm_search_test.cc gcmd_tests_main.cc 
 intviewer_bm_byte_CXXFLAGS = $(INTVLIBS)
 intviewer_bm_byte_LDFLAGS = $(INTVLIBS) -lgtest
 
diff --git a/tests/iv_bm_search_test.cc b/tests/iv_bm_search_test.cc
index 16e3bd3..96c3bd1 100644
--- a/tests/iv_bm_search_test.cc
+++ b/tests/iv_bm_search_test.cc
@@ -28,8 +28,13 @@
  */
 
 #include "gtest/gtest.h"
-#include <iv_bm_byte_test.h>
+#include <iv_bm_search_test.h>
 
+/**
+ * In this test a short pattern of integers is searched inside a 
+ * bigger array of integers.
+ * (see definitions in @link BmByteTest @endlink)
+ */
 TEST_F(BmByteTest, match_test) {
 
     GViewerBMByteData *data;
@@ -78,3 +83,74 @@ TEST_F(BmByteTest, match_test) {
 
     free_bm_byte_data(data);
 }
+
+
+/**
+ * In this test a pattern of UTF-8 encoded letters is searched in a text
+ * with UTF-8 encoded letters.
+ */
+TEST_F(BmByteTest, chartype_test) {
+    /* 
+     * This is a valid UTF8 string, with four hebrew letters in it:
+     *  0xD7 0x90 = Aleph (Unicode U+5D0)
+     *  0xD7 0x95 = Vav   (Unicode U+5D5)
+     *  0xD7 0x94 = He    (Unicode U+5D4)
+     *  0xD7 0x91 = Bet   (Unicode U+5D1)
+     *  (Aleph-Vav-He-Bet, pronounced "ohev", means "love" in hebrew, FYI :-)
+     */
+    const gchar *pattern = "I \xd7\x90\xd7\x95\xd7\x94\xd7\x91 you";
+
+
+    // This is a valid UTF8 text, with pangrams in several languages (I hope I got it right...)
+    const gchar *text = \
+        "English:" \
+        "The quick brown fox jumps over the lazy dog" \
+        "Irish:" \
+        "An \xe1\xb8\x83 fuil do \xc4\x8bro\xc3\xad ag buala\xe1\xb8\x8b \xc3\xb3 \xe1\xb8\x9f ait\xc3\xados an \xc4\xa1r\xc3\xa1 a \xe1\xb9\x81 eall lena \xe1\xb9\x97\xc3\xb3g \xc3\xa9 ada \xc3\xb3 \xe1\xb9\xa1l\xc3\xad do leasa \xe1\xb9\xab\xc3\xba\x3f" \
+        "Swedish:" \
+        "Flygande b\xc3\xa4 ckasiner s\xc3\xb6ka strax hwila p\xc3\xa5 mjuka tuvor" \
+        "(our match: I \xd7\x90\xd7\x95\xd7\x94\xd7\x91 You)" \
+        "Hebrew:" \
+        "\xd7\x96\xd7\x94 \xd7\x9b\xd7\x99\xd7\xa3 \xd7\xa1\xd7\xaa\xd7\x9d \xd7\x9c\xd7\xa9\xd7\x9e\xd7\x95\xd7\xa2 \xd7\x90\xd7\x99\xd7\x9a \xd7\xaa\xd7\xa0\xd7\xa6\xd7\x97 \xd7\xa7\xd7\xa8\xd7\xa4\xd7\x93 \xd7\xa2\xd7\xa5 \xd7\x98\xd7\x95\xd7\x91 \xd7\x91\xd7\x92\xd7\x9f" \
+        "French:" \
+        "Les na\xc3\xaf fs \xc3\xa6githales h\xc3\xa2tifs pondant \xc3\xa0 No\xc3\xabl o\xc3\xb9 il g\xc3\xa8le sont s\xc3\xbbrs d\x27\xc3\xaatre d\xc3\xa9\xc3\xa7us et de voir leurs dr\xc3\xb4les d\x27\xc5\x93ufs ab\xc3\xaem\xc3\xa9s\x2e";
+
+    int i;
+    int j;
+    int m;
+    int n;
+    char_type *ct_text;
+    int  ct_text_len;
+
+    GViewerBMChartypeData *data;
+
+    data = create_bm_chartype_data(pattern,FALSE);
+
+    // Convert the UTF8 text string to a chartype array
+    ct_text = convert_utf8_to_chartype_array(text, ct_text_len);
+    ASSERT_TRUE(ct_text) << "Failed to convert text to 'char_type' array (maybe 'text' is not a valid UTF8 string?)\n";
+
+    // Do the actual search
+    m = data->pattern_len;
+    n = ct_text_len;
+    j = 0;
+    int found_at = 0;
+    while (j <= n - m)
+    {
+        for (i = m - 1; i >= 0 && bm_chartype_equal(data,i,ct_text[i + j]); --i);
+
+        if (i < 0)
+        {
+            printf(" Found match at offset = %d\n", j);
+            found_at = j;
+            j += bm_chartype_get_good_match_advancement(data);
+
+        }
+        else
+            j += bm_chartype_get_advancement(data, i, ct_text[i+j]);
+    }
+    ASSERT_EQ(found_at, 217) << "String with UTF-8 letters not found in text where it should be found.";
+
+    g_free(ct_text);
+    free_bm_chartype_data(data);
+}
diff --git a/tests/iv_bm_search_test.h b/tests/iv_bm_search_test.h
index 99bc151..7572ce8 100644
--- a/tests/iv_bm_search_test.h
+++ b/tests/iv_bm_search_test.h
@@ -26,6 +26,8 @@
 
 #include <intviewer/bm_byte.h>
 #include <intviewer/gvtypes.h>
+#include <intviewer/viewer-utils.h>
+#include <intviewer/bm_chartype.h>
 
 /**
  *  The fixture for testing class BmByteTest.
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]