[gnome-commander/googletest] Adds unit test for searching an UTF-8 string in internal viewer
- From: Uwe Scholz <uwescholz src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gnome-commander/googletest] Adds unit test for searching an UTF-8 string in internal viewer
- Date: Sun, 1 Nov 2015 11:27:00 +0000 (UTC)
commit 99fcb297b6f8b72c4d6e7a3d13109638cb53186f
Author: Uwe Scholz <uwescholz src gnome org>
Date: Sun Nov 1 12:23:51 2015 +0100
Adds unit test for searching an UTF-8 string in internal viewer
tests/Makefile.am | 2 +-
tests/iv_bm_search_test.cc | 78 +++++++++++++++++++++++++++++++++++++++++++-
tests/iv_bm_search_test.h | 2 +
3 files changed, 80 insertions(+), 2 deletions(-)
---
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 05d8bd8..3dc8ac0 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -23,7 +23,7 @@ intviewer_fileops_SOURCES = iv_fileops_test.cc gcmd_tests_main.cc
intviewer_fileops_CXXFLAGS = $(INTVLIBS)
intviewer_fileops_LDFLAGS = $(INTVLIBS) -lgtest
-intviewer_bm_byte_SOURCES = iv_bm_byte_test.cc gcmd_tests_main.cc
+intviewer_bm_byte_SOURCES = iv_bm_search_test.cc gcmd_tests_main.cc
intviewer_bm_byte_CXXFLAGS = $(INTVLIBS)
intviewer_bm_byte_LDFLAGS = $(INTVLIBS) -lgtest
diff --git a/tests/iv_bm_search_test.cc b/tests/iv_bm_search_test.cc
index 16e3bd3..96c3bd1 100644
--- a/tests/iv_bm_search_test.cc
+++ b/tests/iv_bm_search_test.cc
@@ -28,8 +28,13 @@
*/
#include "gtest/gtest.h"
-#include <iv_bm_byte_test.h>
+#include <iv_bm_search_test.h>
+/**
+ * In this test a short pattern of integers is searched inside a
+ * bigger array of integers.
+ * (see definitions in @link BmByteTest @endlink)
+ */
TEST_F(BmByteTest, match_test) {
GViewerBMByteData *data;
@@ -78,3 +83,74 @@ TEST_F(BmByteTest, match_test) {
free_bm_byte_data(data);
}
+
+
+/**
+ * In this test a pattern of UTF-8 encoded letters is searched in a text
+ * with UTF-8 encoded letters.
+ */
+TEST_F(BmByteTest, chartype_test) {
+ /*
+ * This is a valid UTF8 string, with four hebrew letters in it:
+ * 0xD7 0x90 = Aleph (Unicode U+5D0)
+ * 0xD7 0x95 = Vav (Unicode U+5D5)
+ * 0xD7 0x94 = He (Unicode U+5D4)
+ * 0xD7 0x91 = Bet (Unicode U+5D1)
+ * (Aleph-Vav-He-Bet, pronounced "ohev", means "love" in hebrew, FYI :-)
+ */
+ const gchar *pattern = "I \xd7\x90\xd7\x95\xd7\x94\xd7\x91 you";
+
+
+ // This is a valid UTF8 text, with pangrams in several languages (I hope I got it right...)
+ const gchar *text = \
+ "English:" \
+ "The quick brown fox jumps over the lazy dog" \
+ "Irish:" \
+ "An \xe1\xb8\x83 fuil do \xc4\x8bro\xc3\xad ag buala\xe1\xb8\x8b \xc3\xb3 \xe1\xb8\x9f ait\xc3\xados an \xc4\xa1r\xc3\xa1 a \xe1\xb9\x81 eall lena \xe1\xb9\x97\xc3\xb3g \xc3\xa9 ada \xc3\xb3 \xe1\xb9\xa1l\xc3\xad do leasa \xe1\xb9\xab\xc3\xba\x3f" \
+ "Swedish:" \
+ "Flygande b\xc3\xa4 ckasiner s\xc3\xb6ka strax hwila p\xc3\xa5 mjuka tuvor" \
+ "(our match: I \xd7\x90\xd7\x95\xd7\x94\xd7\x91 You)" \
+ "Hebrew:" \
+ "\xd7\x96\xd7\x94 \xd7\x9b\xd7\x99\xd7\xa3 \xd7\xa1\xd7\xaa\xd7\x9d \xd7\x9c\xd7\xa9\xd7\x9e\xd7\x95\xd7\xa2 \xd7\x90\xd7\x99\xd7\x9a \xd7\xaa\xd7\xa0\xd7\xa6\xd7\x97 \xd7\xa7\xd7\xa8\xd7\xa4\xd7\x93 \xd7\xa2\xd7\xa5 \xd7\x98\xd7\x95\xd7\x91 \xd7\x91\xd7\x92\xd7\x9f" \
+ "French:" \
+ "Les na\xc3\xaf fs \xc3\xa6githales h\xc3\xa2tifs pondant \xc3\xa0 No\xc3\xabl o\xc3\xb9 il g\xc3\xa8le sont s\xc3\xbbrs d\x27\xc3\xaatre d\xc3\xa9\xc3\xa7us et de voir leurs dr\xc3\xb4les d\x27\xc5\x93ufs ab\xc3\xaem\xc3\xa9s\x2e";
+
+ int i;
+ int j;
+ int m;
+ int n;
+ char_type *ct_text;
+ int ct_text_len;
+
+ GViewerBMChartypeData *data;
+
+ data = create_bm_chartype_data(pattern,FALSE);
+
+ // Convert the UTF8 text string to a chartype array
+ ct_text = convert_utf8_to_chartype_array(text, ct_text_len);
+ ASSERT_TRUE(ct_text) << "Failed to convert text to 'char_type' array (maybe 'text' is not a valid UTF8 string?)\n";
+
+ // Do the actual search
+ m = data->pattern_len;
+ n = ct_text_len;
+ j = 0;
+ int found_at = 0;
+ while (j <= n - m)
+ {
+ for (i = m - 1; i >= 0 && bm_chartype_equal(data,i,ct_text[i + j]); --i);
+
+ if (i < 0)
+ {
+ printf(" Found match at offset = %d\n", j);
+ found_at = j;
+ j += bm_chartype_get_good_match_advancement(data);
+
+ }
+ else
+ j += bm_chartype_get_advancement(data, i, ct_text[i+j]);
+ }
+ ASSERT_EQ(found_at, 217) << "String with UTF-8 letters not found in text where it should be found.";
+
+ g_free(ct_text);
+ free_bm_chartype_data(data);
+}
diff --git a/tests/iv_bm_search_test.h b/tests/iv_bm_search_test.h
index 99bc151..7572ce8 100644
--- a/tests/iv_bm_search_test.h
+++ b/tests/iv_bm_search_test.h
@@ -26,6 +26,8 @@
#include <intviewer/bm_byte.h>
#include <intviewer/gvtypes.h>
+#include <intviewer/viewer-utils.h>
+#include <intviewer/bm_chartype.h>
/**
* The fixture for testing class BmByteTest.
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]