[glib/unicode-test] Add a unicode data test



commit 5988509d25a99836e904988be57541997e237f23
Author: Matthias Clasen <mclasen redhat com>
Date:   Mon Jul 8 13:38:36 2019 -0400

    Add a unicode data test
    
    This just dumps out our Unicode data for
    given input, and can compare the results
    to expected values.
    
    This has been useful to me for some quick
    inspection of Unicode data.

 tests/chars/one.chars    |   1 +
 tests/chars/one.expected |   4 +
 tests/meson.build        |   3 +
 tests/test-common.c      |  81 ++++++++++++++
 tests/test-common.h      |  10 ++
 tests/unicode-data.c     | 270 +++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 369 insertions(+)
---
diff --git a/tests/chars/one.chars b/tests/chars/one.chars
new file mode 100644
index 000000000..c26db8011
--- /dev/null
+++ b/tests/chars/one.chars
@@ -0,0 +1 @@
+a b c d e f g h i j k l​m n o p        q
r
s
diff --git a/tests/chars/one.expected b/tests/chars/one.expected
new file mode 100644
index 000000000..bfb819ed4
--- /dev/null
+++ b/tests/chars/one.expected
@@ -0,0 +1,4 @@
+Text:       a    [ ]  b    [0xa0]c    [0x2002]d    [0x2003]e    [0x2004]f    [0x2005]g    [0x2006]h    
[0x2007]i    [0x2008]j    [0x2009]k    [0x200a]l    [0x200b]m    [0x202f]n    [0x205f]o    [0x3000]p    
[0x09]q    [0x2028]r    [0x2029]s    [0x0a]
+Char type:  Ll   Zs   Ll   Zs    Ll   Zs      Ll   Zs      Ll   Zs      Ll   Zs      Ll   Zs      Ll   Zs    
  Ll   Zs      Ll   Zs      Ll   Zs      Ll   Cf      Ll   Zs      Ll   Zs      Ll   Zs      Ll   Cc    Ll   
Zl      Ll   Zp      Ll   Cc    
+Break type: AL   SP   AL   GL    AL   BA      AL   BA      AL   BA      AL   BA      AL   BA      AL   GL    
  AL   BA      AL   BA      AL   BA      AL   ZW      AL   GL      AL   BA      AL   BA      AL   BA    AL   
BK      AL   BK      AL   LF    
+Script:     Latn Zyyy Latn Zyyy  Latn Zyyy    Latn Zyyy    Latn Zyyy    Latn Zyyy    Latn Zyyy    Latn Zyyy  
  Latn Zyyy    Latn Zyyy    Latn Zyyy    Latn Zyyy    Latn Zyyy    Latn Zyyy    Latn Zyyy    Latn Zyyy  Latn 
Zyyy    Latn Zyyy    Latn Zyyy  
diff --git a/tests/meson.build b/tests/meson.build
index ce3044258..0b33ae90b 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -43,6 +43,9 @@ tests = {
   'threadpool-test' : {'suite' : ['slow']},
   'type-test' : {},
   'unicode-caseconv' : {},
+  'unicode-data' : {
+    'extra_sources' : 'test-common.c',
+  }, 
   'unicode-encoding' : {},
   'module-test' : {
     'dependencies' : [libgmodule_dep],
diff --git a/tests/test-common.c b/tests/test-common.c
new file mode 100644
index 000000000..c317b3b1c
--- /dev/null
+++ b/tests/test-common.c
@@ -0,0 +1,81 @@
+/* GLib
+ * test-common.c: Common test code
+ *
+ * Copyright (C) 2014 Red Hat, Inc
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <glib.h>
+#include <string.h>
+
+#include <locale.h>
+
+#ifdef G_OS_WIN32
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
+
+#include "test-common.h"
+
+/*
+ * diff_with_file:
+ * @file: path of the file holding the expected contents
+ * @text: buffer holding the actual contents
+ * @len: number of bytes of @text to compare, or a negative value if
+ *   @text is NUL-terminated
+ * @error: return location for an error, or NULL
+ *
+ * Writes @text to a temporary file and runs "diff -u -i" between @file
+ * and that temporary file. The temporary file is removed before
+ * returning.
+ *
+ * NOTE(review): "-i" asks diff to ignore case differences; confirm that
+ * this is intended for the callers' data.
+ *
+ * Returns: the captured standard output of diff (to be released with
+ *   g_free()), or NULL if the temporary file could not be created or
+ *   written, in which case @error is set.
+ */
+char *
+diff_with_file (const char  *file,
+                char        *text,
+                gssize       len,
+                GError     **error)
+{
+  const char *command[] = { "diff", "-u", "-i", file, NULL, NULL };
+  char *diff = NULL;
+  char *tmpfile = NULL;
+  const char *cursor;
+  gssize remaining;
+  int fd;
+
+  if (len < 0)
+    len = strlen (text);
+
+  /* write the text buffer to a temporary file */
+  fd = g_file_open_tmp (NULL, &tmpfile, error);
+  if (fd < 0)
+    return NULL;
+
+  /* write() may store fewer bytes than requested; keep going until the
+   * whole buffer is out, and compare sizes without narrowing to int.
+   */
+  cursor = text;
+  remaining = len;
+  while (remaining > 0)
+    {
+      gssize written = write (fd, cursor, remaining);
+
+      if (written <= 0)
+        {
+          close (fd);
+          g_set_error (error,
+                       G_FILE_ERROR, G_FILE_ERROR_FAILED,
+                       "Could not write data to temporary file '%s'", tmpfile);
+          goto done;
+        }
+
+      cursor += written;
+      remaining -= written;
+    }
+  close (fd);
+  command[4] = tmpfile;
+
+  /* run diff command; only its standard output is captured */
+  g_spawn_sync (NULL,
+                (char **) command,
+                NULL,
+                G_SPAWN_SEARCH_PATH,
+                NULL, NULL,
+                &diff,
+                NULL, NULL,
+                error);
+
+done:
+  unlink (tmpfile);
+  g_free (tmpfile);
+
+  return diff;
+}
diff --git a/tests/test-common.h b/tests/test-common.h
new file mode 100644
index 000000000..2b4de821e
--- /dev/null
+++ b/tests/test-common.h
@@ -0,0 +1,10 @@
+#ifndef __TEST_COMMON_H__
+#define __TEST_COMMON_H__
+
+/* gssize and GError are used below, so pull in GLib here to keep this
+ * header usable regardless of the include order in the caller.
+ */
+#include <glib.h>
+
+/* Compares the contents of @file against the first @len bytes of @text
+ * (or all of @text when @len is negative) and returns the diff output as
+ * a newly allocated string, to be released with g_free().
+ */
+char * diff_with_file (const char  *file,
+                       char        *text,
+                       gssize       len,
+                       GError     **error);
+
+
+#endif /* __TEST_COMMON_H__ */
diff --git a/tests/unicode-data.c b/tests/unicode-data.c
new file mode 100644
index 000000000..1c2eef1d2
--- /dev/null
+++ b/tests/unicode-data.c
@@ -0,0 +1,270 @@
+/* GLib
+ * unicode-data.c: Test Unicode character data
+ *
+ * Copyright (C) 2019 Red Hat, Inc
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include "config.h"
+#include <glib.h>
+#include <string.h>
+#include <locale.h>
+
+#ifndef G_OS_WIN32
+#include <unistd.h>
+#endif
+
+#include "test-common.h"
+
+/*
+ * char_type:
+ * @t: a #GUnicodeType value
+ *
+ * Looks up the two-letter abbreviation for @t. The table is indexed
+ * directly by the enumeration value.
+ *
+ * Returns: a static string; "??" if @t lies outside the table (for
+ *   example when GLib knows more types than this test does).
+ */
+static const char *
+char_type (GUnicodeType t)
+{
+  static const char * const names[] = {
+    "Cc", "Cf", "Cn", "Co", "Cs", "Ll", "Lm", "Lo", "Lt",
+    "Lu", "Mc", "Me", "Mn", "Nd", "Nl", "No", "Pc", "Pd",
+    "Pe", "Pf", "Pi", "Po", "Ps", "Sc", "Sk", "Sm", "So",
+    "Zl", "Zp", "Zs"
+  };
+
+  /* The unsigned cast also rejects negative values. */
+  if ((gsize) t >= G_N_ELEMENTS (names))
+    return "??";
+
+  return names[t];
+}
+
+/*
+ * break_type:
+ * @t: a #GUnicodeBreakType value
+ *
+ * Looks up the short abbreviation for the line-break class @t. The table
+ * is indexed directly by the enumeration value.
+ *
+ * Returns: a static string; "??" if @t lies outside the table (for
+ *   example when GLib knows more break types than this test does).
+ */
+static const char *
+break_type (GUnicodeBreakType t)
+{
+  static const char * const names[] = {
+    "BK", "CR", "LF", "CM", "SG", "ZW", "IN", "GL", "CB",
+    "SP", "BA", "BB", "B2", "HY", "NS", "OP", "CL", "QU",
+    "EX", "ID", "NU", "IS", "SY", "AL", "PR", "PO", "SA",
+    "AI", "XX", "NL", "WJ", "JL", "JV", "JT", "H2", "H3",
+    "CP", "CJ", "HL", "RI", "EB", "EM", "ZWJ"
+  };
+
+  /* The unsigned cast also rejects negative values. */
+  if ((gsize) t >= G_N_ELEMENTS (names))
+    return "??";
+
+  return names[t];
+}
+
+/*
+ * script_name:
+ * @s: a #GUnicodeScript value
+ *
+ * Looks up the four-letter script code for @s. The table is indexed
+ * directly by the enumeration value, so its entries must stay in the
+ * same order as the enumeration.
+ *
+ * Returns: a static string; "????" if @s lies outside the table (this
+ *   covers negative values as well as scripts added to GLib later).
+ */
+static const char *
+script_name (GUnicodeScript s)
+{
+  static const char * const names[] = {
+    "Zyyy", "Zinh", "Arab", "Armn", "Beng", "Bopo", "Cher",
+    "Copt", "Cyrl", "Dsrt", "Deva", "Ethi", "Geor", "Goth",
+    "Grek", "Gujr", "Guru", "Hani", "Hang", "Hebr", "Hira",
+    "Knda", "Kana", "Khmr", "Laoo", "Latn", "Mlym", "Mong",
+    "Mymr", "Ogam", "Ital", "Orya", "Runr", "Sinh", "Syrc",
+    "Taml", "Telu", "Thaa", "Thai", "Tibt", "Cans", "Yiii",
+    "Tglg", "Hano", "Buhd", "Tagb", "Brai", "Cprt", "Limb",
+    "Osma", "Shaw", "Linb", "Tale", "Ugar", "Talu", "Bugi",
+    "Glag", "Tfng", "Sylo", "Xpeo", "Khar", "Zzzz", "Bali",
+    "Xsux", "Phnx", "Phag", "Nkoo", "Kali", "Lepc", "Rjng",
+    "Sund", "Saur", "Cham", "Olck", "Vaii", "Cari", "Lyci",
+    "Lydi", "Avst", "Bamu", "Egyp", "Armi", "Phli", "Prti",
+    "Java", "Kthi", "Lisu", "Mtei", "Sarb", "Orkh", "Samr",
+    "Lana", "Tavt", "Batk", "Brah", "Mand", "Cakm", "Merc",
+    "Mero", "Plrd", "Shrd", "Sora", "Takr", "Bass", "Aghb",
+    "Dupl", "Elba", "Gran", "Khoj", "Sind", "Lina", "Mahj",
+    "Mani", "Mend", "Modi", "Mroo", "Nbat", "Narb", "Perm",
+    "Hmng", "Palm", "Pauc", "Phlp", "Sidd", "Tirh", "Wara",
+    "Ahom", "Hluw", "Hatr", "Mult", "Hung", "Sgnw", "Adlm",
+    "Bhks", "Marc", "Newa", "Osge", "Tang", "Gonm", "Nshu",
+    "Soyo", "Zanb", "Dogr", "Gong", "Rohg", "Maka", "Medf",
+    /* "Hmnp" (Nyiakeng Puachue Hmong) was previously a second "Rohg" */
+    "Sogo", "Sogd", "Elym", "Nand", "Hmnp", "Wcho"
+  };
+
+  /* The unsigned cast also rejects negative values. */
+  if ((gsize) s >= G_N_ELEMENTS (names))
+    return "????";
+
+  return names[s];
+}
+
+/* Appends @n spaces to @str via a "%*s" conversion of the empty string. */
+static void
+append_padding (GString *str, int n)
+{
+  g_string_append_printf (str, "%*s", n, "");
+}
+
+/*
+ * test_file:
+ * @filename: file whose contents are to be dumped
+ * @string: buffer that receives the dump
+ *
+ * Reads @filename and appends four aligned rows to @string: the
+ * characters themselves, their general category, their line-break class
+ * and their script. A script code is printed only where the script
+ * differs from that of the preceding character. Every character gets a
+ * column wide enough for its longest cell.
+ *
+ * A read failure is reported through g_error().
+ */
+static void
+test_file (const char *filename, GString *string)
+{
+  static const char text_label[] = "Text: ";
+  GString *types, *breaks, *scripts;
+  GUnicodeScript last_script = -1;
+  GError *error = NULL;
+  char *contents;
+  gsize length;
+  char *p;
+  int width;
+
+  if (!g_file_get_contents (filename, &contents, &length, &error))
+    {
+      g_error ("%s", error->message);
+      g_error_free (error);
+      return;
+    }
+
+  g_string_append (string, text_label);
+  types = g_string_new ("Char type: ");
+  breaks = g_string_new ("Break type: ");
+  scripts = g_string_new ("Script: ");
+
+  /* Bring all four row labels to the width of the longest one. */
+  width = MAX (MAX (types->len, breaks->len), scripts->len);
+
+  append_padding (types, (int) (width - types->len));
+  append_padding (breaks, (int) (width - breaks->len));
+  append_padding (scripts, (int) (width - scripts->len));
+  append_padding (string, (int) (width - strlen (text_label)));
+
+  p = contents;
+  while (*p)
+    {
+      gunichar ch = g_utf8_get_char (p);
+      const char *type_str = char_type (g_unichar_type (ch));
+      const char *break_str = break_type (g_unichar_break_type (ch));
+      GUnicodeScript script = g_unichar_get_script (ch);
+      int type_len = strlen (type_str);
+      int break_len = strlen (break_str);
+      int script_len = 0;
+      int text_len = 0;
+
+      g_string_append (types, type_str);
+      g_string_append (breaks, break_str);
+
+      /* The script cell stays blank while the script does not change. */
+      if (script != last_script)
+        {
+          const char *code = script_name (script);
+
+          last_script = script;
+          g_string_append (scripts, code);
+          script_len = strlen (code);
+        }
+
+      if (ch == 0x20)
+        {
+          g_string_append (string, "[ ]");
+          text_len = 3;
+        }
+      else if (!g_unichar_isgraph (ch) || ch == 0x2028 || ch == 0x2029)
+        {
+          /* Not shown literally: emit the code point in brackets. */
+          gsize before = string->len;
+
+          g_string_append_printf (string, "[%#04x]", ch);
+          text_len = string->len - before;
+        }
+      else
+        {
+          /* Shown literally and counted as a single column. */
+          g_string_append_unichar (string, ch);
+          text_len = 1;
+        }
+
+      /* Column width: the three attribute cells each want one trailing
+       * space, the text cell is allowed to fill the column completely.
+       */
+      width = MAX (type_len, break_len);
+      width = MAX (width, script_len) + 1;
+      width = MAX (width, text_len);
+
+      append_padding (string, width - text_len);
+      append_padding (types, width - type_len);
+      append_padding (breaks, width - break_len);
+      append_padding (scripts, width - script_len);
+
+      p = g_utf8_next_char (p);
+    }
+
+  /* Stack the attribute rows below the text row. */
+  g_string_append_c (string, '\n');
+  g_string_append_len (string, types->str, types->len);
+  g_string_append_c (string, '\n');
+  g_string_append_len (string, breaks->str, breaks->len);
+  g_string_append_c (string, '\n');
+  g_string_append_len (string, scripts->str, scripts->len);
+  g_string_append_c (string, '\n');
+
+  g_string_free (types, TRUE);
+  g_string_free (breaks, TRUE);
+  g_string_free (scripts, TRUE);
+
+  g_free (contents);
+}
+
+/*
+ * get_expected_filename:
+ * @filename: path of a test input file
+ *
+ * Derives the path of the file holding the expected output: a trailing
+ * ".chars" extension is replaced by ".expected"; if @filename does not
+ * end in ".chars", ".expected" is simply appended.
+ *
+ * Only the end of the path is examined, so a ".chars" occurring earlier
+ * (for instance in a directory name) is left untouched.
+ *
+ * Returns: a newly allocated string, to be released with g_free()
+ */
+static gchar *
+get_expected_filename (const gchar *filename)
+{
+  const gchar *suffix = ".chars";
+  gchar *stem, *expected;
+
+  if (g_str_has_suffix (filename, suffix))
+    stem = g_strndup (filename, strlen (filename) - strlen (suffix));
+  else
+    stem = g_strdup (filename);
+
+  expected = g_strconcat (stem, ".expected", NULL);
+
+  g_free (stem);
+
+  return expected;
+}
+
+/*
+ * test_break:
+ * @d: path of the input file, passed as test data
+ *
+ * Dumps the character data for the input file and compares the dump
+ * against the matching ".expected" file. Any diff output is printed to
+ * stderr and marks the test as failed.
+ */
+static void
+test_break (gconstpointer d)
+{
+  const char *filename = d;
+  char *expected_file;
+  GError *error = NULL;
+  GString *dump;
+  char *diff = NULL;
+
+  expected_file = get_expected_filename (filename);
+
+  dump = g_string_sized_new (0);
+
+  test_file (filename, dump);
+
+  diff = diff_with_file (expected_file, dump->str, dump->len, &error);
+  g_assert_no_error (error);
+
+  if (diff && diff[0])
+    {
+      g_printerr ("Contents don't match expected contents:\n%s", diff);
+      g_test_fail ();
+    }
+
+  /* Release the diff output even when it is empty (the matching case);
+   * g_free() accepts NULL.
+   */
+  g_free (diff);
+  g_string_free (dump, TRUE);
+  g_free (expected_file);
+}
+
+/*
+ * Entry point.
+ *
+ * With a file name as argument, prints the dump for that file and exits;
+ * this is how expected output for a new test case is generated.
+ *
+ * Without arguments, registers one test per "*.chars" file found in the
+ * "chars" directory of the test distribution and runs them.
+ */
+int
+main (int argc, char *argv[])
+{
+  GDir *dir;
+  GError *error = NULL;
+  const gchar *name;
+  gchar *path;
+
+  g_setenv ("LC_ALL", "en_US.UTF-8", TRUE);
+  setlocale (LC_ALL, "");
+
+  g_test_init (&argc, &argv, NULL);
+
+  /* make it easy to generate expected output for new test cases */
+  if (argc > 1)
+    {
+      GString *string;
+
+      string = g_string_sized_new (0);
+      test_file (argv[1], string);
+      g_print ("%s", string->str);
+      g_string_free (string, TRUE);
+
+      return 0;
+    }
+
+  path = g_test_build_filename (G_TEST_DIST, "chars", NULL);
+  dir = g_dir_open (path, 0, &error);
+  g_free (path);
+  g_assert_no_error (error);
+  while ((name = g_dir_read_name (dir)) != NULL)
+    {
+      /* Only real inputs: skip ".expected" files as well as editor
+       * backups or swap files that merely contain "chars" in the name.
+       */
+      if (!g_str_has_suffix (name, ".chars"))
+        continue;
+
+      path = g_strdup_printf ("/chars/%s", name);
+      /* The test data (the full input path) is released with g_free. */
+      g_test_add_data_func_full (path, g_test_build_filename (G_TEST_DIST, "chars", name, NULL),
+                                 test_break, g_free);
+      g_free (path);
+    }
+  g_dir_close (dir);
+
+  return g_test_run ();
+}


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]