[libdazzle] util: add levenshtein implementation



commit 9df806c88a6246eadfdfc89cfcab73483ad16a1e
Author: Christian Hergert <chergert redhat com>
Date:   Sat Jun 3 18:53:51 2017 -0700

    util: add levenshtein implementation

 src/dazzle.h               |    1 +
 src/meson.build            |    2 +
 src/util/dzl-levenshtein.c |  105 ++++++++++++++++++++++++++++++++++++++++++++
 src/util/dzl-levenshtein.h |   31 +++++++++++++
 tests/meson.build          |    6 +++
 tests/test-levenshtein.c   |   39 ++++++++++++++++
 6 files changed, 184 insertions(+), 0 deletions(-)
---
diff --git a/src/dazzle.h b/src/dazzle.h
index c98db61..688de66 100644
--- a/src/dazzle.h
+++ b/src/dazzle.h
@@ -108,6 +108,7 @@ G_BEGIN_DECLS
 #include "util/dzl-file-manager.h"
 #include "util/dzl-gdk.h"
 #include "util/dzl-heap.h"
+#include "util/dzl-levenshtein.h"
 #include "util/dzl-pango.h"
 #include "util/dzl-rgba.h"
 #include "util/dzl-ring.h"
diff --git a/src/meson.build b/src/meson.build
index 2d07bbd..9f7f29b 100644
--- a/src/meson.build
+++ b/src/meson.build
@@ -121,6 +121,7 @@ libdazzle_public_headers = [
   'util/dzl-file-manager.h',
   'util/dzl-gdk.h',
   'util/dzl-heap.h',
+  'util/dzl-levenshtein.h',
   'util/dzl-pango.h',
   'util/dzl-rgba.h',
   'util/dzl-ring.h',
@@ -241,6 +242,7 @@ libdazzle_public_sources = [
   'util/dzl-file-manager.c',
   'util/dzl-gdk.c',
   'util/dzl-heap.c',
+  'util/dzl-levenshtein.c',
   'util/dzl-pango.c',
   'util/dzl-rgba.c',
   'util/dzl-ring.c',
diff --git a/src/util/dzl-levenshtein.c b/src/util/dzl-levenshtein.c
new file mode 100644
index 0000000..16261b0
--- /dev/null
+++ b/src/util/dzl-levenshtein.c
@@ -0,0 +1,105 @@
+/* dzl-levenshtein.c
+ *
+ * Copyright (C) 2013-2017 Christian Hergert <christian hergert me>
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <glib.h>
+#include <string.h>
+
+gint
+dzl_levenshtein (const gchar *needle,
+                 const gchar *haystack)
+{
+   const gchar *s;
+   const gchar *t;
+   gunichar sc;
+   gunichar tc;
+   gint *v0;
+   gint *v1;
+   gint haystack_char_len;
+   gint cost;
+   gint i;
+   gint j;
+   gint ret;
+
+   g_return_val_if_fail (needle, G_MAXINT);
+   g_return_val_if_fail (haystack, G_MAXINT);
+
+   /*
+    * Handle degenerate cases.
+    */
+   if (!g_strcmp0(needle, haystack)) {
+      return 0;
+   } else if (!*needle) {
+      return g_utf8_strlen(haystack, -1);
+   } else if (!*haystack) {
+      return g_utf8_strlen(needle, -1);
+   }
+
+   haystack_char_len = g_utf8_strlen (haystack, -1);
+
+   /*
+    * Create two vectors to hold our states.
+    */
+   v0 = g_new0(int, haystack_char_len + 1);
+   v1 = g_new0(int, haystack_char_len + 1);
+
+   /*
+    * initialize v0 (the previous row of distances).
+    * this row is A[0][i]: edit distance for an empty needle.
+    * the distance is just the number of characters to delete from haystack.
+    */
+   for (i = 0; i < haystack_char_len + 1; i++) {
+      v0[i] = i;
+   }
+
+   for (i = 0, s = needle; s && *s; i++, s = g_utf8_next_char(s)) {
+      /*
+       * Calculate v1 (current row distances) from the previous row v0.
+       */
+
+      sc = g_utf8_get_char(s);
+
+      /*
+       * first element of v1 is A[i+1][0]
+       *
+       * edit distance is delete (i+1) chars from needle to match empty
+       * haystack.
+       */
+      v1[0] = i + 1;
+
+      /*
+       * use formula to fill in the rest of the row.
+       */
+      for (j = 0, t = haystack; t && *t; j++, t = g_utf8_next_char(t)) {
+         tc = g_utf8_get_char(t);
+         cost = (sc == tc) ? 0 : 1;
+         v1[j+1] = MIN(v1[j] + 1, MIN(v0[j+1] + 1, v0[j] + cost));
+      }
+
+      /*
+       * copy v1 (current row) to v0 (previous row) for next iteration.
+       */
+      memcpy(v0, v1, sizeof(gint) * haystack_char_len);
+   }
+
+   ret = v1[haystack_char_len];
+
+   g_free(v0);
+   g_free(v1);
+
+   return ret;
+}
diff --git a/src/util/dzl-levenshtein.h b/src/util/dzl-levenshtein.h
new file mode 100644
index 0000000..23a09a6
--- /dev/null
+++ b/src/util/dzl-levenshtein.h
@@ -0,0 +1,31 @@
+/* dzl-levenshtein.h
+ *
+ * Copyright (C) 2017 Christian Hergert <chergert redhat com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef DZL_LEVENSHTEIN_H
+#define DZL_LEVENSHTEIN_H
+
+#include <glib.h>
+
+G_BEGIN_DECLS
+
+gint dzl_levenshtein (const gchar *needle,
+                      const gchar *haystack);
+
+G_END_DECLS
+
+#endif /* DZL_LEVENSHTEIN_H */
diff --git a/tests/meson.build b/tests/meson.build
index 90dd659..f9a5075 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -220,3 +220,9 @@ test_trie = executable('test-trie', 'test-trie.c',
      link_args: test_link_args,
   dependencies: libdazzle_deps + [libdazzle_dep],
 )
+
+test_levenshtein = executable('test-levenshtein', 'test-levenshtein.c',
+        c_args: test_cflags,
+     link_args: test_link_args,
+  dependencies: libdazzle_deps + [libdazzle_dep],
+)
diff --git a/tests/test-levenshtein.c b/tests/test-levenshtein.c
new file mode 100644
index 0000000..82523b1
--- /dev/null
+++ b/tests/test-levenshtein.c
@@ -0,0 +1,39 @@
+#include <dazzle.h>
+
+typedef struct
+{
+  const gchar *needle;
+  const gchar *haystack;
+  gint         expected_score;
+} WordCheck;
+
+static void
+test_levenshtein_basic (void)
+{
+  static const WordCheck check[] = {
+    { "gtk", "gkt", 2 },
+    { "LibreFreeOpen", "Cromulent", 10 },
+    { "Xorg", "Wayland", 7 },
+    { "glib", "gobject", 6 },
+    { "gbobject", "gobject", 1 },
+    { "flip", "fliiiip", 3 },
+    { "flip", "fliiiipper", 6 },
+    { NULL }
+  };
+
+  for (guint i = 0; check[i].needle != NULL; i++)
+    {
+      gint score = dzl_levenshtein (check[i].needle, check[i].haystack);
+
+      g_assert_cmpint (score, ==, check[i].expected_score);
+    }
+}
+
+gint
+main (gint argc,
+      gchar *argv[])
+{
+  g_test_init (&argc, &argv, NULL);
+  g_test_add_func ("/Dazzle/Levenshtein/basic", test_levenshtein_basic);
+  return g_test_run ();
+}


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]