=?utf-8?q?=5Bfolks=5D_Bug_670872_=E2=80=94_Should_be_less_sensitive_to_ac?= =?utf-8?q?centuated_chars?=
- From: Philip Withnall <pwithnall src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [folks] Bug 670872 — Should be less sensitive to accentuated chars
- Date: Mon, 26 Mar 2012 10:39:12 +0000 (UTC)
commit f8e59f0d0a051ed9dcfeab2467b78ca1aabec741
Author: Philip Withnall <philip tecnocode co uk>
Date: Sat Mar 24 22:18:21 2012 +0000
Bug 670872 — Should be less sensitive to accentuated chars
Strip accented characters from names and make them all lower case when
searching for potential matches between individuals.
This uses a modified version of Empathy's live search code to allow for
string matches regardless of case or accentuation.
The commit also fixes the potential match code to handle UTF-8 properly,
which it wasn't doing before.
This includes a test case.
Closes: https://bugzilla.gnome.org/show_bug.cgi?id=670872
NEWS | 1 +
folks/potential-match.vala | 112 ++++++++++++++++++++++++++++++++++++----
tests/tracker/match-name.vala | 11 ++++
3 files changed, 112 insertions(+), 12 deletions(-)
---
diff --git a/NEWS b/NEWS
index 9f5e7bd..c0cc840 100644
--- a/NEWS
+++ b/NEWS
@@ -13,6 +13,7 @@ Bugs fixed:
* Bug 672373 — folks-import segfaults (Archlinux x86_64)
* Bug 670348 — Handle Telepathy CMs crashing/being invalidated
* Bug 671900 — Folks should not suggest linking contacts from telepathy-salut
+* Bug 670872 — Should be less sensitive to accentuated chars
Overview of changes from libfolks 0.6.6 to libfolks 0.6.7
=============================================================
diff --git a/folks/potential-match.vala b/folks/potential-match.vala
index 83fb40b..d4f44a4 100644
--- a/folks/potential-match.vala
+++ b/folks/potential-match.vala
@@ -495,6 +495,69 @@ public class Folks.PotentialMatch : Object
return distance;
}
+ /**
+ * _stripped_char:
+ *
+ * Returns @ch lower-cased with accents and other marks removed, or 0 if
+ * @ch is itself a control, format, unassigned or mark character.
+ *
+ * Copied from Empathy's libempathy-gtk/empathy-live-search.c.
+ *
+ * Copyright (C) 2010 Collabora Ltd.
+ * Copyright (C) 2007-2010 Nokia Corporation.
+ *
+ * Authors: Felix Kaser <felix kaser collabora co uk>
+ * Xavier Claessens <xavier claessens collabora co uk>
+ * Claudio Saavedra <csaavedra igalia com>
+ */
+ private unichar _stripped_char (unichar ch)
+ {
+ unichar retval[1] = { 0 };
+ var utype = ch.type ();
+
+ switch (utype)
+ {
+ case UnicodeType.CONTROL:
+ case UnicodeType.FORMAT:
+ case UnicodeType.UNASSIGNED:
+ case UnicodeType.NON_SPACING_MARK:
+ case UnicodeType.COMBINING_MARK:
+ case UnicodeType.ENCLOSING_MARK:
+ /* Ignore those: retval[0] stays 0, which is what gets returned */
+ break;
+ case UnicodeType.PRIVATE_USE:
+ case UnicodeType.SURROGATE:
+ case UnicodeType.LOWERCASE_LETTER:
+ case UnicodeType.MODIFIER_LETTER:
+ case UnicodeType.OTHER_LETTER:
+ case UnicodeType.TITLECASE_LETTER:
+ case UnicodeType.UPPERCASE_LETTER:
+ case UnicodeType.DECIMAL_NUMBER:
+ case UnicodeType.LETTER_NUMBER:
+ case UnicodeType.OTHER_NUMBER:
+ case UnicodeType.CONNECT_PUNCTUATION:
+ case UnicodeType.DASH_PUNCTUATION:
+ case UnicodeType.CLOSE_PUNCTUATION:
+ case UnicodeType.FINAL_PUNCTUATION:
+ case UnicodeType.INITIAL_PUNCTUATION:
+ case UnicodeType.OTHER_PUNCTUATION:
+ case UnicodeType.OPEN_PUNCTUATION:
+ case UnicodeType.CURRENCY_SYMBOL:
+ case UnicodeType.MODIFIER_SYMBOL:
+ case UnicodeType.MATH_SYMBOL:
+ case UnicodeType.OTHER_SYMBOL:
+ case UnicodeType.LINE_SEPARATOR:
+ case UnicodeType.PARAGRAPH_SEPARATOR:
+ case UnicodeType.SPACE_SEPARATOR:
+ default:
+ ch = ch.tolower ();
+ ch.fully_decompose (false, retval); /* retval holds one code point only */
+ break;
+ }
+
+ return retval[0];
+ }
+
/* Calculate matches and transpositions as defined by the Jaro distance.
*/
private int _matches (string s1, string s2, int max_dist, out double t)
@@ -502,10 +565,22 @@ public class Folks.PotentialMatch : Object
int matches = 0;
t = 0.0;
- for (int i=0; i < s1.length; i++)
+ assert (s1.validate ());
+ assert (s2.validate ());
+
+ int idx = 0;
+ unichar look_for = 0;
+
+ while (s1.get_next_char (ref idx, out look_for))
{
- var look_for = s1.slice (i, i + 1);
- int contains = this._contains (s2, look_for, i, max_dist);
+ /* Skip uninteresting characters. */
+ look_for = this._stripped_char (look_for);
+ if (look_for == 0)
+ {
+ continue;
+ }
+
+ int contains = this._contains (s2, look_for, idx, max_dist);
if (contains >= 0)
{
matches++;
@@ -523,20 +598,33 @@ public class Folks.PotentialMatch : Object
/* If haystack contains c in pos return 0, if it contains
* it withing the bounds of max_dist return abs(pos-pos_found).
- * If its not found, return -1. */
- private int _contains (string haystack, string c, int pos, int max_dist)
+ * If its not found, return -1.
+ *
+ * pos and max_dist are both in bytes.
+ *
+ * Note: haystack must have been validated using haystack.validate() before
+ * being passed to this method. */
+ private int _contains (string haystack, unichar c, int pos, int max_dist)
{
- if (pos < haystack.length && haystack.slice (pos, pos + 1) == c)
+ var haystack_len = haystack.length; /* in bytes */
+
+ if (pos < haystack_len && haystack.get_char (pos) == c)
return 0;
- for (int i=pos-max_dist; i <= pos + max_dist; i++)
+ int idx = (pos - max_dist).clamp (0, haystack_len);
+ unichar ch = 0;
+
+ while (idx < pos + max_dist && haystack.get_next_char (ref idx, out ch))
{
- if (i < 0 || i >= haystack.length)
- continue;
+ /* Skip uninteresting characters. */
+ ch = this._stripped_char (ch);
+ if (ch == 0)
+ {
+ continue;
+ }
- var str = haystack.slice (i, i + 1);
- if (str == c)
- return (pos - i).abs ();
+ if (ch == c)
+ return (pos - idx).abs ();
}
return -1;
diff --git a/tests/tracker/match-name.vala b/tests/tracker/match-name.vala
index db1bb29..cf518ea 100644
--- a/tests/tracker/match-name.vala
+++ b/tests/tracker/match-name.vala
@@ -48,6 +48,8 @@ public class MatchNameTests : Folks.TestCase
this.test_match_name_2);
this.add_test ("test potential match by name #3 ",
this.test_match_name_3);
+ this.add_test ("test potential match by name #4 ",
+ this.test_match_name_4);
}
public override void set_up ()
@@ -99,6 +101,15 @@ public class MatchNameTests : Folks.TestCase
assert (this._match >= Folks.MatchResult.MEDIUM);
}
+ public void test_match_name_4 ()
+ {
+ /* Chosen to test the accent- and case-invariance of the matching
+ * algorithm. The name is repeated so that the string lengths get us up
+ * to a MEDIUM result. */
+ this._test_match_name ("PâtéPâtéPâté", "patepatepate");
+ assert (this._match >= Folks.MatchResult.MEDIUM);
+ }
+
private async void _test_match_name_async ()
{
var store = BackendStore.dup ();
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]