[banshee] Strip nonspacing marks off all letters when searching (bgo#529767)



commit e7969e5ebb7617959afe022d2f7e23df71e9e2aa
Author: Alexander Kojevnikov <alexander kojevnikov com>
Date:   Sat Nov 21 21:07:49 2009 +1100

    Strip nonspacing marks off all letters when searching (bgo#529767)

 .../Banshee.Database/BansheeDbFormatMigrator.cs    |    2 +-
 src/Libraries/Hyena/Hyena/StringUtil.cs            |   10 +++++-----
 src/Libraries/Hyena/Hyena/Tests/StringUtilTests.cs |   15 ++++++++-------
 3 files changed, 14 insertions(+), 13 deletions(-)
---
diff --git a/src/Core/Banshee.Services/Banshee.Database/BansheeDbFormatMigrator.cs b/src/Core/Banshee.Services/Banshee.Database/BansheeDbFormatMigrator.cs
index fbce6df..85f8c5d 100644
--- a/src/Core/Banshee.Services/Banshee.Database/BansheeDbFormatMigrator.cs
+++ b/src/Core/Banshee.Services/Banshee.Database/BansheeDbFormatMigrator.cs
@@ -57,7 +57,7 @@ namespace Banshee.Database
         //       this version MUST be incremented and a migration method
         //       MUST be supplied to match the new version number
         protected const int CURRENT_VERSION = 36;
-        protected const int CURRENT_METADATA_VERSION = 6;
+        protected const int CURRENT_METADATA_VERSION = 7;
 
 #region Migration Driver
 
diff --git a/src/Libraries/Hyena/Hyena/StringUtil.cs b/src/Libraries/Hyena/Hyena/StringUtil.cs
index 9070b5c..8de7d3b 100644
--- a/src/Libraries/Hyena/Hyena/StringUtil.cs
+++ b/src/Libraries/Hyena/Hyena/StringUtil.cs
@@ -185,11 +185,11 @@ namespace Hyena
             val = val.ToLower ();
             StringBuilder sb = new StringBuilder ();
             UnicodeCategory category;
-            bool previous_was_latin = false;
+            bool previous_was_letter = false;
             bool got_space = false;
 
-            // Normalizing to KD splits into (base, combining) so we can check for Latin
-            // characters and then strip off any NonSpacingMarks following them
+            // Normalizing to KD splits into (base, combining) so we can check for letters
+            // and then strip off any NonSpacingMarks following them
             foreach (char orig_c in val.TrimStart ().Normalize (NormalizationForm.FormKD)) {
 
                 // Check for a special case *before* whitespace. This way, if
@@ -208,7 +208,7 @@ namespace Hyena
                 category = Char.GetUnicodeCategory (c);
                 if (category == UnicodeCategory.OtherPunctuation) {
                     // Skip punctuation
-                } else if (!(previous_was_latin && category == UnicodeCategory.NonSpacingMark)) {
+                } else if (!(previous_was_letter && category == UnicodeCategory.NonSpacingMark)) {
                     if (got_space) {
                         sb.Append (" ");
                         got_space = false;
@@ -217,7 +217,7 @@ namespace Hyena
                 }
 
                 // Can ignore A-Z because we've already lowercased the char
-                previous_was_latin = (c >= 'a' && c <= 'z');
+                previous_was_letter = Char.IsLetter (c);
             }
 
             string result = sb.ToString ();
diff --git a/src/Libraries/Hyena/Hyena/Tests/StringUtilTests.cs b/src/Libraries/Hyena/Hyena/Tests/StringUtilTests.cs
index f0b1df7..ecf331e 100644
--- a/src/Libraries/Hyena/Hyena/Tests/StringUtilTests.cs
+++ b/src/Libraries/Hyena/Hyena/Tests/StringUtilTests.cs
@@ -203,14 +203,15 @@ href=http://lkjdflkjdflkjj>baz foo< /a> bar"));
             AssertSearchKey ("Ź", "z");
             AssertSearchKey ("ż", "z");
             AssertSearchKey ("Ż", "z");
-        }
 
-        // Test that combining diacritics are preserved, and combined, for non-Latin characters.
-        [Test]
-        public void TestPreserveDiacritics ()
-        {
-            AssertSearchKey ("\u304c", "\u304c");
-            AssertSearchKey ("\u304b\u3099", "\u304c");
+            // Hiragana
+            AssertSearchKey ("\u304c", "\u304b");
+
+            // Cyrillic
+            AssertSearchKey ("\u0451", "\u0435");
+            AssertSearchKey ("\u0401", "\u0435");
+            AssertSearchKey ("\u0439", "\u0438");
+            AssertSearchKey ("\u0419", "\u0438");
         }
 
         // Test that some non-Latin characters are converted to Latin counterparts.



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]