[banshee] Strip nonspacing marks off all letters when searching (bgo#529767)
- From: Alexander Kojevnikov <alexk src gnome org>
- To: svn-commits-list gnome org
- Cc:
- Subject: [banshee] Strip nonspacing marks off all letters when searching (bgo#529767)
- Date: Sat, 21 Nov 2009 10:08:15 +0000 (UTC)
commit e7969e5ebb7617959afe022d2f7e23df71e9e2aa
Author: Alexander Kojevnikov <alexander kojevnikov com>
Date: Sat Nov 21 21:07:49 2009 +1100
Strip nonspacing marks off all letters when searching (bgo#529767)
.../Banshee.Database/BansheeDbFormatMigrator.cs | 2 +-
src/Libraries/Hyena/Hyena/StringUtil.cs | 10 +++++-----
src/Libraries/Hyena/Hyena/Tests/StringUtilTests.cs | 15 ++++++++-------
3 files changed, 14 insertions(+), 13 deletions(-)
---
diff --git a/src/Core/Banshee.Services/Banshee.Database/BansheeDbFormatMigrator.cs b/src/Core/Banshee.Services/Banshee.Database/BansheeDbFormatMigrator.cs
index fbce6df..85f8c5d 100644
--- a/src/Core/Banshee.Services/Banshee.Database/BansheeDbFormatMigrator.cs
+++ b/src/Core/Banshee.Services/Banshee.Database/BansheeDbFormatMigrator.cs
@@ -57,7 +57,7 @@ namespace Banshee.Database
// this version MUST be incremented and a migration method
// MUST be supplied to match the new version number
protected const int CURRENT_VERSION = 36;
- protected const int CURRENT_METADATA_VERSION = 6;
+ protected const int CURRENT_METADATA_VERSION = 7;
#region Migration Driver
diff --git a/src/Libraries/Hyena/Hyena/StringUtil.cs b/src/Libraries/Hyena/Hyena/StringUtil.cs
index 9070b5c..8de7d3b 100644
--- a/src/Libraries/Hyena/Hyena/StringUtil.cs
+++ b/src/Libraries/Hyena/Hyena/StringUtil.cs
@@ -185,11 +185,11 @@ namespace Hyena
val = val.ToLower ();
StringBuilder sb = new StringBuilder ();
UnicodeCategory category;
- bool previous_was_latin = false;
+ bool previous_was_letter = false;
bool got_space = false;
- // Normalizing to KD splits into (base, combining) so we can check for Latin
- // characters and then strip off any NonSpacingMarks following them
+ // Normalizing to KD splits into (base, combining) so we can check for letters
+ // and then strip off any NonSpacingMarks following them
foreach (char orig_c in val.TrimStart ().Normalize (NormalizationForm.FormKD)) {
// Check for a special case *before* whitespace. This way, if
@@ -208,7 +208,7 @@ namespace Hyena
category = Char.GetUnicodeCategory (c);
if (category == UnicodeCategory.OtherPunctuation) {
// Skip punctuation
- } else if (!(previous_was_latin && category == UnicodeCategory.NonSpacingMark)) {
+ } else if (!(previous_was_letter && category == UnicodeCategory.NonSpacingMark)) {
if (got_space) {
sb.Append (" ");
got_space = false;
@@ -217,7 +217,7 @@ namespace Hyena
}
// Can ignore A-Z because we've already lowercased the char
- previous_was_latin = (c >= 'a' && c <= 'z');
+ previous_was_letter = Char.IsLetter (c);
}
string result = sb.ToString ();
diff --git a/src/Libraries/Hyena/Hyena/Tests/StringUtilTests.cs b/src/Libraries/Hyena/Hyena/Tests/StringUtilTests.cs
index f0b1df7..ecf331e 100644
--- a/src/Libraries/Hyena/Hyena/Tests/StringUtilTests.cs
+++ b/src/Libraries/Hyena/Hyena/Tests/StringUtilTests.cs
@@ -203,14 +203,15 @@ href=http://lkjdflkjdflkjj>baz foo< /a> bar"));
AssertSearchKey ("Ź", "z");
AssertSearchKey ("ż", "z");
AssertSearchKey ("Ż", "z");
- }
- // Test that combining diacritics are preserved, and combined, for non-Latin characters.
- [Test]
- public void TestPreserveDiacritics ()
- {
- AssertSearchKey ("\u304c", "\u304c");
- AssertSearchKey ("\u304b\u3099", "\u304c");
+ // Hiragana
+ AssertSearchKey ("\u304c", "\u304b");
+
+ // Cyrillic
+ AssertSearchKey ("\u0451", "\u0435");
+ AssertSearchKey ("\u0401", "\u0435");
+ AssertSearchKey ("\u0439", "\u0438");
+ AssertSearchKey ("\u0419", "\u0438");
}
// Test that some non-Latin characters are converted to Latin counterparts.
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]