banshee r5031 - in trunk/banshee: . src/Core/Banshee.Services/Banshee.Collection.Database src/Core/Banshee.Services/Banshee.Database src/Core/Banshee.Services/Banshee.Query src/Libraries/Hyena src/Libraries/Hyena/Hyena src/Libraries/Hyena/Hyena.Data.Sqlite src/Libraries/Hyena/Hyena.Query src/Libraries/Hyena/Hyena.Query/Tests src/Libraries/Hyena/Hyena/Tests
- From: gburt svn gnome org
- To: svn-commits-list gnome org
- Subject: banshee r5031 - in trunk/banshee: . src/Core/Banshee.Services/Banshee.Collection.Database src/Core/Banshee.Services/Banshee.Database src/Core/Banshee.Services/Banshee.Query src/Libraries/Hyena src/Libraries/Hyena/Hyena src/Libraries/Hyena/Hyena.Data.Sqlite src/Libraries/Hyena/Hyena.Query src/Libraries/Hyena/Hyena.Query/Tests src/Libraries/Hyena/Hyena/Tests
- Date: Wed, 11 Feb 2009 02:34:53 +0000 (UTC)
Author: gburt
Date: Wed Feb 11 02:34:53 2009
New Revision: 5031
URL: http://svn.gnome.org/viewvc/banshee?rev=5031&view=rev
Log:
2009-02-10 Gabriel Burt <gabriel burt gmail com>
Patch from John Millikin adding support for full/proper case-insensitive
unicode search, and additional support for fuzzy searching where n matches
ñ etc (NonSpacingMarks are stripped from a-z chars).
* src/Core/Banshee.Services/Banshee.Collection.Database/DatabaseAlbumInfo.cs:
* src/Core/Banshee.Services/Banshee.Collection.Database/DatabaseArtistInfo.cs:
* src/Core/Banshee.Services/Banshee.Collection.Database/DatabaseTrackInfo.cs:
Instead of calling ToLower on *Lowered properties, use the new
Hyena.StringUtil.SearchKey that also strips accents from Latin chars.
* src/Core/Banshee.Services/Banshee.Database/BansheeDbFormatMigrator.cs:
Update the existing *Lowered values by using the new HYENA_SEARCH_KEY
custom Sqlite method that just calls SearchKey.
* src/Core/Banshee.Services/Banshee.Query/BansheeQuery.cs: Mark MimeType,
License, and Location as ExactStringQueryValue fields (as opposed to the
new default which strips accents and lowercases).
* src/Libraries/Hyena/Hyena.Data.Sqlite/SqliteUtils.cs: New custom
HYENA_SEARCH_KEY sqlite method.
* src/Libraries/Hyena/Hyena.Query/QueryField.cs: Get rid of the old/hacky
Sqlite-LOWER workaround, replacing it with the custom HYENA_SEARCH_KEY
method where appropriate.
* src/Libraries/Hyena/Hyena.Query/StringQueryValue.cs: In ToSql return the
value passed through SearchKey.
* src/Libraries/Hyena/Hyena.Query/Tests/QueryTests.cs: Test the new fuzzy
and exact string matching.
* src/Libraries/Hyena/Makefile.am:
* src/Libraries/Hyena/Hyena/StringUtil.cs: New SearchKey method that
lowercases and strips accents of a-z chars.
* src/Libraries/Hyena/Hyena/Tests/StringUtilTests.cs: Test the SearchKey
method.
* src/Libraries/Hyena/Hyena.Query/ExactStringQueryValue.cs: New
StringQueryValue subclass that doesn't pass the value through SearchKey.
Added:
trunk/banshee/src/Libraries/Hyena/Hyena.Query/ExactStringQueryValue.cs
Modified:
trunk/banshee/ChangeLog
trunk/banshee/src/Core/Banshee.Services/Banshee.Collection.Database/DatabaseAlbumInfo.cs
trunk/banshee/src/Core/Banshee.Services/Banshee.Collection.Database/DatabaseArtistInfo.cs
trunk/banshee/src/Core/Banshee.Services/Banshee.Collection.Database/DatabaseTrackInfo.cs
trunk/banshee/src/Core/Banshee.Services/Banshee.Database/BansheeDbFormatMigrator.cs
trunk/banshee/src/Core/Banshee.Services/Banshee.Query/BansheeQuery.cs
trunk/banshee/src/Libraries/Hyena/Hyena.Data.Sqlite/SqliteUtils.cs
trunk/banshee/src/Libraries/Hyena/Hyena.Query/QueryField.cs
trunk/banshee/src/Libraries/Hyena/Hyena.Query/StringQueryValue.cs
trunk/banshee/src/Libraries/Hyena/Hyena.Query/Tests/QueryTests.cs
trunk/banshee/src/Libraries/Hyena/Hyena/StringUtil.cs
trunk/banshee/src/Libraries/Hyena/Hyena/Tests/StringUtilTests.cs
trunk/banshee/src/Libraries/Hyena/Makefile.am
Modified: trunk/banshee/src/Core/Banshee.Services/Banshee.Collection.Database/DatabaseAlbumInfo.cs
==============================================================================
--- trunk/banshee/src/Core/Banshee.Services/Banshee.Collection.Database/DatabaseAlbumInfo.cs (original)
+++ trunk/banshee/src/Core/Banshee.Services/Banshee.Collection.Database/DatabaseAlbumInfo.cs Wed Feb 11 02:34:53 2009
@@ -214,12 +214,12 @@
[DatabaseColumn(Select = false)]
protected string TitleLowered {
- get { return Title == null ? null : Title.ToLower (); }
+ get { return Hyena.StringUtil.SearchKey (Title); }
}
[DatabaseColumn(Select = false)]
protected string ArtistNameLowered {
- get { return ArtistName == null ? null : ArtistName.ToLower (); }
+ get { return Hyena.StringUtil.SearchKey (ArtistName); }
}
public override string ToString ()
Modified: trunk/banshee/src/Core/Banshee.Services/Banshee.Collection.Database/DatabaseArtistInfo.cs
==============================================================================
--- trunk/banshee/src/Core/Banshee.Services/Banshee.Collection.Database/DatabaseArtistInfo.cs (original)
+++ trunk/banshee/src/Core/Banshee.Services/Banshee.Collection.Database/DatabaseArtistInfo.cs Wed Feb 11 02:34:53 2009
@@ -135,7 +135,7 @@
[DatabaseColumn(Select = false)]
protected string NameLowered {
- get { return Name == null ? null : Name.ToLower (); }
+ get { return Hyena.StringUtil.SearchKey (Name); }
}
[DatabaseColumn]
Modified: trunk/banshee/src/Core/Banshee.Services/Banshee.Collection.Database/DatabaseTrackInfo.cs
==============================================================================
--- trunk/banshee/src/Core/Banshee.Services/Banshee.Collection.Database/DatabaseTrackInfo.cs (original)
+++ trunk/banshee/src/Core/Banshee.Services/Banshee.Collection.Database/DatabaseTrackInfo.cs Wed Feb 11 02:34:53 2009
@@ -458,7 +458,7 @@
[DatabaseColumn(Select = false)]
protected string TitleLowered {
- get { return TrackTitle == null ? null : TrackTitle.ToLower (); }
+ get { return Hyena.StringUtil.SearchKey (TrackTitle); }
}
[DatabaseColumn(Select = false)]
Modified: trunk/banshee/src/Core/Banshee.Services/Banshee.Database/BansheeDbFormatMigrator.cs
==============================================================================
--- trunk/banshee/src/Core/Banshee.Services/Banshee.Database/BansheeDbFormatMigrator.cs (original)
+++ trunk/banshee/src/Core/Banshee.Services/Banshee.Database/BansheeDbFormatMigrator.cs Wed Feb 11 02:34:53 2009
@@ -52,7 +52,7 @@
// NOTE: Whenever there is a change in ANY of the database schema,
// this version MUST be incremented and a migration method
// MUST be supplied to match the new version number
- protected const int CURRENT_VERSION = 23;
+ protected const int CURRENT_VERSION = 24;
protected const int CURRENT_METADATA_VERSION = 5;
#region Migration Driver
@@ -566,6 +566,18 @@
#endregion
+#region Version 24
+ [DatabaseVersion (24)]
+ private bool Migrate_24 ()
+ {
+ Execute ("UPDATE CoreArtists SET NameLowered = HYENA_SEARCH_KEY(Name)");
+ Execute ("UPDATE CoreAlbums SET ArtistNameLowered = HYENA_SEARCH_KEY(ArtistName)");
+ Execute ("UPDATE CoreAlbums SET TitleLowered = HYENA_SEARCH_KEY(Title)");
+ Execute ("UPDATE CoreTracks SET TitleLowered = HYENA_SEARCH_KEY(Title)");
+ return true;
+ }
+#endregion
+
#pragma warning restore 0169
#region Fresh database setup
Modified: trunk/banshee/src/Core/Banshee.Services/Banshee.Query/BansheeQuery.cs
==============================================================================
--- trunk/banshee/src/Core/Banshee.Services/Banshee.Query/BansheeQuery.cs (original)
+++ trunk/banshee/src/Core/Banshee.Services/Banshee.Query/BansheeQuery.cs Wed Feb 11 02:34:53 2009
@@ -221,7 +221,7 @@
public static QueryField LicenseUriField = new QueryField (
"licenseuri", "LicenseUri",
// Translators: noun
- Catalog.GetString ("License"), "CoreTracks.LicenseUri", false,
+ Catalog.GetString ("License"), "CoreTracks.LicenseUri", typeof(ExactStringQueryValue),
// Translators: These are unique search fields (and nouns). Please, no spaces. Blank ok.
Catalog.GetString ("license"), Catalog.GetString ("licensed"), Catalog.GetString ("under"),
"license", "licensed", "under"
@@ -261,7 +261,7 @@
public static QueryField UriField = new QueryField (
"uri", "Uri",
- Catalog.GetString ("File Location"), "CoreTracks.Uri",
+ Catalog.GetString ("File Location"), "CoreTracks.Uri", typeof(ExactStringQueryValue),
// Translators: These are unique search fields. Please, no spaces. Blank ok.
Catalog.GetString ("uri"), Catalog.GetString ("path"), Catalog.GetString ("file"), Catalog.GetString ("location"),
"uri", "path", "file", "location"
@@ -277,7 +277,7 @@
public static QueryField MimeTypeField = new QueryField (
"mimetype", "MimeType",
- Catalog.GetString ("Mime Type"), "CoreTracks.MimeType {0} OR CoreTracks.Uri {0}",
+ Catalog.GetString ("Mime Type"), "CoreTracks.MimeType {0} OR CoreTracks.Uri {0}", typeof(ExactStringQueryValue),
// Translators: These are unique search fields. Please, no spaces. Blank ok.
Catalog.GetString ("type"), Catalog.GetString ("mimetype"), Catalog.GetString ("format"), Catalog.GetString ("ext"),
"type", "mimetype", "format", "ext", "mime"
Modified: trunk/banshee/src/Libraries/Hyena/Hyena.Data.Sqlite/SqliteUtils.cs
==============================================================================
--- trunk/banshee/src/Libraries/Hyena/Hyena.Data.Sqlite/SqliteUtils.cs (original)
+++ trunk/banshee/src/Libraries/Hyena/Hyena.Data.Sqlite/SqliteUtils.cs Wed Feb 11 02:34:53 2009
@@ -29,6 +29,7 @@
using System;
using System.Reflection;
using System.Text;
+using Mono.Data.Sqlite;
namespace Hyena.Data.Sqlite
{
@@ -122,4 +123,12 @@
return builder.ToString ();
}
}
+
+ [SqliteFunction (Name = "HYENA_SEARCH_KEY", FuncType = FunctionType.Scalar, Arguments = 1)]
+ internal class SearchKeyFunction : SqliteFunction
+ {
+ public override object Invoke (object[] args) {
+ return Hyena.StringUtil.SearchKey (args[0] as string);
+ }
+ }
}
Added: trunk/banshee/src/Libraries/Hyena/Hyena.Query/ExactStringQueryValue.cs
==============================================================================
--- (empty file)
+++ trunk/banshee/src/Libraries/Hyena/Hyena.Query/ExactStringQueryValue.cs Wed Feb 11 02:34:53 2009
@@ -0,0 +1,39 @@
+//
+// ExactStringQueryValue.cs
+//
+// Authors:
+// John Millikin <jmillikin gmail com>
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the
+// "Software"), to deal in the Software without restriction, including
+// without limitation the rights to use, copy, modify, merge, publish,
+// distribute, sublicense, and/or sell copies of the Software, and to
+// permit persons to whom the Software is furnished to do so, subject to
+// the following conditions:
+//
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+
+using System;
+
+namespace Hyena.Query
+{
+ // A query value that requires the string match exactly
+ public class ExactStringQueryValue : StringQueryValue
+ {
+ public override string ToSql ()
+ {
+ return String.IsNullOrEmpty (value) ? null : value.Replace ("'", "''").ToLower ();
+ }
+ }
+}
Modified: trunk/banshee/src/Libraries/Hyena/Hyena.Query/QueryField.cs
==============================================================================
--- trunk/banshee/src/Libraries/Hyena/Hyena.Query/QueryField.cs (original)
+++ trunk/banshee/src/Libraries/Hyena/Hyena.Query/QueryField.cs Wed Feb 11 02:34:53 2009
@@ -150,23 +150,11 @@
StringBuilder sb = new StringBuilder ();
if (no_custom_format) {
- if (qv is StringQueryValue) {
- if (column_lowered) {
- // The column is pre-lowered, no need to call lower() in SQL
- sb.AppendFormat ("{0} {1}", Column, String.Format (op.SqlFormat, value.ToLower ()));
- } else {
- // Match string values literally and against a lower'd version. Mostly a workaround
- // the fact that Sqlite's lower() method only works for ASCII (meaning even with this,
- // we're not getting 100% case-insensitive matching).
- sb.AppendFormat ("({0} {1} {3} LOWER({0}) {2})", Column,
- String.Format (op.SqlFormat, value),
- String.Format (op.SqlFormat, value.ToLower ()),
- op.IsNot ? "AND" : "OR"
- );
- }
- } else {
- sb.AppendFormat ("{0} {1}", Column, String.Format (op.SqlFormat, value));
+ string column_with_key = Column;
+ if (qv is StringQueryValue && !(column_lowered || qv is ExactStringQueryValue)) {
+ column_with_key = String.Format ("HYENA_SEARCH_KEY({0})", Column);
}
+ sb.AppendFormat ("{0} {1}", column_with_key, String.Format (op.SqlFormat, value));
if (op.IsNot) {
return String.Format ("({0} OR {1} IS NULL)", sb.ToString (), Column);
Modified: trunk/banshee/src/Libraries/Hyena/Hyena.Query/StringQueryValue.cs
==============================================================================
--- trunk/banshee/src/Libraries/Hyena/Hyena.Query/StringQueryValue.cs (original)
+++ trunk/banshee/src/Libraries/Hyena/Hyena.Query/StringQueryValue.cs Wed Feb 11 02:34:53 2009
@@ -79,7 +79,9 @@
public override string ToSql ()
{
- return String.IsNullOrEmpty (value) ? null : value.Replace ("'", "''");
+ // SearchKey() removes ' anyway, but it's escaped again so proper
+ // SQL behavior isn't dependent on search behavior.
+ return Hyena.StringUtil.SearchKey (value).Replace ("'", "''");
}
}
}
Modified: trunk/banshee/src/Libraries/Hyena/Hyena.Query/Tests/QueryTests.cs
==============================================================================
--- trunk/banshee/src/Libraries/Hyena/Hyena.Query/Tests/QueryTests.cs (original)
+++ trunk/banshee/src/Libraries/Hyena/Hyena.Query/Tests/QueryTests.cs Wed Feb 11 02:34:53 2009
@@ -86,6 +86,16 @@
qv = new StringQueryValue (); qv.ParseUserQuery ("foo 'bar'");
Assert.AreEqual ("foo 'bar'", qv.Value);
Assert.AreEqual ("foo 'bar'", qv.ToUserQuery ());
+ Assert.AreEqual ("foo bar", qv.ToSql ());
+
+ qv = new StringQueryValue (); qv.ParseUserQuery ("Foo Baño");
+ Assert.AreEqual ("Foo Baño", qv.Value);
+ Assert.AreEqual ("Foo Baño", qv.ToUserQuery ());
+ Assert.AreEqual ("foo bano", qv.ToSql ());
+
+ qv = new ExactStringQueryValue (); qv.ParseUserQuery ("foo 'bar'");
+ Assert.AreEqual ("foo 'bar'", qv.Value);
+ Assert.AreEqual ("foo 'bar'", qv.ToUserQuery ());
Assert.AreEqual ("foo ''bar''", qv.ToSql ());
qv = new IntegerQueryValue (); qv.ParseUserQuery ("22");
Modified: trunk/banshee/src/Libraries/Hyena/Hyena/StringUtil.cs
==============================================================================
--- trunk/banshee/src/Libraries/Hyena/Hyena/StringUtil.cs (original)
+++ trunk/banshee/src/Libraries/Hyena/Hyena/StringUtil.cs Wed Feb 11 02:34:53 2009
@@ -27,6 +27,7 @@
//
using System;
+using System.Collections.Generic;
using System.Text;
using System.Globalization;
using System.Text.RegularExpressions;
@@ -161,6 +162,49 @@
return (int)num + 1;
}
+ // A mapping of non-Latin characters to be considered the same as
+ // a Latin equivalent.
+ private static Dictionary<char, char> BuildSpecialCases ()
+ {
+ Dictionary<char, char> dict = new Dictionary<char, char> ();
+ dict['\u00f8'] = 'o';
+ dict['\u0142'] = 'l';
+ return dict;
+ }
+ private static Dictionary<char, char> ignored_special_cases = BuildSpecialCases ();
+
+ // Removes accents from Latin characters, and some kinds of punctuation.
+ public static string SearchKey (string val)
+ {
+ if (String.IsNullOrEmpty (val)) {
+ return val;
+ }
+
+ val = val.ToLower ();
+ StringBuilder sb = new StringBuilder ();
+ UnicodeCategory category;
+ bool previous_was_latin = false;
+
+ // Normalizing to KD splits into (base, combining) so we can check for Latin
+ // characters and then strip off any NonSpacingMarks following them
+ foreach (char c in val.Normalize (NormalizationForm.FormKD)) {
+ category = Char.GetUnicodeCategory (c);
+
+ if (ignored_special_cases.ContainsKey (c)) {
+ sb.Append (ignored_special_cases[c]);
+ } else if (category == UnicodeCategory.OtherPunctuation) {
+ // Skip punctuation
+ } else if (!(previous_was_latin && category == UnicodeCategory.NonSpacingMark)) {
+ sb.Append (c);
+ }
+
+ // Can ignore A-Z because we've already lowercased the char
+ previous_was_latin = (c >= 'a' && c <= 'z');
+ }
+
+ return sb.ToString ().Normalize (NormalizationForm.FormKC);
+ }
+
private static string invalid_path_characters = "\"\\:'~`! #$%^&*_-+|?/><[]";
private static Regex invalid_path_regex;
Modified: trunk/banshee/src/Libraries/Hyena/Hyena/Tests/StringUtilTests.cs
==============================================================================
--- trunk/banshee/src/Libraries/Hyena/Hyena/Tests/StringUtilTests.cs (original)
+++ trunk/banshee/src/Libraries/Hyena/Hyena/Tests/StringUtilTests.cs Wed Feb 11 02:34:53 2009
@@ -150,6 +150,92 @@
href=http://lkjdflkjdflkjj>baz foo< /a> bar"));
}
}
+
+ [TestFixture]
+ public class SearchKeyTests
+ {
+ private void AssertSearchKey (string before, string after)
+ {
+ Assert.AreEqual (after, StringUtil.SearchKey (before));
+ }
+
+ [Test]
+ public void TestEmpty ()
+ {
+ AssertSearchKey ("", "");
+ AssertSearchKey (null, null);
+ }
+
+ // Test that resulting search keys are in lower-case
+ [Test]
+ public void TestLowercase ()
+ {
+ AssertSearchKey ("A", "a");
+ AssertSearchKey ("\u0104", "a");
+ }
+
+ // Test that combining diacritics are removed from Latin characters.
+ [Test]
+ public void TestRemoveDiacritics ()
+ {
+ AssertSearchKey ("\u00e9", "e");
+ AssertSearchKey ("e\u0301", "e");
+
+ AssertSearchKey ("\u014d", "o");
+ AssertSearchKey ("o\u0304", "o");
+
+ AssertSearchKey ("Español", "espanol");
+ AssertSearchKey ("30 años de la revolución iraní", "30 anos de la revolucion irani");
+ AssertSearchKey ("FRANCÉS", "frances");
+
+ // Polish letters
+ AssertSearchKey ("Ą", "a");
+ AssertSearchKey ("ą", "a");
+ AssertSearchKey ("Ć", "c");
+ AssertSearchKey ("ć", "c");
+ AssertSearchKey ("Ę", "e");
+ AssertSearchKey ("ę", "e");
+ AssertSearchKey ("Ł", "l");
+ AssertSearchKey ("ł", "l");
+ AssertSearchKey ("Ń", "n");
+ AssertSearchKey ("ń", "n");
+ AssertSearchKey ("Ó", "o");
+ AssertSearchKey ("ó", "o");
+ AssertSearchKey ("Ś", "s");
+ AssertSearchKey ("ś", "s");
+ AssertSearchKey ("Ź", "z");
+ AssertSearchKey ("ź", "z");
+ AssertSearchKey ("Ż", "z");
+ AssertSearchKey ("ż", "z");
+ }
+
+ // Test that combining diacritics are preserved, and combined, for non-Latin characters.
+ [Test]
+ public void TestPreserveDiacritics ()
+ {
+ AssertSearchKey ("\u304c", "\u304c");
+ AssertSearchKey ("\u304b\u3099", "\u304c");
+ }
+
+ // Test that some non-Latin characters are converted to Latin counterparts.
+ [Test]
+ public void TestEquivalents ()
+ {
+ AssertSearchKey ("\u00f8", "o");
+ AssertSearchKey ("\u0142", "l");
+ }
+
+ // Test that some kinds of punctuation are removed.
+ [Test]
+ public void TestRemovePunctuation ()
+ {
+ AssertSearchKey ("'", "");
+ AssertSearchKey ("\"", "");
+ AssertSearchKey ("!", "");
+ AssertSearchKey ("?", "");
+ AssertSearchKey ("/", "");
+ }
+ }
}
#endif
Modified: trunk/banshee/src/Libraries/Hyena/Makefile.am
==============================================================================
--- trunk/banshee/src/Libraries/Hyena/Makefile.am (original)
+++ trunk/banshee/src/Libraries/Hyena/Makefile.am Wed Feb 11 02:34:53 2009
@@ -58,6 +58,7 @@
Hyena.Json/TokenType.cs \
Hyena.Query/AliasedObjectSet.cs \
Hyena.Query/DateQueryValue.cs \
+ Hyena.Query/ExactStringQueryValue.cs \
Hyena.Query/FileSizeQueryValue.cs \
Hyena.Query/IntegerKeyedObjectQueryValue.cs \
Hyena.Query/IntegerQueryValue.cs \
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]