banshee r5031 - in trunk/banshee: . src/Core/Banshee.Services/Banshee.Collection.Database src/Core/Banshee.Services/Banshee.Database src/Core/Banshee.Services/Banshee.Query src/Libraries/Hyena src/Libraries/Hyena/Hyena src/Libraries/Hyena/Hyena.Data.Sqlite src/Libraries/Hyena/Hyena.Query src/Libraries/Hyena/Hyena.Query/Tests src/Libraries/Hyena/Hyena/Tests



Author: gburt
Date: Wed Feb 11 02:34:53 2009
New Revision: 5031
URL: http://svn.gnome.org/viewvc/banshee?rev=5031&view=rev

Log:
2009-02-10  Gabriel Burt  <gabriel burt gmail com>

	Patch from John Millikin adding support for full/proper case-insensitive
	unicode search, and additional support for fuzzy searching where n matches
	ñ etc (NonSpacingMarks are stripped from a-z chars).

	* src/Core/Banshee.Services/Banshee.Collection.Database/DatabaseAlbumInfo.cs:
	* src/Core/Banshee.Services/Banshee.Collection.Database/DatabaseArtistInfo.cs:
	* src/Core/Banshee.Services/Banshee.Collection.Database/DatabaseTrackInfo.cs:
	Instead of calling ToLower on *Lowered properties, use the new
	Hyena.StringUtil.SearchKey that also strips accents from Latin chars.

	* src/Core/Banshee.Services/Banshee.Database/BansheeDbFormatMigrator.cs:
	Update the existing *Lowered values by using the new HYENA_SEARCH_KEY
	custom Sqlite method that just calls SearchKey.

	* src/Core/Banshee.Services/Banshee.Query/BansheeQuery.cs: Mark MimeType,
	License, and Location as ExactStringQueryValue fields (as opposed to the
	new default which strips accents and lowercases).

	* src/Libraries/Hyena/Hyena.Data.Sqlite/SqliteUtils.cs: New custom
	HYENA_SEARCH_KEY sqlite method.

	* src/Libraries/Hyena/Hyena.Query/QueryField.cs: Get rid of the old/hacky
	Sqlite-LOWER workaround, replacing it with the custom HYENA_SEARCH_KEY
	method where appropriate.

	* src/Libraries/Hyena/Hyena.Query/StringQueryValue.cs: In ToSql return the
	value passed through SearchKey.

	* src/Libraries/Hyena/Hyena.Query/Tests/QueryTests.cs: Test the new fuzzy
	and exact string matching.

	* src/Libraries/Hyena/Makefile.am:
	* src/Libraries/Hyena/Hyena/StringUtil.cs: New SearchKey method that
	lowercases and strips accents of a-z chars.

	* src/Libraries/Hyena/Hyena/Tests/StringUtilTests.cs: Test the SearchKey
	method.

	* src/Libraries/Hyena/Hyena.Query/ExactStringQueryValue.cs: New
	StringQueryValue subclass that doesn't pass the value through SearchKey.

Added:
   trunk/banshee/src/Libraries/Hyena/Hyena.Query/ExactStringQueryValue.cs
Modified:
   trunk/banshee/ChangeLog
   trunk/banshee/src/Core/Banshee.Services/Banshee.Collection.Database/DatabaseAlbumInfo.cs
   trunk/banshee/src/Core/Banshee.Services/Banshee.Collection.Database/DatabaseArtistInfo.cs
   trunk/banshee/src/Core/Banshee.Services/Banshee.Collection.Database/DatabaseTrackInfo.cs
   trunk/banshee/src/Core/Banshee.Services/Banshee.Database/BansheeDbFormatMigrator.cs
   trunk/banshee/src/Core/Banshee.Services/Banshee.Query/BansheeQuery.cs
   trunk/banshee/src/Libraries/Hyena/Hyena.Data.Sqlite/SqliteUtils.cs
   trunk/banshee/src/Libraries/Hyena/Hyena.Query/QueryField.cs
   trunk/banshee/src/Libraries/Hyena/Hyena.Query/StringQueryValue.cs
   trunk/banshee/src/Libraries/Hyena/Hyena.Query/Tests/QueryTests.cs
   trunk/banshee/src/Libraries/Hyena/Hyena/StringUtil.cs
   trunk/banshee/src/Libraries/Hyena/Hyena/Tests/StringUtilTests.cs
   trunk/banshee/src/Libraries/Hyena/Makefile.am

Modified: trunk/banshee/src/Core/Banshee.Services/Banshee.Collection.Database/DatabaseAlbumInfo.cs
==============================================================================
--- trunk/banshee/src/Core/Banshee.Services/Banshee.Collection.Database/DatabaseAlbumInfo.cs	(original)
+++ trunk/banshee/src/Core/Banshee.Services/Banshee.Collection.Database/DatabaseAlbumInfo.cs	Wed Feb 11 02:34:53 2009
@@ -214,12 +214,12 @@
 
         [DatabaseColumn(Select = false)]
         protected string TitleLowered {
-            get { return Title == null ? null : Title.ToLower (); }
+            get { return Hyena.StringUtil.SearchKey (Title); }
         }
 
         [DatabaseColumn(Select = false)]
         protected string ArtistNameLowered {
-            get { return ArtistName == null ? null : ArtistName.ToLower (); }
+            get { return Hyena.StringUtil.SearchKey (ArtistName); }
         }
 
         public override string ToString ()

Modified: trunk/banshee/src/Core/Banshee.Services/Banshee.Collection.Database/DatabaseArtistInfo.cs
==============================================================================
--- trunk/banshee/src/Core/Banshee.Services/Banshee.Collection.Database/DatabaseArtistInfo.cs	(original)
+++ trunk/banshee/src/Core/Banshee.Services/Banshee.Collection.Database/DatabaseArtistInfo.cs	Wed Feb 11 02:34:53 2009
@@ -135,7 +135,7 @@
 
         [DatabaseColumn(Select = false)]
         protected string NameLowered {
-            get { return Name == null ? null : Name.ToLower (); }
+            get { return Hyena.StringUtil.SearchKey (Name); }
         }
 
         [DatabaseColumn]

Modified: trunk/banshee/src/Core/Banshee.Services/Banshee.Collection.Database/DatabaseTrackInfo.cs
==============================================================================
--- trunk/banshee/src/Core/Banshee.Services/Banshee.Collection.Database/DatabaseTrackInfo.cs	(original)
+++ trunk/banshee/src/Core/Banshee.Services/Banshee.Collection.Database/DatabaseTrackInfo.cs	Wed Feb 11 02:34:53 2009
@@ -458,7 +458,7 @@
         
         [DatabaseColumn(Select = false)]
         protected string TitleLowered {
-            get { return TrackTitle == null ? null : TrackTitle.ToLower (); }
+            get { return Hyena.StringUtil.SearchKey (TrackTitle); }
         }
 
         [DatabaseColumn(Select = false)]

Modified: trunk/banshee/src/Core/Banshee.Services/Banshee.Database/BansheeDbFormatMigrator.cs
==============================================================================
--- trunk/banshee/src/Core/Banshee.Services/Banshee.Database/BansheeDbFormatMigrator.cs	(original)
+++ trunk/banshee/src/Core/Banshee.Services/Banshee.Database/BansheeDbFormatMigrator.cs	Wed Feb 11 02:34:53 2009
@@ -52,7 +52,7 @@
         // NOTE: Whenever there is a change in ANY of the database schema,
         //       this version MUST be incremented and a migration method
         //       MUST be supplied to match the new version number
-        protected const int CURRENT_VERSION = 23;
+        protected const int CURRENT_VERSION = 24;
         protected const int CURRENT_METADATA_VERSION = 5;
         
 #region Migration Driver
@@ -566,6 +566,18 @@
         
 #endregion
 
+#region Version 24
+        [DatabaseVersion (24)]
+        private bool Migrate_24 ()
+        {
+            Execute ("UPDATE CoreArtists SET NameLowered = HYENA_SEARCH_KEY(Name)");
+            Execute ("UPDATE CoreAlbums SET ArtistNameLowered = HYENA_SEARCH_KEY(ArtistName)");
+            Execute ("UPDATE CoreAlbums SET TitleLowered = HYENA_SEARCH_KEY(Title)");
+            Execute ("UPDATE CoreTracks SET TitleLowered = HYENA_SEARCH_KEY(Title)");
+            return true;
+        }
+#endregion
+
 #pragma warning restore 0169
         
 #region Fresh database setup

Modified: trunk/banshee/src/Core/Banshee.Services/Banshee.Query/BansheeQuery.cs
==============================================================================
--- trunk/banshee/src/Core/Banshee.Services/Banshee.Query/BansheeQuery.cs	(original)
+++ trunk/banshee/src/Core/Banshee.Services/Banshee.Query/BansheeQuery.cs	Wed Feb 11 02:34:53 2009
@@ -221,7 +221,7 @@
         public static QueryField LicenseUriField = new QueryField (
             "licenseuri", "LicenseUri",
             // Translators: noun
-            Catalog.GetString ("License"), "CoreTracks.LicenseUri", false,
+            Catalog.GetString ("License"), "CoreTracks.LicenseUri", typeof(ExactStringQueryValue),
             // Translators: These are unique search fields (and nouns).  Please, no spaces. Blank ok.
             Catalog.GetString ("license"), Catalog.GetString ("licensed"), Catalog.GetString ("under"),
             "license", "licensed", "under"
@@ -261,7 +261,7 @@
 
         public static QueryField UriField = new QueryField (
             "uri", "Uri",
-            Catalog.GetString ("File Location"), "CoreTracks.Uri",
+            Catalog.GetString ("File Location"), "CoreTracks.Uri", typeof(ExactStringQueryValue),
             // Translators: These are unique search fields.  Please, no spaces. Blank ok.
             Catalog.GetString ("uri"), Catalog.GetString ("path"), Catalog.GetString ("file"), Catalog.GetString ("location"),
             "uri", "path", "file", "location"
@@ -277,7 +277,7 @@
 
         public static QueryField MimeTypeField = new QueryField (
             "mimetype", "MimeType",
-            Catalog.GetString ("Mime Type"), "CoreTracks.MimeType {0} OR CoreTracks.Uri {0}",
+            Catalog.GetString ("Mime Type"), "CoreTracks.MimeType {0} OR CoreTracks.Uri {0}", typeof(ExactStringQueryValue),
             // Translators: These are unique search fields.  Please, no spaces. Blank ok.
             Catalog.GetString ("type"), Catalog.GetString ("mimetype"), Catalog.GetString ("format"), Catalog.GetString ("ext"),
             "type", "mimetype", "format", "ext", "mime"

Modified: trunk/banshee/src/Libraries/Hyena/Hyena.Data.Sqlite/SqliteUtils.cs
==============================================================================
--- trunk/banshee/src/Libraries/Hyena/Hyena.Data.Sqlite/SqliteUtils.cs	(original)
+++ trunk/banshee/src/Libraries/Hyena/Hyena.Data.Sqlite/SqliteUtils.cs	Wed Feb 11 02:34:53 2009
@@ -29,6 +29,7 @@
 using System;
 using System.Reflection;
 using System.Text;
+using Mono.Data.Sqlite;
 
 namespace Hyena.Data.Sqlite
 {
@@ -122,4 +123,12 @@
             return builder.ToString ();
         }
     }
+    
+    [SqliteFunction (Name = "HYENA_SEARCH_KEY", FuncType = FunctionType.Scalar, Arguments = 1)]
+    internal class SearchKeyFunction : SqliteFunction
+    {
+        public override object Invoke (object[] args) {
+            return Hyena.StringUtil.SearchKey (args[0] as string);
+        }
+    }
 }

Added: trunk/banshee/src/Libraries/Hyena/Hyena.Query/ExactStringQueryValue.cs
==============================================================================
--- (empty file)
+++ trunk/banshee/src/Libraries/Hyena/Hyena.Query/ExactStringQueryValue.cs	Wed Feb 11 02:34:53 2009
@@ -0,0 +1,39 @@
+//
+// ExactStringQueryValue.cs
+//
+// Authors:
+//   John Millikin <jmillikin gmail com>
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the
+// "Software"), to deal in the Software without restriction, including
+// without limitation the rights to use, copy, modify, merge, publish,
+// distribute, sublicense, and/or sell copies of the Software, and to
+// permit persons to whom the Software is furnished to do so, subject to
+// the following conditions:
+//
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+
+using System;
+
+namespace Hyena.Query
+{
+    // A query value that requires the string match exactly
+    public class ExactStringQueryValue : StringQueryValue
+    {
+        public override string ToSql ()
+        {
+            return String.IsNullOrEmpty (value) ? null : value.Replace ("'", "''").ToLower ();
+        }
+    }
+}

Modified: trunk/banshee/src/Libraries/Hyena/Hyena.Query/QueryField.cs
==============================================================================
--- trunk/banshee/src/Libraries/Hyena/Hyena.Query/QueryField.cs	(original)
+++ trunk/banshee/src/Libraries/Hyena/Hyena.Query/QueryField.cs	Wed Feb 11 02:34:53 2009
@@ -150,23 +150,11 @@
             StringBuilder sb = new StringBuilder ();
 
             if (no_custom_format) {
-                if (qv is StringQueryValue) {
-                    if (column_lowered) {
-                        // The column is pre-lowered, no need to call lower() in SQL
-                        sb.AppendFormat ("{0} {1}", Column, String.Format (op.SqlFormat, value.ToLower ()));
-                    } else {
-                        // Match string values literally and against a lower'd version.  Mostly a workaround
-                        // the fact that Sqlite's lower() method only works for ASCII (meaning even with this,
-                        // we're not getting 100% case-insensitive matching).
-                        sb.AppendFormat ("({0} {1} {3} LOWER({0}) {2})", Column,
-                            String.Format (op.SqlFormat, value),
-                            String.Format (op.SqlFormat, value.ToLower ()),
-                            op.IsNot ? "AND" : "OR"
-                        );
-                    }
-                } else {
-                    sb.AppendFormat ("{0} {1}", Column, String.Format (op.SqlFormat, value));
+                string column_with_key = Column;
+                if (qv is StringQueryValue && !(column_lowered || qv is ExactStringQueryValue)) {
+                    column_with_key = String.Format ("HYENA_SEARCH_KEY({0})", Column);
                 }
+                sb.AppendFormat ("{0} {1}", column_with_key, String.Format (op.SqlFormat, value));
 
                 if (op.IsNot) {
                     return String.Format ("({0} OR {1} IS NULL)", sb.ToString (), Column);

Modified: trunk/banshee/src/Libraries/Hyena/Hyena.Query/StringQueryValue.cs
==============================================================================
--- trunk/banshee/src/Libraries/Hyena/Hyena.Query/StringQueryValue.cs	(original)
+++ trunk/banshee/src/Libraries/Hyena/Hyena.Query/StringQueryValue.cs	Wed Feb 11 02:34:53 2009
@@ -79,7 +79,9 @@
 
         public override string ToSql ()
         {
-            return String.IsNullOrEmpty (value) ? null : value.Replace ("'", "''");
+            // SearchKey() removes ' anyway, but it's escaped again so proper
+            // SQL behavior isn't dependent on search behavior.
+            return Hyena.StringUtil.SearchKey (value).Replace ("'", "''");
         }
     }
 }

Modified: trunk/banshee/src/Libraries/Hyena/Hyena.Query/Tests/QueryTests.cs
==============================================================================
--- trunk/banshee/src/Libraries/Hyena/Hyena.Query/Tests/QueryTests.cs	(original)
+++ trunk/banshee/src/Libraries/Hyena/Hyena.Query/Tests/QueryTests.cs	Wed Feb 11 02:34:53 2009
@@ -86,6 +86,16 @@
             qv = new StringQueryValue (); qv.ParseUserQuery ("foo 'bar'");
             Assert.AreEqual ("foo 'bar'", qv.Value);
             Assert.AreEqual ("foo 'bar'", qv.ToUserQuery ());
+            Assert.AreEqual ("foo bar", qv.ToSql ());
+
+            qv = new StringQueryValue (); qv.ParseUserQuery ("Foo Baño");
+            Assert.AreEqual ("Foo Baño", qv.Value);
+            Assert.AreEqual ("Foo Baño", qv.ToUserQuery ());
+            Assert.AreEqual ("foo bano", qv.ToSql ());
+    
+            qv = new ExactStringQueryValue (); qv.ParseUserQuery ("foo 'bar'");
+            Assert.AreEqual ("foo 'bar'", qv.Value);
+            Assert.AreEqual ("foo 'bar'", qv.ToUserQuery ());
             Assert.AreEqual ("foo ''bar''", qv.ToSql ());
     
             qv = new IntegerQueryValue (); qv.ParseUserQuery ("22");

Modified: trunk/banshee/src/Libraries/Hyena/Hyena/StringUtil.cs
==============================================================================
--- trunk/banshee/src/Libraries/Hyena/Hyena/StringUtil.cs	(original)
+++ trunk/banshee/src/Libraries/Hyena/Hyena/StringUtil.cs	Wed Feb 11 02:34:53 2009
@@ -27,6 +27,7 @@
 //
 
 using System;
+using System.Collections.Generic;
 using System.Text;
 using System.Globalization;
 using System.Text.RegularExpressions;
@@ -161,6 +162,49 @@
                 return (int)num + 1;
         }
         
+        // A mapping of non-Latin characters to be considered the same as
+        // a Latin equivalent.
+        private static Dictionary<char, char> BuildSpecialCases ()
+        {
+            Dictionary<char, char> dict = new Dictionary<char, char> ();
+            dict['\u00f8'] = 'o';
+            dict['\u0142'] = 'l';
+            return dict;
+        }
+        private static Dictionary<char, char> ignored_special_cases = BuildSpecialCases ();
+        
+        //  Removes accents from Latin characters, and some kinds of punctuation.
+        public static string SearchKey (string val)
+        {
+            if (String.IsNullOrEmpty (val)) {
+                return val;
+            }
+            
+            val = val.ToLower ();
+            StringBuilder sb = new StringBuilder ();
+            UnicodeCategory category;
+            bool previous_was_latin = false;
+            
+            // Normalizing to KD splits into (base, combining) so we can check for Latin
+            // characters and then strip off any NonSpacingMarks following them
+            foreach (char c in val.Normalize (NormalizationForm.FormKD)) {
+                category = Char.GetUnicodeCategory (c);
+
+                if (ignored_special_cases.ContainsKey (c)) {
+                    sb.Append (ignored_special_cases[c]);
+                } else if (category == UnicodeCategory.OtherPunctuation) {
+                    // Skip punctuation
+                } else if (!(previous_was_latin && category == UnicodeCategory.NonSpacingMark)) {
+                    sb.Append (c);
+                }
+
+                // Can ignore A-Z because we've already lowercased the char
+                previous_was_latin = (c >= 'a' && c <= 'z');
+            }
+            
+            return sb.ToString ().Normalize (NormalizationForm.FormKC);
+        }
+        
         private static string invalid_path_characters = "\"\\:'~`! #$%^&*_-+|?/><[]";
         private static Regex invalid_path_regex;
         

Modified: trunk/banshee/src/Libraries/Hyena/Hyena/Tests/StringUtilTests.cs
==============================================================================
--- trunk/banshee/src/Libraries/Hyena/Hyena/Tests/StringUtilTests.cs	(original)
+++ trunk/banshee/src/Libraries/Hyena/Hyena/Tests/StringUtilTests.cs	Wed Feb 11 02:34:53 2009
@@ -150,6 +150,92 @@
 href=http://lkjdflkjdflkjj>baz foo< /a> bar"));
         }
     }
+    
+    [TestFixture]
+    public class SearchKeyTests
+    {
+        private void AssertSearchKey (string before, string after)
+        {
+            Assert.AreEqual (after, StringUtil.SearchKey (before));
+        }
+        
+        [Test]
+        public void TestEmpty ()
+        {
+            AssertSearchKey ("", "");
+            AssertSearchKey (null, null);
+        }
+        
+        // Test that resulting search keys are in lower-case
+        [Test]
+        public void TestLowercase ()
+        {
+            AssertSearchKey ("A", "a");
+            AssertSearchKey ("\u0104", "a");
+        }
+        
+        // Test that combining diacritics are removed from Latin characters.
+        [Test]
+        public void TestRemoveDiacritics ()
+        {
+            AssertSearchKey ("\u00e9", "e");
+            AssertSearchKey ("e\u0301", "e");
+            
+            AssertSearchKey ("\u014d", "o");
+            AssertSearchKey ("o\u0304", "o");
+
+            AssertSearchKey ("Español", "espanol");
+            AssertSearchKey ("30 años de la revolución iraní", "30 anos de la revolucion irani");
+            AssertSearchKey ("FRANCÉS", "frances");
+
+            // Polish letters
+            AssertSearchKey ("Ą", "a");
+            AssertSearchKey ("ą", "a");
+            AssertSearchKey ("Ć", "c");
+            AssertSearchKey ("ć", "c");
+            AssertSearchKey ("Ę", "e");
+            AssertSearchKey ("ę", "e");
+            AssertSearchKey ("Ł", "l");
+            AssertSearchKey ("ł", "l");
+            AssertSearchKey ("Ń", "n");
+            AssertSearchKey ("ń", "n");
+            AssertSearchKey ("Ó", "o");
+            AssertSearchKey ("ó", "o");
+            AssertSearchKey ("Ś", "s");
+            AssertSearchKey ("ś", "s");
+            AssertSearchKey ("Ź", "z");
+            AssertSearchKey ("ź", "z");
+            AssertSearchKey ("Ż", "z");
+            AssertSearchKey ("ż", "z");
+        }
+        
+        // Test that combining diacritics are preserved, and combined, for non-Latin characters.
+        [Test]
+        public void TestPreserveDiacritics ()
+        {
+            AssertSearchKey ("\u304c", "\u304c");
+            AssertSearchKey ("\u304b\u3099", "\u304c");
+        }
+        
+        // Test that some non-Latin characters are converted to Latin counterparts.
+        [Test]
+        public void TestEquivalents ()
+        {
+            AssertSearchKey ("\u00f8", "o");
+            AssertSearchKey ("\u0142", "l");
+        }
+        
+        // Test that some kinds of punctuation are removed.
+        [Test]
+        public void TestRemovePunctuation ()
+        {
+            AssertSearchKey ("'", "");
+            AssertSearchKey ("\"", "");
+            AssertSearchKey ("!", "");
+            AssertSearchKey ("?", "");
+            AssertSearchKey ("/", "");
+        }
+    }
 }
 
 #endif

Modified: trunk/banshee/src/Libraries/Hyena/Makefile.am
==============================================================================
--- trunk/banshee/src/Libraries/Hyena/Makefile.am	(original)
+++ trunk/banshee/src/Libraries/Hyena/Makefile.am	Wed Feb 11 02:34:53 2009
@@ -58,6 +58,7 @@
 	Hyena.Json/TokenType.cs \
 	Hyena.Query/AliasedObjectSet.cs \
 	Hyena.Query/DateQueryValue.cs \
+	Hyena.Query/ExactStringQueryValue.cs \
 	Hyena.Query/FileSizeQueryValue.cs \
 	Hyena.Query/IntegerKeyedObjectQueryValue.cs \
 	Hyena.Query/IntegerQueryValue.cs \



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]