[banshee/gio-hardware] [Fixup] Better i18n support for article stripping



commit c5f3834df222f840deb5dcc20ff0f1ebcb313d20
Author: Gabriel Burt <gabriel burt gmail com>
Date:   Tue Aug 10 12:29:54 2010 -0700

    [Fixup] Better i18n support for article stripping
    
    Translators can now supply as many articles as their language has.  Add
    unit tests to test the artist normalization, too.  Fixes bgo#625331

 src/Extensions/Banshee.Fixup/Banshee.Fixup.csproj  |    3 +-
 .../Banshee.Fixup/AlbumDuplicateSolver.cs          |   14 +--
 .../Banshee.Fixup/ArtistDuplicateSolver.cs         |   30 ++++---
 .../Banshee.Fixup/Banshee.Fixup/FixSource.cs       |    2 +-
 .../Banshee.Fixup/Banshee.Fixup/Solver.cs          |   59 +++++++++++++-
 .../Banshee.Fixup/Banshee.Fixup/Tests.cs           |   87 ++++++++++++++++++++
 src/Extensions/Banshee.Fixup/Makefile.am           |    1 +
 tests/Makefile.am                                  |    1 +
 8 files changed, 174 insertions(+), 23 deletions(-)
---
diff --git a/src/Extensions/Banshee.Fixup/Banshee.Fixup.csproj b/src/Extensions/Banshee.Fixup/Banshee.Fixup.csproj
index b41e187..608109f 100644
--- a/src/Extensions/Banshee.Fixup/Banshee.Fixup.csproj
+++ b/src/Extensions/Banshee.Fixup/Banshee.Fixup.csproj
@@ -88,8 +88,9 @@
     <Compile Include="Banshee.Fixup\FixSource.cs" />
     <Compile Include="Banshee.Fixup\Problem.cs" />
     <Compile Include="Banshee.Fixup\ProblemModel.cs" />
-    <Compile Include="Banshee.Fixup\View.cs" />
     <Compile Include="Banshee.Fixup\Solver.cs" />
+    <Compile Include="Banshee.Fixup\Tests.cs" />
+    <Compile Include="Banshee.Fixup\View.cs" />
   </ItemGroup>
   <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
   <ProjectExtensions>
diff --git a/src/Extensions/Banshee.Fixup/Banshee.Fixup/AlbumDuplicateSolver.cs b/src/Extensions/Banshee.Fixup/Banshee.Fixup/AlbumDuplicateSolver.cs
index f88d2c7..b221021 100644
--- a/src/Extensions/Banshee.Fixup/Banshee.Fixup/AlbumDuplicateSolver.cs
+++ b/src/Extensions/Banshee.Fixup/Banshee.Fixup/AlbumDuplicateSolver.cs
@@ -69,16 +69,12 @@ namespace Banshee.Fixup
                 return null;
 
             ret = ret.ToLower ()
-               .Replace (" and ", " & ")
-               .Replace (Catalog.GetString (" and "), " & ")
-               .Replace (", the", "")
-               .Replace (Catalog.GetString (", the"), "")
-               .Replace ("the ", "")
-               .Replace (Catalog.GetString ("the "), "")
-               .Trim ();
+                     .RemovePrefixedArticles ()
+                     .RemoveSuffixedArticles ()
+                     .NormalizeConjunctions ();
 
-            // Stips whitespace, punctuation, accents, and lower-cases
-            ret = Hyena.StringUtil.SearchKey (ret);
+            // Strip extra whitespace, punctuation, and accents, lower-case, etc
+            ret = Hyena.StringUtil.SearchKey (ret).Trim ();
             return ret + artist;
         }
 
diff --git a/src/Extensions/Banshee.Fixup/Banshee.Fixup/ArtistDuplicateSolver.cs b/src/Extensions/Banshee.Fixup/Banshee.Fixup/ArtistDuplicateSolver.cs
index 4194a71..02a7116 100644
--- a/src/Extensions/Banshee.Fixup/Banshee.Fixup/ArtistDuplicateSolver.cs
+++ b/src/Extensions/Banshee.Fixup/Banshee.Fixup/ArtistDuplicateSolver.cs
@@ -51,7 +51,7 @@ namespace Banshee.Fixup
                 String.Format (
                     @"(Name IS NOT NULL AND ArtistID IN (SELECT DISTINCT(ArtistID) FROM CoreTracks WHERE PrimarySourceID = {0})
                         OR ArtistID IN (SELECT DISTINCT(a.ArtistID) FROM CoreTracks t, CoreAlbums a WHERE t.AlbumID = a.AlbumID AND t.PrimarySourceID = {0}))",
-                    ServiceManager.SourceManager.MusicLibrary.DbId
+                    EnableUnitTests ? 0 : ServiceManager.SourceManager.MusicLibrary.DbId
                 ),
                 "HYENA_BINARY_FUNCTION ('dupe-artist', Name, NULL)"
             );
@@ -65,23 +65,31 @@ namespace Banshee.Fixup
             BinaryFunction.Remove (Id);
         }
 
-        private object NormalizeArtistName (object name, object null_arg)
+        private string comma = ", ";
+        private string [] comma_ary = new string [] { ", " };
+
+        internal object NormalizeArtistName (object name, object null_arg)
         {
             var ret = name as string;
             if (ret == null)
                 return null;
 
+            // If has only one comma, split on it and reverse the order
+            // eg Matthews, Dave => Dave Matthews
+            int i = ret.IndexOf (comma);
+            if (i != -1 && i == ret.LastIndexOf (comma)) {
+                ret = ret.Split (comma_ary, StringSplitOptions.None)
+                         .Reverse ()
+                         .Join (" ");
+            }
+
             ret = ret.ToLower ()
-               .Replace (" and ", " & ")
-               .Replace (Catalog.GetString (" and "), " & ")
-               .Replace (", the", "")
-               .Replace (Catalog.GetString (", the"), "")
-               .Replace ("the ", "")
-               .Replace (Catalog.GetString ("the "), "")
-               .Trim ();
+                     .RemovePrefixedArticles ()
+                     .RemoveSuffixedArticles ()
+                     .NormalizeConjunctions ();
 
-            // Stips whitespace, punctuation, accents, and lower-cases
-            ret = Hyena.StringUtil.SearchKey (ret);
+            // Strip extra whitespace, punctuation, and accents, lower-case, etc
+            ret = Hyena.StringUtil.SearchKey (ret).Trim ();
             return ret;
         }
 
diff --git a/src/Extensions/Banshee.Fixup/Banshee.Fixup/FixSource.cs b/src/Extensions/Banshee.Fixup/Banshee.Fixup/FixSource.cs
index 8b0e8f6..4331465 100644
--- a/src/Extensions/Banshee.Fixup/Banshee.Fixup/FixSource.cs
+++ b/src/Extensions/Banshee.Fixup/Banshee.Fixup/FixSource.cs
@@ -63,7 +63,7 @@ namespace Banshee.Fixup
             };
             combo.Active = 0;
 
-            var apply_button = new Hyena.Widgets.ImageButton ("Apply Selected Fixes", "gtk-apply");
+            var apply_button = new Hyena.Widgets.ImageButton (Catalog.GetString ("Apply Selected Fixes"), "gtk-apply");
             apply_button.Clicked += (o, a) => problem_model.Fix ();
             problem_model.Reloaded += (o, a) => apply_button.Sensitive = problem_model.SelectedCount > 0;
 
diff --git a/src/Extensions/Banshee.Fixup/Banshee.Fixup/Solver.cs b/src/Extensions/Banshee.Fixup/Banshee.Fixup/Solver.cs
index 81b5682..8353079 100644
--- a/src/Extensions/Banshee.Fixup/Banshee.Fixup/Solver.cs
+++ b/src/Extensions/Banshee.Fixup/Banshee.Fixup/Solver.cs
@@ -49,6 +49,9 @@ namespace Banshee.Fixup
         {
         }
 
+        // Total hack to make unit tests work
+        internal static bool EnableUnitTests;
+
         public string Id {
             get { return id; }
             set {
@@ -57,7 +60,9 @@ namespace Banshee.Fixup
                 }
 
                 id = value;
-                Generation = DatabaseConfigurationClient.Client.Get<int> ("MetadataFixupGeneration", id, 0);
+                if (!EnableUnitTests) {
+                    Generation = DatabaseConfigurationClient.Client.Get<int> ("MetadataFixupGeneration", id, 0);
+                }
             }
         }
 
@@ -141,6 +146,7 @@ namespace Banshee.Fixup
 
         protected override void IdentifyCore ()
         {
+            // Prune artists and albums that are no longer used
             ServiceManager.DbConnection.Execute (@"
                 DELETE FROM CoreAlbums WHERE AlbumID NOT IN (SELECT DISTINCT(AlbumID) FROM CoreTracks);
                 DELETE FROM CoreArtists WHERE
@@ -152,6 +158,57 @@ namespace Banshee.Fixup
                 ServiceManager.DbConnection.Execute (cmd, Generation);
             }
         }
+
+    }
+
+    public static class FixupExtensions
+    {
+        public static string NormalizeConjunctions (this string input)
+        {
+            return input.Replace (" & ", " and ");
+        }
+
+        public static string RemovePrefixedArticles (this string input)
+        {
+            foreach (var prefix in article_prefixes) {
+                if (input.StartsWith (prefix)) {
+                    input = input.Substring (prefix.Length, input.Length - prefix.Length);
+                }
+            }
+            return input;
+        }
+
+        public static string RemoveSuffixedArticles (this string input)
+        {
+            foreach (var suffix in article_suffixes) {
+                if (input.EndsWith (suffix)) {
+                    input = input.Substring (0, input.Length - suffix.Length);
+                }
+            }
+            return input;
+        }
+
+        static string [] article_prefixes;
+        static string [] article_suffixes;
+        static FixupExtensions ()
+        {
+            // Translators: These are articles that might be prefixed or suffixed
+            // on artist names or album titles.  You can add as many as you need,
+            // separated by a pipe (|)
+            var articles = (Catalog.GetString ("a|an|the") + "|a|an|the").Split ('|').Distinct ();
+
+            // Translators: This is the format commonly used in your language for
+            // suffixing an article, eg in English: ", The"
+            var suffix_format = Catalog.GetString (", {0}");
+
+            article_prefixes = articles.Select (a => a + " ")
+                                       .ToArray ();
+
+            article_suffixes = articles.SelectMany (a =>
+                new string [] { String.Format (suffix_format, a), ", " +  a }
+            ).Distinct ().ToArray ();
+        }
+
     }
 
     /*public class CompilationSolver : Solver
diff --git a/src/Extensions/Banshee.Fixup/Banshee.Fixup/Tests.cs b/src/Extensions/Banshee.Fixup/Banshee.Fixup/Tests.cs
new file mode 100644
index 0000000..36be2bd
--- /dev/null
+++ b/src/Extensions/Banshee.Fixup/Banshee.Fixup/Tests.cs
@@ -0,0 +1,87 @@
+//
+// Tests.cs
+//
+// Author:
+//   Gabriel Burt <gburt novell com>
+//
+// Copyright (C) 2010 Novell, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the
+// "Software"), to deal in the Software without restriction, including
+// without limitation the rights to use, copy, modify, merge, publish,
+// distribute, sublicense, and/or sell copies of the Software, and to
+// permit persons to whom the Software is furnished to do so, subject to
+// the following conditions:
+//
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+
+#if ENABLE_TESTS
+
+using System;
+using System.Linq;
+
+using NUnit.Framework;
+using GLib;
+
+using Hyena;
+
+namespace Banshee.Fixup
+{
+    [TestFixture]
+    public class FixupTests
+    {
+        ArtistDuplicateSolver artist_solver;
+
+        [SetUp]
+        public void Setup ()
+        {
+            Solver.EnableUnitTests = true;
+            artist_solver = new ArtistDuplicateSolver ();
+        }
+
+        [TearDown]
+        public void Teardown ()
+        {
+        }
+
+        [Test]
+        public void ArtistNormalization ()
+        {
+            AssertArtistNormalized (null, null);
+            AssertArtistNormalized (null, 12);
+            AssertArtistNormalized ("", "");
+            AssertArtistNormalized ("foo", "foo");
+            AssertArtistNormalized ("dave matthews", "Dave Matthews");
+            AssertArtistNormalized ("dave matthews", "Matthews, Dave");
+            AssertArtistNormalized ("black keys", "The Black Keys");
+            AssertArtistNormalized ("black keys", "black Keys, the");
+            AssertArtistNormalized ("beatles", "Beatles");
+            AssertArtistNormalized ("beatles", "The Beatles");
+            AssertArtistNormalized ("beatles", "  Béatles  , The  ");
+            AssertArtistNormalized ("beatles", "Beatles, A");
+            AssertArtistNormalized ("beatles", "Beatles, An");
+            AssertArtistNormalized ("beatles", "A Beatles  ");
+            AssertArtistNormalized ("rem", " R.É.M");
+            AssertArtistNormalized ("belle and sebastian", "Belle & Sebastian");
+            AssertArtistNormalized ("belle and sebastian", "Bellé and Sebastían\t ");
+        }
+
+        private void AssertArtistNormalized (string correct, object input)
+        {
+            Assert.AreEqual (correct, artist_solver.NormalizeArtistName (input, null));
+        }
+    }
+}
+
+#endif
diff --git a/src/Extensions/Banshee.Fixup/Makefile.am b/src/Extensions/Banshee.Fixup/Makefile.am
index 62cf3e2..7a5c893 100644
--- a/src/Extensions/Banshee.Fixup/Makefile.am
+++ b/src/Extensions/Banshee.Fixup/Makefile.am
@@ -13,6 +13,7 @@ SOURCES =  \
 	Banshee.Fixup/Problem.cs \
 	Banshee.Fixup/ProblemModel.cs \
 	Banshee.Fixup/Solver.cs \
+	Banshee.Fixup/Tests.cs \
 	Banshee.Fixup/View.cs
 
 RESOURCES =  \
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 93a1da9..53f8dd6 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -14,6 +14,7 @@ TEST_ASSEMBLIES = \
 	Banshee.Core.dll \
 	Banshee.Gnome.dll \
 	Banshee.Services.dll \
+	Banshee.Fixup.dll \
 	Banshee.Dap.Mtp.dll
 
 if ENABLE_GIO



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]