[easytag] Add a function for comparing UTF-8 strings



commit 834b354f5ddf438ee58fa5824ea7b7849eb05e1e
Author: David King <amigadave amigadave com>
Date:   Sat Feb 21 17:54:42 2015 +0000

    Add a function for comparing UTF-8 strings
    
    Comparing strings for equality with g_utf8_collate() gives a poor
    result, as it is intended for ordering, not equality comparisons. It is
    preferable to normalize the string first, and then to compare the
    normalized result.
    
    Add a new et_normalized_strcmp0() function to do this, and use it in
    et_file_tag_detect_difference() to avoid problems where two strings
    compared identically with g_utf8_collate(), which prevented the user
    from changing tag fields. Adapt the File_Tag test as appropriate.
    
    https://bugzilla.gnome.org/show_bug.cgi?id=744897

 src/file_tag.c        |  129 +++++++++++++++++++++++++------------------------
 src/misc.c            |   41 ++++++++++++++++
 src/misc.h            |    1 +
 tests/test-file_tag.c |   27 +++++++----
 4 files changed, 126 insertions(+), 72 deletions(-)
---
diff --git a/src/file_tag.c b/src/file_tag.c
index 9c66ec4..0324279 100644
--- a/src/file_tag.c
+++ b/src/file_tag.c
@@ -349,98 +349,106 @@ et_file_tag_detect_difference (const File_Tag *FileTag1,
         return TRUE;
 
     /* Title */
-    if ( FileTag1->title && !FileTag2->title && g_utf8_strlen(FileTag1->title, -1)>0 ) return TRUE;
-    if (!FileTag1->title &&  FileTag2->title && g_utf8_strlen(FileTag2->title, -1)>0 ) return TRUE;
-    if ( FileTag1->title &&  FileTag2->title && g_utf8_collate(FileTag1->title,FileTag2->title)!=0 ) return TRUE;
+    if (et_normalized_strcmp0 (FileTag1->title, FileTag2->title) != 0)
+    {
+        return TRUE;
+    }
 
     /* Artist */
-    if ( FileTag1->artist && !FileTag2->artist && g_utf8_strlen(FileTag1->artist, -1)>0 ) return TRUE;
-    if (!FileTag1->artist &&  FileTag2->artist && g_utf8_strlen(FileTag2->artist, -1)>0 ) return TRUE;
-    if ( FileTag1->artist &&  FileTag2->artist && g_utf8_collate(FileTag1->artist,FileTag2->artist)!=0 ) return TRUE;
+    if (et_normalized_strcmp0 (FileTag1->artist, FileTag2->artist) != 0)
+    {
+        return TRUE;
+    }
 
        /* Album Artist */
-    if ( FileTag1->album_artist && !FileTag2->album_artist && g_utf8_strlen(FileTag1->album_artist, -1)>0 ) return TRUE;
-    if (!FileTag1->album_artist &&  FileTag2->album_artist && g_utf8_strlen(FileTag2->album_artist, -1)>0 ) return TRUE;
-    if ( FileTag1->album_artist &&  FileTag2->album_artist && g_utf8_collate(FileTag1->album_artist,FileTag2->album_artist)!=0 ) return TRUE;
+    if (et_normalized_strcmp0 (FileTag1->album_artist,
+                               FileTag2->album_artist) != 0)
+    {
+        return TRUE;
+    }
 
     /* Album */
-    if ( FileTag1->album && !FileTag2->album && g_utf8_strlen(FileTag1->album, -1)>0 ) return TRUE;
-    if (!FileTag1->album &&  FileTag2->album && g_utf8_strlen(FileTag2->album, -1)>0 ) return TRUE;
-    if ( FileTag1->album &&  FileTag2->album && g_utf8_collate(FileTag1->album,FileTag2->album)!=0 ) return TRUE;
-
-    /* Disc Number */
-    if ( FileTag1->disc_number && !FileTag2->disc_number && g_utf8_strlen(FileTag1->disc_number, -1)>0 ) return TRUE;
-    if (!FileTag1->disc_number &&  FileTag2->disc_number && g_utf8_strlen(FileTag2->disc_number, -1)>0 ) return TRUE;
-    if ( FileTag1->disc_number &&  FileTag2->disc_number && g_utf8_collate(FileTag1->disc_number,FileTag2->disc_number)!=0 ) return TRUE;
-
-    /* Discs Total */
-    if (FileTag1->disc_total && !FileTag2->disc_total
-        && g_utf8_strlen (FileTag1->disc_total, -1) > 0)
+    if (et_normalized_strcmp0 (FileTag1->album, FileTag2->album) != 0)
     {
         return TRUE;
     }
 
-    if (!FileTag1->disc_total &&  FileTag2->disc_total
-        && g_utf8_strlen (FileTag2->disc_total, -1) > 0)
+    /* Disc Number */
+    if (et_normalized_strcmp0 (FileTag1->disc_number,
+                               FileTag2->disc_number) != 0)
     {
         return TRUE;
     }
 
-    if (FileTag1->disc_total &&  FileTag2->disc_total
-        && g_utf8_collate (FileTag1->disc_total, FileTag2->disc_total) != 0)
+    /* Discs Total */
+    if (et_normalized_strcmp0 (FileTag1->disc_total,
+                               FileTag2->disc_total) != 0)
     {
         return TRUE;
     }
 
     /* Year */
-    if ( FileTag1->year && !FileTag2->year && g_utf8_strlen(FileTag1->year, -1)>0 ) return TRUE;
-    if (!FileTag1->year &&  FileTag2->year && g_utf8_strlen(FileTag2->year, -1)>0 ) return TRUE;
-    if ( FileTag1->year &&  FileTag2->year && g_utf8_collate(FileTag1->year,FileTag2->year)!=0 ) return TRUE;
+    if (et_normalized_strcmp0 (FileTag1->year, FileTag2->year) != 0)
+    {
+        return TRUE;
+    }
 
     /* Track */
-    if ( FileTag1->track && !FileTag2->track && g_utf8_strlen(FileTag1->track, -1)>0 ) return TRUE;
-    if (!FileTag1->track &&  FileTag2->track && g_utf8_strlen(FileTag2->track, -1)>0 ) return TRUE;
-    if ( FileTag1->track &&  FileTag2->track && g_utf8_collate(FileTag1->track,FileTag2->track)!=0 ) return TRUE;
+    if (et_normalized_strcmp0 (FileTag1->track, FileTag2->track) != 0)
+    {
+        return TRUE;
+    }
 
     /* Track Total */
-    if ( FileTag1->track_total && !FileTag2->track_total && g_utf8_strlen(FileTag1->track_total, -1)>0 ) return TRUE;
-    if (!FileTag1->track_total &&  FileTag2->track_total && g_utf8_strlen(FileTag2->track_total, -1)>0 ) return TRUE;
-    if ( FileTag1->track_total &&  FileTag2->track_total && g_utf8_collate(FileTag1->track_total,FileTag2->track_total)!=0 ) return TRUE;
+    if (et_normalized_strcmp0 (FileTag1->track_total,
+                               FileTag2->track_total) != 0)
+    {
+        return TRUE;
+    }
 
     /* Genre */
-    if ( FileTag1->genre && !FileTag2->genre && g_utf8_strlen(FileTag1->genre, -1)>0 ) return TRUE;
-    if (!FileTag1->genre &&  FileTag2->genre && g_utf8_strlen(FileTag2->genre, -1)>0 ) return TRUE;
-    if ( FileTag1->genre &&  FileTag2->genre && g_utf8_collate(FileTag1->genre,FileTag2->genre)!=0 ) return TRUE;
+    if (et_normalized_strcmp0 (FileTag1->genre, FileTag2->genre) != 0)
+    {
+        return TRUE;
+    }
 
     /* Comment */
-    if ( FileTag1->comment && !FileTag2->comment && g_utf8_strlen(FileTag1->comment, -1)>0 ) return TRUE;
-    if (!FileTag1->comment &&  FileTag2->comment && g_utf8_strlen(FileTag2->comment, -1)>0 ) return TRUE;
-    if ( FileTag1->comment &&  FileTag2->comment && g_utf8_collate(FileTag1->comment,FileTag2->comment)!=0 ) return TRUE;
+    if (et_normalized_strcmp0 (FileTag1->comment, FileTag2->comment) != 0)
+    {
+        return TRUE;
+    }
 
     /* Composer */
-    if ( FileTag1->composer && !FileTag2->composer && g_utf8_strlen(FileTag1->composer, -1)>0 ) return TRUE;
-    if (!FileTag1->composer &&  FileTag2->composer && g_utf8_strlen(FileTag2->composer, -1)>0 ) return TRUE;
-    if ( FileTag1->composer &&  FileTag2->composer && g_utf8_collate(FileTag1->composer,FileTag2->composer)!=0 ) return TRUE;
+    if (et_normalized_strcmp0 (FileTag1->composer, FileTag2->composer) != 0)
+    {
+        return TRUE;
+    }
 
     /* Original artist */
-    if ( FileTag1->orig_artist && !FileTag2->orig_artist && g_utf8_strlen(FileTag1->orig_artist, -1)>0 ) return TRUE;
-    if (!FileTag1->orig_artist &&  FileTag2->orig_artist && g_utf8_strlen(FileTag2->orig_artist, -1)>0 ) return TRUE;
-    if ( FileTag1->orig_artist &&  FileTag2->orig_artist && g_utf8_collate(FileTag1->orig_artist,FileTag2->orig_artist)!=0 ) return TRUE;
+    if (et_normalized_strcmp0 (FileTag1->orig_artist,
+                               FileTag2->orig_artist) != 0)
+    {
+        return TRUE;
+    }
 
     /* Copyright */
-    if ( FileTag1->copyright && !FileTag2->copyright && g_utf8_strlen(FileTag1->copyright, -1)>0 ) return TRUE;
-    if (!FileTag1->copyright &&  FileTag2->copyright && g_utf8_strlen(FileTag2->copyright, -1)>0 ) return TRUE;
-    if ( FileTag1->copyright &&  FileTag2->copyright && g_utf8_collate(FileTag1->copyright,FileTag2->copyright)!=0 ) return TRUE;
+    if (et_normalized_strcmp0 (FileTag1->copyright, FileTag2->copyright) != 0)
+    {
+        return TRUE;
+    }
 
     /* URL */
-    if ( FileTag1->url && !FileTag2->url && g_utf8_strlen(FileTag1->url, -1)>0 ) return TRUE;
-    if (!FileTag1->url &&  FileTag2->url && g_utf8_strlen(FileTag2->url, -1)>0 ) return TRUE;
-    if ( FileTag1->url &&  FileTag2->url && g_utf8_collate(FileTag1->url,FileTag2->url)!=0 ) return TRUE;
+    if (et_normalized_strcmp0 (FileTag1->url, FileTag2->url) != 0)
+    {
+        return TRUE;
+    }
 
     /* Encoded by */
-    if ( FileTag1->encoded_by && !FileTag2->encoded_by && g_utf8_strlen(FileTag1->encoded_by, -1)>0 ) return TRUE;
-    if (!FileTag1->encoded_by &&  FileTag2->encoded_by && g_utf8_strlen(FileTag2->encoded_by, -1)>0 ) return TRUE;
-    if ( FileTag1->encoded_by &&  FileTag2->encoded_by && g_utf8_collate(FileTag1->encoded_by,FileTag2->encoded_by)!=0 ) return TRUE;
+    if (et_normalized_strcmp0 (FileTag1->encoded_by,
+                               FileTag2->encoded_by) != 0)
+    {
+        return TRUE;
+    }
 
     /* Picture */
     for (pic1 = FileTag1->picture, pic2 = FileTag2->picture; ;
@@ -459,15 +467,10 @@ et_file_tag_detect_difference (const File_Tag *FileTag1,
         }
         if (pic1->type != pic2->type)
             return TRUE;
-        if (pic1->description && !pic2->description
-        &&  g_utf8_strlen(pic1->description, -1)>0 )
-            return TRUE;
-        if (!pic1->description && pic2->description
-        &&  g_utf8_strlen(pic2->description, -1)>0 )
-            return TRUE;
-        if (pic1->description && pic2->description
-        &&  g_utf8_collate(pic1->description, pic2->description)!=0 )
+        if (et_normalized_strcmp0 (pic1->description, pic2->description) != 0)
+        {
             return TRUE;
+        }
     }
 
     return FALSE; /* No changes */
diff --git a/src/misc.c b/src/misc.c
index 578fa4a..0ca71f6 100644
--- a/src/misc.c
+++ b/src/misc.c
@@ -435,3 +435,44 @@ et_undo_key_new (void)
     static guint ETUndoKey = 0;
     return ++ETUndoKey;
 }
+
+/*
+ * et_normalized_strcmp0:
+ * @str1: UTF-8 string, or %NULL
+ * @str2: UTF-8 string to compare against, or %NULL
+ *
+ * Compare two UTF-8 strings after normalizing both with G_NORMALIZE_DEFAULT.
+ * %NULL is equal only to %NULL and orders before any other string, even "".
+ *
+ * Returns: an integer less than, equal to, or greater than zero, if @str1 is
+ * <, == or > @str2
+ */
+gint
+et_normalized_strcmp0 (const gchar *str1,
+                       const gchar *str2)
+{
+    gint result;
+    gchar *normalized1;
+    gchar *normalized2;
+
+    /* Check for NULL, as it cannot be passed to g_utf8_normalize(). */
+    if (!str1)
+    {
+        return -(str1 != str2); /* 0 if both are NULL, otherwise -1 */
+    }
+
+    if (!str2)
+    {
+        return str1 != str2; /* str1 is non-NULL here, so this is 1 */
+    }
+
+    normalized1 = g_utf8_normalize (str1, -1, G_NORMALIZE_DEFAULT);
+    normalized2 = g_utf8_normalize (str2, -1, G_NORMALIZE_DEFAULT);
+
+    result = g_strcmp0 (normalized1, normalized2);
+
+    g_free (normalized1);
+    g_free (normalized2);
+
+    return result;
+}
diff --git a/src/misc.h b/src/misc.h
index 41183d6..48b0891 100644
--- a/src/misc.h
+++ b/src/misc.h
@@ -40,6 +40,7 @@ gchar * et_track_number_to_string (const guint track_number);
 void et_filename_prepare (gchar *filename_utf8, gboolean replace_illegal);
 
 guint et_undo_key_new (void);
+gint et_normalized_strcmp0 (const gchar *str1, const gchar *str2);
 
 G_END_DECLS
 
diff --git a/tests/test-file_tag.c b/tests/test-file_tag.c
index 2b85b3c..2efa5bf 100644
--- a/tests/test-file_tag.c
+++ b/tests/test-file_tag.c
@@ -78,21 +78,30 @@ file_tag_difference (void)
 
     g_assert (tag1);
 
-    et_file_tag_set_title (tag1, "foo");
-    et_file_tag_set_artist (tag1, "bar");
-    et_file_tag_set_album_artist (tag1, "baz");
+    et_file_tag_set_title (tag1, "foo：");
 
-    g_assert_cmpstr (tag1->title, ==, "foo");
-    g_assert_cmpstr (tag1->artist, ==, "bar");
-    g_assert_cmpstr (tag1->album_artist, ==, "baz");
+    /* Contains a full-width colon, which should compare differently to a
+     * colon. */
+    g_assert_cmpstr (tag1->title, ==, "foo：");
 
     tag2 = et_file_tag_new ();
 
     g_assert (tag2);
 
-    et_file_tag_set_title (tag2, "flub");
-    et_file_tag_set_artist (tag2, "blub");
-    et_file_tag_set_album_artist (tag2, "slub");
+    et_file_tag_set_title (tag2, "foo:");
+
+    g_assert (et_file_tag_detect_difference (tag1, tag2));
+
+    et_file_tag_free (tag2);
+    et_file_tag_free (tag1);
+
+    tag1 = et_file_tag_new ();
+
+    et_file_tag_set_artist (tag1, "bar");
+
+    tag2 = et_file_tag_new ();
+
+    et_file_tag_set_artist (tag2, "baz");
 
     g_assert (et_file_tag_detect_difference (tag1, tag2));
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]