[easytag] Add a function for comparing UTF-8 strings
- From: David King <davidk src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [easytag] Add a function for comparing UTF-8 strings
- Date: Sat, 21 Feb 2015 18:12:00 +0000 (UTC)
commit 834b354f5ddf438ee58fa5824ea7b7849eb05e1e
Author: David King <amigadave amigadave com>
Date: Sat Feb 21 17:54:42 2015 +0000
Add a function for comparing UTF-8 strings
Comparing strings for equality with g_utf8_collate() gives a poor
result, as it is intended for ordering, not equality comparisons. It is
preferable to normalize the string first, and then to compare the
normalized result.
Add a new et_normalized_strcmp0() function to do this, and use it in
et_file_tag_detect_difference() to avoid problems where two strings
compared identically with g_utf8_collate(), which prevented the user
from changing tag fields. Adapt the File_Tag test as appropriate.
https://bugzilla.gnome.org/show_bug.cgi?id=744897
src/file_tag.c | 129 +++++++++++++++++++++++++------------------------
src/misc.c | 41 ++++++++++++++++
src/misc.h | 1 +
tests/test-file_tag.c | 27 +++++++----
4 files changed, 126 insertions(+), 72 deletions(-)
---
diff --git a/src/file_tag.c b/src/file_tag.c
index 9c66ec4..0324279 100644
--- a/src/file_tag.c
+++ b/src/file_tag.c
@@ -349,98 +349,106 @@ et_file_tag_detect_difference (const File_Tag *FileTag1,
return TRUE;
/* Title */
- if ( FileTag1->title && !FileTag2->title && g_utf8_strlen(FileTag1->title, -1)>0 ) return TRUE;
- if (!FileTag1->title && FileTag2->title && g_utf8_strlen(FileTag2->title, -1)>0 ) return TRUE;
- if ( FileTag1->title && FileTag2->title && g_utf8_collate(FileTag1->title,FileTag2->title)!=0 ) return TRUE;
+ if (et_normalized_strcmp0 (FileTag1->title, FileTag2->title) != 0)
+ {
+ return TRUE;
+ }
/* Artist */
- if ( FileTag1->artist && !FileTag2->artist && g_utf8_strlen(FileTag1->artist, -1)>0 ) return TRUE;
- if (!FileTag1->artist && FileTag2->artist && g_utf8_strlen(FileTag2->artist, -1)>0 ) return TRUE;
- if ( FileTag1->artist && FileTag2->artist && g_utf8_collate(FileTag1->artist,FileTag2->artist)!=0 ) return TRUE;
+ if (et_normalized_strcmp0 (FileTag1->artist, FileTag2->artist) != 0)
+ {
+ return TRUE;
+ }
/* Album Artist */
- if ( FileTag1->album_artist && !FileTag2->album_artist && g_utf8_strlen(FileTag1->album_artist, -1)>0 ) return TRUE;
- if (!FileTag1->album_artist && FileTag2->album_artist && g_utf8_strlen(FileTag2->album_artist, -1)>0 ) return TRUE;
- if ( FileTag1->album_artist && FileTag2->album_artist && g_utf8_collate(FileTag1->album_artist,FileTag2->album_artist)!=0 ) return TRUE;
+ if (et_normalized_strcmp0 (FileTag1->album_artist,
+ FileTag2->album_artist) != 0)
+ {
+ return TRUE;
+ }
/* Album */
- if ( FileTag1->album && !FileTag2->album && g_utf8_strlen(FileTag1->album, -1)>0 ) return TRUE;
- if (!FileTag1->album && FileTag2->album && g_utf8_strlen(FileTag2->album, -1)>0 ) return TRUE;
- if ( FileTag1->album && FileTag2->album && g_utf8_collate(FileTag1->album,FileTag2->album)!=0 ) return TRUE;
-
- /* Disc Number */
- if ( FileTag1->disc_number && !FileTag2->disc_number && g_utf8_strlen(FileTag1->disc_number, -1)>0 ) return TRUE;
- if (!FileTag1->disc_number && FileTag2->disc_number && g_utf8_strlen(FileTag2->disc_number, -1)>0 ) return TRUE;
- if ( FileTag1->disc_number && FileTag2->disc_number && g_utf8_collate(FileTag1->disc_number,FileTag2->disc_number)!=0 ) return TRUE;
-
- /* Discs Total */
- if (FileTag1->disc_total && !FileTag2->disc_total
- && g_utf8_strlen (FileTag1->disc_total, -1) > 0)
+ if (et_normalized_strcmp0 (FileTag1->album, FileTag2->album) != 0)
{
return TRUE;
}
- if (!FileTag1->disc_total && FileTag2->disc_total
- && g_utf8_strlen (FileTag2->disc_total, -1) > 0)
+ /* Disc Number */
+ if (et_normalized_strcmp0 (FileTag1->disc_number,
+ FileTag2->disc_number) != 0)
{
return TRUE;
}
- if (FileTag1->disc_total && FileTag2->disc_total
- && g_utf8_collate (FileTag1->disc_total, FileTag2->disc_total) != 0)
+ /* Discs Total */
+ if (et_normalized_strcmp0 (FileTag1->disc_total,
+ FileTag2->disc_total) != 0)
{
return TRUE;
}
/* Year */
- if ( FileTag1->year && !FileTag2->year && g_utf8_strlen(FileTag1->year, -1)>0 ) return TRUE;
- if (!FileTag1->year && FileTag2->year && g_utf8_strlen(FileTag2->year, -1)>0 ) return TRUE;
- if ( FileTag1->year && FileTag2->year && g_utf8_collate(FileTag1->year,FileTag2->year)!=0 ) return TRUE;
+ if (et_normalized_strcmp0 (FileTag1->year, FileTag2->year) != 0)
+ {
+ return TRUE;
+ }
/* Track */
- if ( FileTag1->track && !FileTag2->track && g_utf8_strlen(FileTag1->track, -1)>0 ) return TRUE;
- if (!FileTag1->track && FileTag2->track && g_utf8_strlen(FileTag2->track, -1)>0 ) return TRUE;
- if ( FileTag1->track && FileTag2->track && g_utf8_collate(FileTag1->track,FileTag2->track)!=0 ) return TRUE;
+ if (et_normalized_strcmp0 (FileTag1->track, FileTag2->track) != 0)
+ {
+ return TRUE;
+ }
/* Track Total */
- if ( FileTag1->track_total && !FileTag2->track_total && g_utf8_strlen(FileTag1->track_total, -1)>0 ) return TRUE;
- if (!FileTag1->track_total && FileTag2->track_total && g_utf8_strlen(FileTag2->track_total, -1)>0 ) return TRUE;
- if ( FileTag1->track_total && FileTag2->track_total && g_utf8_collate(FileTag1->track_total,FileTag2->track_total)!=0 ) return TRUE;
+ if (et_normalized_strcmp0 (FileTag1->track_total,
+ FileTag2->track_total) != 0)
+ {
+ return TRUE;
+ }
/* Genre */
- if ( FileTag1->genre && !FileTag2->genre && g_utf8_strlen(FileTag1->genre, -1)>0 ) return TRUE;
- if (!FileTag1->genre && FileTag2->genre && g_utf8_strlen(FileTag2->genre, -1)>0 ) return TRUE;
- if ( FileTag1->genre && FileTag2->genre && g_utf8_collate(FileTag1->genre,FileTag2->genre)!=0 ) return TRUE;
+ if (et_normalized_strcmp0 (FileTag1->genre, FileTag2->genre) != 0)
+ {
+ return TRUE;
+ }
/* Comment */
- if ( FileTag1->comment && !FileTag2->comment && g_utf8_strlen(FileTag1->comment, -1)>0 ) return TRUE;
- if (!FileTag1->comment && FileTag2->comment && g_utf8_strlen(FileTag2->comment, -1)>0 ) return TRUE;
- if ( FileTag1->comment && FileTag2->comment && g_utf8_collate(FileTag1->comment,FileTag2->comment)!=0 ) return TRUE;
+ if (et_normalized_strcmp0 (FileTag1->comment, FileTag2->comment) != 0)
+ {
+ return TRUE;
+ }
/* Composer */
- if ( FileTag1->composer && !FileTag2->composer && g_utf8_strlen(FileTag1->composer, -1)>0 ) return TRUE;
- if (!FileTag1->composer && FileTag2->composer && g_utf8_strlen(FileTag2->composer, -1)>0 ) return TRUE;
- if ( FileTag1->composer && FileTag2->composer && g_utf8_collate(FileTag1->composer,FileTag2->composer)!=0 ) return TRUE;
+ if (et_normalized_strcmp0 (FileTag1->composer, FileTag2->composer) != 0)
+ {
+ return TRUE;
+ }
/* Original artist */
- if ( FileTag1->orig_artist && !FileTag2->orig_artist && g_utf8_strlen(FileTag1->orig_artist, -1)>0 ) return TRUE;
- if (!FileTag1->orig_artist && FileTag2->orig_artist && g_utf8_strlen(FileTag2->orig_artist, -1)>0 ) return TRUE;
- if ( FileTag1->orig_artist && FileTag2->orig_artist && g_utf8_collate(FileTag1->orig_artist,FileTag2->orig_artist)!=0 ) return TRUE;
+ if (et_normalized_strcmp0 (FileTag1->orig_artist,
+ FileTag2->orig_artist) != 0)
+ {
+ return TRUE;
+ }
/* Copyright */
- if ( FileTag1->copyright && !FileTag2->copyright && g_utf8_strlen(FileTag1->copyright, -1)>0 ) return TRUE;
- if (!FileTag1->copyright && FileTag2->copyright && g_utf8_strlen(FileTag2->copyright, -1)>0 ) return TRUE;
- if ( FileTag1->copyright && FileTag2->copyright && g_utf8_collate(FileTag1->copyright,FileTag2->copyright)!=0 ) return TRUE;
+ if (et_normalized_strcmp0 (FileTag1->copyright, FileTag2->copyright) != 0)
+ {
+ return TRUE;
+ }
/* URL */
- if ( FileTag1->url && !FileTag2->url && g_utf8_strlen(FileTag1->url, -1)>0 ) return TRUE;
- if (!FileTag1->url && FileTag2->url && g_utf8_strlen(FileTag2->url, -1)>0 ) return TRUE;
- if ( FileTag1->url && FileTag2->url && g_utf8_collate(FileTag1->url,FileTag2->url)!=0 ) return TRUE;
+ if (et_normalized_strcmp0 (FileTag1->url, FileTag2->url) != 0)
+ {
+ return TRUE;
+ }
/* Encoded by */
- if ( FileTag1->encoded_by && !FileTag2->encoded_by && g_utf8_strlen(FileTag1->encoded_by, -1)>0 ) return TRUE;
- if (!FileTag1->encoded_by && FileTag2->encoded_by && g_utf8_strlen(FileTag2->encoded_by, -1)>0 ) return TRUE;
- if ( FileTag1->encoded_by && FileTag2->encoded_by && g_utf8_collate(FileTag1->encoded_by,FileTag2->encoded_by)!=0 ) return TRUE;
+ if (et_normalized_strcmp0 (FileTag1->encoded_by,
+ FileTag2->encoded_by) != 0)
+ {
+ return TRUE;
+ }
/* Picture */
for (pic1 = FileTag1->picture, pic2 = FileTag2->picture; ;
@@ -459,15 +467,10 @@ et_file_tag_detect_difference (const File_Tag *FileTag1,
}
if (pic1->type != pic2->type)
return TRUE;
- if (pic1->description && !pic2->description
- && g_utf8_strlen(pic1->description, -1)>0 )
- return TRUE;
- if (!pic1->description && pic2->description
- && g_utf8_strlen(pic2->description, -1)>0 )
- return TRUE;
- if (pic1->description && pic2->description
- && g_utf8_collate(pic1->description, pic2->description)!=0 )
+ if (et_normalized_strcmp0 (pic1->description, pic2->description) != 0)
+ {
return TRUE;
+ }
}
return FALSE; /* No changes */
diff --git a/src/misc.c b/src/misc.c
index 578fa4a..0ca71f6 100644
--- a/src/misc.c
+++ b/src/misc.c
@@ -435,3 +435,44 @@ et_undo_key_new (void)
static guint ETUndoKey = 0;
return ++ETUndoKey;
}
+
+/*
+ * et_normalized_strcmp0:
+ * @str1: UTF-8 string, or %NULL
+ * @str2: UTF-8 string to compare against, or %NULL
+ *
+ * Compare two UTF-8 strings after normalizing both with G_NORMALIZE_DEFAULT.
+ * %NULL equals %NULL and orders before every non-%NULL string.
+ *
+ * Returns: an integer less than, equal to, or greater than zero, if @str1 is
+ * <, == or > @str2
+ */
+gint
+et_normalized_strcmp0 (const gchar *str1,
+ const gchar *str2)
+{
+ gint result;
+ gchar *normalized1;
+ gchar *normalized2;
+
+ /* Check for NULL, as it cannot be passed to g_utf8_normalize(). */
+ if (!str1)
+ {
+ return -(str1 != str2); /* 0 when both are NULL, otherwise -1 */
+ }
+
+ if (!str2)
+ {
+ return str1 != str2; /* str1 is non-NULL here, so this yields 1 */
+ }
+
+ normalized1 = g_utf8_normalize (str1, -1, G_NORMALIZE_DEFAULT);
+ normalized2 = g_utf8_normalize (str2, -1, G_NORMALIZE_DEFAULT);
+
+ result = g_strcmp0 (normalized1, normalized2);
+
+ g_free (normalized1);
+ g_free (normalized2);
+
+ return result;
+}
diff --git a/src/misc.h b/src/misc.h
index 41183d6..48b0891 100644
--- a/src/misc.h
+++ b/src/misc.h
@@ -40,6 +40,7 @@ gchar * et_track_number_to_string (const guint track_number);
void et_filename_prepare (gchar *filename_utf8, gboolean replace_illegal);
guint et_undo_key_new (void);
+gint et_normalized_strcmp0 (const gchar *str1, const gchar *str2);
G_END_DECLS
diff --git a/tests/test-file_tag.c b/tests/test-file_tag.c
index 2b85b3c..2efa5bf 100644
--- a/tests/test-file_tag.c
+++ b/tests/test-file_tag.c
@@ -78,21 +78,30 @@ file_tag_difference (void)
g_assert (tag1);
- et_file_tag_set_title (tag1, "foo");
- et_file_tag_set_artist (tag1, "bar");
- et_file_tag_set_album_artist (tag1, "baz");
+ et_file_tag_set_title (tag1, "foo：");
- g_assert_cmpstr (tag1->title, ==, "foo");
- g_assert_cmpstr (tag1->artist, ==, "bar");
- g_assert_cmpstr (tag1->album_artist, ==, "baz");
+ /* Contains a full-width colon, which should compare differently to a
+ * colon. */
+ g_assert_cmpstr (tag1->title, ==, "foo：");
tag2 = et_file_tag_new ();
g_assert (tag2);
- et_file_tag_set_title (tag2, "flub");
- et_file_tag_set_artist (tag2, "blub");
- et_file_tag_set_album_artist (tag2, "slub");
+ et_file_tag_set_title (tag2, "foo:");
+
+ g_assert (et_file_tag_detect_difference (tag1, tag2));
+
+ et_file_tag_free (tag2);
+ et_file_tag_free (tag1);
+
+ tag1 = et_file_tag_new ();
+
+ et_file_tag_set_artist (tag1, "bar");
+
+ tag2 = et_file_tag_new ();
+
+ et_file_tag_set_artist (tag2, "baz");
g_assert (et_file_tag_detect_difference (tag1, tag2));
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]