[Rhythmbox-devel] UTF-8 vorbis fields



Hi,

So some of my vorbis files are broken and have what appears to be
ISO-8859-1 in the fields.  I think the 'easytag' program did this. 
However, it would be nice if monkey-media (and rhythmbox) could detect
this situation and handle it as gracefully as possible, so here's a
patch for monkey-media.  Ok to apply?
Index: tests/test-info.c
===================================================================
RCS file: /cvs/gnome/monkey-media/tests/test-info.c,v
retrieving revision 1.8
diff -u -d -I$Id: -r1.8 test-info.c
--- tests/test-info.c	2 Sep 2002 17:51:34 -0000	1.8
+++ tests/test-info.c	26 Jan 2003 07:50:11 -0000
@@ -17,44 +17,87 @@
 	GValue val = {0, };
 	int j;
 
-	monkey_media_stream_info_get_value (i, MONKEY_MEDIA_STREAM_INFO_FIELD_TITLE, 0, &val);
-	g_print ("title:	%s\n", g_value_get_string (&val));
-	g_value_unset (&val);
-	monkey_media_stream_info_get_value (i, MONKEY_MEDIA_STREAM_INFO_FIELD_ARTIST, 0, &val);
-	g_print ("artist:	%s\n", g_value_get_string (&val));
-	g_value_unset (&val);
-	monkey_media_stream_info_get_value (i, MONKEY_MEDIA_STREAM_INFO_FIELD_ALBUM, 0, &val);
-	g_print ("album:	%s\n", g_value_get_string (&val));
-	g_value_unset (&val);
-	monkey_media_stream_info_get_value (i, MONKEY_MEDIA_STREAM_INFO_FIELD_DATE, 0, &val);
-	g_print ("date:		%s\n", g_value_get_string (&val));
-	g_value_unset (&val);
-	
-	for (j = 0; j < monkey_media_stream_info_get_n_values (i, MONKEY_MEDIA_STREAM_INFO_FIELD_GENRE); j++)
+	if (monkey_media_stream_info_get_value (i, MONKEY_MEDIA_STREAM_INFO_FIELD_TITLE, 0, &val))
 	{
-		monkey_media_stream_info_get_value (i, MONKEY_MEDIA_STREAM_INFO_FIELD_GENRE, j, &val);
-		g_print ("genre:	%s\n", g_value_get_string (&val));
+		g_print ("title:	%s\n", g_value_get_string (&val));
+		g_value_unset (&val);
+	}
+	else
+		g_print ("(no title available)\n");
+	if (monkey_media_stream_info_get_value (i, MONKEY_MEDIA_STREAM_INFO_FIELD_ARTIST, 0, &val))
+	{
+		g_print ("artist:	%s\n", g_value_get_string (&val));
+		g_value_unset (&val);
+	}
+	else
+		g_print ("(no artist available)\n");
+	if (monkey_media_stream_info_get_value (i, MONKEY_MEDIA_STREAM_INFO_FIELD_ALBUM, 0, &val))
+	{
+		g_print ("album:	%s\n", g_value_get_string (&val));
+		g_value_unset (&val);
+	}
+	else
+		g_print ("(no album available)\n");
+	if (monkey_media_stream_info_get_value (i, MONKEY_MEDIA_STREAM_INFO_FIELD_DATE, 0, &val))
+	{
+		g_print ("date:		%s\n", g_value_get_string (&val));
 		g_value_unset (&val);
 	}
+	else
+		g_print ("(no date available)\n");
+
+	for (j = 0; j < monkey_media_stream_info_get_n_values (i, MONKEY_MEDIA_STREAM_INFO_FIELD_GENRE); j++)
+	{
+		if (monkey_media_stream_info_get_value (i, MONKEY_MEDIA_STREAM_INFO_FIELD_GENRE,
+							j, &val))
+		{
+			g_print ("genre:	%s\n", g_value_get_string (&val));
+			g_value_unset (&val);
+		}
+	}
 	
-	monkey_media_stream_info_get_value (i, MONKEY_MEDIA_STREAM_INFO_FIELD_COMMENT, 0, &val);
-	g_print ("comment:	%s\n", g_value_get_string (&val));
-	g_value_unset (&val);
-	monkey_media_stream_info_get_value (i, MONKEY_MEDIA_STREAM_INFO_FIELD_AUDIO_CODEC_INFO, 0, &val);
-	g_print ("audiocodecinfo:	%s\n", g_value_get_string (&val));
-	g_value_unset (&val);
-	monkey_media_stream_info_get_value (i, MONKEY_MEDIA_STREAM_INFO_FIELD_TRACK_NUMBER, 0, &val);
-	g_print ("tracknum:	%d\n", g_value_get_int (&val));
-	g_value_unset (&val);
-	monkey_media_stream_info_get_value (i, MONKEY_MEDIA_STREAM_INFO_FIELD_AUDIO_BIT_RATE, 0, &val);
-	g_print ("audiobitrate:	%d\n", g_value_get_int (&val));
-	g_value_unset (&val);
-	monkey_media_stream_info_get_value (i, MONKEY_MEDIA_STREAM_INFO_FIELD_FILE_SIZE, 0, &val);
-	g_print ("filesize:	%ld\n", g_value_get_long (&val));
-	g_value_unset (&val);
-	monkey_media_stream_info_get_value (i, MONKEY_MEDIA_STREAM_INFO_FIELD_DURATION, 0, &val);
-	g_print ("length:	%ld\n", g_value_get_long (&val));
-	g_value_unset (&val);
+	if (monkey_media_stream_info_get_value (i, MONKEY_MEDIA_STREAM_INFO_FIELD_COMMENT, 0, &val))
+	{
+		g_print ("comment:	%s\n", g_value_get_string (&val));
+		g_value_unset (&val);
+	}
+	else
+		g_print ("(no comment available)\n");
+	if (monkey_media_stream_info_get_value (i, MONKEY_MEDIA_STREAM_INFO_FIELD_AUDIO_CODEC_INFO, 0, &val))
+	{
+		g_print ("audiocodecinfo:	%s\n", g_value_get_string (&val));
+		g_value_unset (&val);
+	}
+	else
+		g_print ("(no audiocodecinfo available)\n");
+	if (monkey_media_stream_info_get_value (i, MONKEY_MEDIA_STREAM_INFO_FIELD_TRACK_NUMBER, 0, &val))
+	{
+		g_print ("tracknum:	%d\n", g_value_get_int (&val));
+		g_value_unset (&val);
+	}
+	else
+		g_print ("(no tracknum available)\n");
+	if (monkey_media_stream_info_get_value (i, MONKEY_MEDIA_STREAM_INFO_FIELD_AUDIO_BIT_RATE, 0, &val))
+	{
+		g_print ("audiobitrate:	%d\n", g_value_get_int (&val));
+		g_value_unset (&val);
+	}
+	else
+		g_print ("(no audiobitrate available)\n");
+	if (monkey_media_stream_info_get_value (i, MONKEY_MEDIA_STREAM_INFO_FIELD_FILE_SIZE, 0, &val))
+	{
+		g_print ("filesize:	%ld\n", g_value_get_long (&val));
+		g_value_unset (&val);
+	}
+	else
+		g_print ("(no filesize available)\n");
+	if (monkey_media_stream_info_get_value (i, MONKEY_MEDIA_STREAM_INFO_FIELD_DURATION, 0, &val))
+	{
+		g_print ("length:	%ld\n", g_value_get_long (&val));
+		g_value_unset (&val);
+	}
+	else
+		g_print ("(no length available)\n");
 }
 
 int
Index: src/stream-info-impl/vorbis-stream-info-impl.c
===================================================================
RCS file: /cvs/gnome/monkey-media/src/stream-info-impl/vorbis-stream-info-impl.c,v
retrieving revision 1.15
diff -u -d -I$Id: -r1.15 vorbis-stream-info-impl.c
--- src/stream-info-impl/vorbis-stream-info-impl.c	3 Nov 2002 15:31:07 -0000	1.15
+++ src/stream-info-impl/vorbis-stream-info-impl.c	26 Jan 2003 07:50:11 -0000
@@ -358,6 +358,28 @@
 }
 
 static gboolean
+vorbis_stream_info_impl_get_strvalue_utf8 (VorbisStreamInfoImpl *impl,
+					   int index, char *entry, GValue *value)
+{
+	gsize read,written;
+	char *strval;
+	strval = g_strdup (vorbis_comment_query (impl->priv->comment, entry, index));
+	if (!g_utf8_validate (strval, -1, NULL))
+	{
+		char *tem;
+		g_warning ("Invalid UTF-8 in %s field in vorbis file\n", entry);
+		tem = g_locale_to_utf8 (strval, -1, &read, &written, NULL);
+		g_free (strval);
+		if (!tem)
+			return FALSE;
+		strval = tem;
+	}
+	g_value_init (value, G_TYPE_STRING);
+	g_value_set_string_take_ownership (value, strval);
+	return TRUE;
+}
+
+static gboolean
 vorbis_stream_info_impl_get_value (MonkeyMediaStreamInfo *info,
 			           MonkeyMediaStreamInfoField field,
 				   int index,
@@ -377,25 +399,15 @@
 	{
 	/* tags */
 	case MONKEY_MEDIA_STREAM_INFO_FIELD_TITLE:
-		g_value_init (value, G_TYPE_STRING);
-		g_value_set_string (value, vorbis_comment_query (impl->priv->comment, "title", index));
-		break;
+		return vorbis_stream_info_impl_get_strvalue_utf8 (impl, index, "title", value);
 	case MONKEY_MEDIA_STREAM_INFO_FIELD_ARTIST:
-		g_value_init (value, G_TYPE_STRING);
-		g_value_set_string (value, vorbis_comment_query (impl->priv->comment, "artist", index));
-		break;
+		return vorbis_stream_info_impl_get_strvalue_utf8 (impl, index, "artist", value);
 	case MONKEY_MEDIA_STREAM_INFO_FIELD_ALBUM:
-		g_value_init (value, G_TYPE_STRING);
-		g_value_set_string (value, vorbis_comment_query (impl->priv->comment, "album", index));
-		break;
+		return vorbis_stream_info_impl_get_strvalue_utf8 (impl, index, "album", value);
 	case MONKEY_MEDIA_STREAM_INFO_FIELD_DATE:
-		g_value_init (value, G_TYPE_STRING);
-		g_value_set_string (value, vorbis_comment_query (impl->priv->comment, "date", index));
-		break;
+		return vorbis_stream_info_impl_get_strvalue_utf8 (impl, index, "date", value);
 	case MONKEY_MEDIA_STREAM_INFO_FIELD_GENRE:
-		g_value_init (value, G_TYPE_STRING);
-		g_value_set_string (value, vorbis_comment_query (impl->priv->comment, "genre", index));
-		break;
+		return vorbis_stream_info_impl_get_strvalue_utf8 (impl, index, "genre", value);
 	case MONKEY_MEDIA_STREAM_INFO_FIELD_COMMENT:
 		{
 			int count;
@@ -407,6 +419,12 @@
 				g_value_set_string (value, vorbis_comment_query (impl->priv->comment, "comment", index));
 			else
 				g_value_set_string (value, vorbis_comment_query (impl->priv->comment, "", index - count));
+			if (!g_utf8_validate (g_value_get_string (value), -1, NULL))
+			{
+				g_warning ("Invalid UTF-8 in comment field in vorbis file\n");
+				g_value_unset (value);
+				return FALSE;
+			}
 		}
 		break;
 	case MONKEY_MEDIA_STREAM_INFO_FIELD_TRACK_NUMBER:
@@ -458,41 +476,23 @@
 		}
 		break;
 	case MONKEY_MEDIA_STREAM_INFO_FIELD_LOCATION:
-		g_value_init (value, G_TYPE_STRING);
-		g_value_set_string (value, vorbis_comment_query (impl->priv->comment, "location", 0));
-		break;
+		return vorbis_stream_info_impl_get_strvalue_utf8 (impl, index, "location", value);
 	case MONKEY_MEDIA_STREAM_INFO_FIELD_DESCRIPTION:
-		g_value_init (value, G_TYPE_STRING);
-		g_value_set_string (value, vorbis_comment_query (impl->priv->comment, "description", 0));
-		break;
+		return vorbis_stream_info_impl_get_strvalue_utf8 (impl, index, "description", value);
 	case MONKEY_MEDIA_STREAM_INFO_FIELD_VERSION:
-		g_value_init (value, G_TYPE_STRING);
-		g_value_set_string (value, vorbis_comment_query (impl->priv->comment, "version", 0));
-		break;
+		return vorbis_stream_info_impl_get_strvalue_utf8 (impl, index, "version", value);
 	case MONKEY_MEDIA_STREAM_INFO_FIELD_ISRC:
-		g_value_init (value, G_TYPE_STRING);
-		g_value_set_string (value, vorbis_comment_query (impl->priv->comment, "isrc", 0));
-		break;
+		return vorbis_stream_info_impl_get_strvalue_utf8 (impl, index, "isrc", value);
 	case MONKEY_MEDIA_STREAM_INFO_FIELD_ORGANIZATION:
-		g_value_init (value, G_TYPE_STRING);
-		g_value_set_string (value, vorbis_comment_query (impl->priv->comment, "organization", 0));
-		break;
+		return vorbis_stream_info_impl_get_strvalue_utf8 (impl, index, "organization", value);
 	case MONKEY_MEDIA_STREAM_INFO_FIELD_COPYRIGHT:
-		g_value_init (value, G_TYPE_STRING);
-		g_value_set_string (value, vorbis_comment_query (impl->priv->comment, "copyright", 0));
-		break;
+		return vorbis_stream_info_impl_get_strvalue_utf8 (impl, index, "copyright", value);
 	case MONKEY_MEDIA_STREAM_INFO_FIELD_CONTACT:
-		g_value_init (value, G_TYPE_STRING);
-		g_value_set_string (value, vorbis_comment_query (impl->priv->comment, "contact", 0));
-		break;
+		return vorbis_stream_info_impl_get_strvalue_utf8 (impl, index, "contact", value);
 	case MONKEY_MEDIA_STREAM_INFO_FIELD_LICENSE:
-		g_value_init (value, G_TYPE_STRING);
-		g_value_set_string (value, vorbis_comment_query (impl->priv->comment, "license", 0));
-		break;
+		return vorbis_stream_info_impl_get_strvalue_utf8 (impl, index, "license", value);
 	case MONKEY_MEDIA_STREAM_INFO_FIELD_PERFORMER:
-		g_value_init (value, G_TYPE_STRING);
-		g_value_set_string (value, vorbis_comment_query (impl->priv->comment, "performer", 0));
-		break;
+		return vorbis_stream_info_impl_get_strvalue_utf8 (impl, index, "performer", value);
 
 	/* generic bits */
 	case MONKEY_MEDIA_STREAM_INFO_FIELD_FILE_SIZE:
@@ -561,6 +561,12 @@
 	case MONKEY_MEDIA_STREAM_INFO_FIELD_AUDIO_VENDOR:
 		g_value_init (value, G_TYPE_STRING);
 		g_value_set_string (value, impl->priv->comment->vendor);
+		if (!g_utf8_validate (g_value_get_string (value), -1, NULL))
+		{
+			g_warning ("Invalid UTF-8 in audio vendor field in vorbis file\n");
+			g_value_unset (value);
+			return FALSE;
+		}
 		break;
 	case MONKEY_MEDIA_STREAM_INFO_FIELD_AUDIO_ALBUM_GAIN:
 		{


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]