[Banshee-List] Re: utf8 validation
- From: Dogacan Guney <dogacan gmail com>
- To: banshee-list gnome org
- Subject: [Banshee-List] Re: utf8 validation
- Date: Sat, 19 Nov 2005 21:48:48 +0200
On 11/19/05, Dogacan Guney <dogacan gmail com> wrote:
> Hello everyone,
>
> The attached patch adds a new public class (UnicodeValidator) to
> Entagged. It has one public function (ValidateUTF8) which checks if
> the given byte array can be validated as UTF8.
>
> Also, the id3v1 tag reader now tries to read the tag fields as utf8.
>
> With this patch, I can view all Turkish language specific characters
> in id3v1 tags.
>
>
>
ValidateUTF8 had some bugs. This new version of the patch should fix them.
? validate_id3v1.patch
? validate_id3v1_ver2.patch
? burn-sharp/.deps
? burn-sharp/.libs
? burn-sharp/glue.lo
? burn-sharp/libnautilusburnglue.la
? libbanshee/.deps
? libbanshee/.libs
? libbanshee/cd-detect.lo
? libbanshee/cd-rip.lo
? libbanshee/gst-encode.lo
? libbanshee/gst-init.lo
? libbanshee/gst-misc.lo
? libbanshee/gst-player-engine.lo
? libbanshee/inotify-glue.lo
? libbanshee/libbanshee.la
? libbanshee/xing/.deps
? po/.intltool-merge-cache
Index: entagged-sharp/EncodingInfo.cs
===================================================================
RCS file: /cvs/gnome/banshee/entagged-sharp/EncodingInfo.cs,v
retrieving revision 1.5
diff -p -u -2 -r1.5 EncodingInfo.cs
--- entagged-sharp/EncodingInfo.cs 1 Nov 2005 23:32:01 -0000 1.5
+++ entagged-sharp/EncodingInfo.cs 19 Nov 2005 19:47:05 -0000
@@ -109,3 +109,75 @@ public class EncodingInfo {
}
}
+
+public class UnicodeValidator {
+
+ public static bool ValidateUTF8(byte[] str)
+ {
+ int i;
+ int min = 0, val = 0;
+
+ try {
+ for(i = 0; i < str.Length; i++) {
+ if(str[i] < 128)
+ continue;
+
+ if((str[i] & 0xe0) == 0xc0) { /* 110xxxxx */
+ if((str[i] & 0x1e) == 0)
+ return false;
+ i++;
+ if((str[i] & 0xc0) != 0x80) /* 10xxxxxx */
+ return false;
+ } else {
+ if((str[i] & 0xf0) == 0xe0) { /* 1110xxxx */
+ min = (1 << 11);
+ val = str[i] & 0x0f;
+ goto TWO_REMAINING;
+ } else if((str[i] & 0xf8) == 0xf0) { /* 11110xxx */
+ min = (1 << 16);
+ val = str[i] & 0x07;
+ } else {
+ return false;
+ }
+ i++;
+ if(!continuation_char(str, i, ref val))
+ return false;
+ TWO_REMAINING:
+ i++;
+ if(!continuation_char(str, i, ref val))
+ return false;
+ i++;
+ if(!continuation_char(str, i, ref val))
+ return false;
+
+ if(val < min || !unicode_valid(val))
+ return false;
+ }
+ }
+ } catch (System.IndexOutOfRangeException e) {
+ return false;
+ }
+
+ return true;
+ }
+
+ private static bool continuation_char(byte[] str, int i, ref int val)
+ {
+ if ((str[i] & 0xc0) != 0x80) /* 10xxxxxx */
+ return false;
+
+ val <<= 6;
+ val |= str[i] & 0x3f;
+
+ return true;
+ }
+
+ private static bool unicode_valid(int b)
+ {
+ return (b < 0x110000 &&
+ ((b & 0xFFFFF800) != 0xD800) &&
+ (b < 0xFDD0 || b > 0xFDEF) &&
+ (b & 0xFFFE) != 0xFFFE);
+ }
+}
+
}
Index: entagged-sharp/Mp3/Util/Id3v1TagReader.cs
===================================================================
RCS file: /cvs/gnome/banshee/entagged-sharp/Mp3/Util/Id3v1TagReader.cs,v
retrieving revision 1.5
diff -p -u -2 -r1.5 Id3v1TagReader.cs
--- entagged-sharp/Mp3/Util/Id3v1TagReader.cs 1 Nov 2005 23:32:03 -0000 1.5
+++ entagged-sharp/Mp3/Util/Id3v1TagReader.cs 19 Nov 2005 19:47:06 -0000
@@ -80,5 +80,10 @@ namespace Entagged.Audioformats.Mp3.Util
byte[] b = new byte[length];
mp3Stream.Read( b, 0, b.Length );
- string ret = Encoding.GetEncoding("ISO-8859-1").GetString(b).Trim();
+ string ret;
+
+ if(Entagged.Audioformats.UnicodeValidator.ValidateUTF8(b))
+ ret = Encoding.UTF8.GetString(b).Trim();
+ else
+ ret = Encoding.GetEncoding("ISO-8859-1").GetString(b).Trim();
int pos = ret.IndexOf('\0');
Index: src/Banshee.Widgets/Makefile.in
===================================================================
RCS file: /cvs/gnome/banshee/src/Banshee.Widgets/Makefile.in,v
retrieving revision 1.1
diff -p -u -2 -r1.1 Makefile.in
--- src/Banshee.Widgets/Makefile.in 17 Nov 2005 09:09:51 -0000 1.1
+++ src/Banshee.Widgets/Makefile.in 19 Nov 2005 19:47:11 -0000
@@ -1,3 +1,3 @@
-# Makefile.in generated by automake 1.9.6 from Makefile.am.
+# Makefile.in generated by automake 1.9.5 from Makefile.am.
# @configure_input@
@@ -173,4 +173,5 @@ MAINTAINER_MODE_TRUE = @MAINTAINER_MODE_
MAKEINFO = @MAKEINFO@
MCS = @MCS@
+MKINSTALLDIRS = @MKINSTALLDIRS@
MONO = @MONO@
MONO_CFLAGS = @MONO_CFLAGS@
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]