[gnome-games/wip/aplazas/unicode: 3/4] utils: Add Unicode.read()



commit 820555472995b39ff313701d32a9ba83629faaa0
Author: Adrien Plazas <kekun plazas laposte net>
Date:   Fri May 4 22:53:42 2018 +0200

    utils: Add Unicode.read()
    
    This allows reading Unicode text files and converting them to a given encoding.
    
    https://gitlab.gnome.org/GNOME/gnome-games/issues/19

 src/Makefile.am        |  1 +
 src/utils/unicode.vala | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+)
---
diff --git a/src/Makefile.am b/src/Makefile.am
index ee95dc2..5c4eb8b 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -154,6 +154,7 @@ gnome_games_SOURCES = \
        utils/grep.vala \
        utils/local-cover.vala \
        utils/string-input-stream.vala \
+       utils/unicode.vala \
        utils/uri.vala \
        utils/uri-error.vala \
        utils/xml-doc.vala \
diff --git a/src/utils/unicode.vala b/src/utils/unicode.vala
new file mode 100644
index 0000000..3cf7eac
--- /dev/null
+++ b/src/utils/unicode.vala
@@ -0,0 +1,83 @@
+// This file is part of GNOME Games. License: GPL-3.0+.
+
+namespace Games.Unicode {
+       /**
+        * Unicode encodings that parse_encoding() can report and that read()
+        * accepts as its target encoding.
+        */
+       private enum Encoding {
+               UTF_32BE,
+               UTF_32LE,
+               UTF_16BE,
+               UTF_16LE,
+               UTF_8;
+
+               /**
+                * Returns the charset name of this encoding, in the form handed to
+                * CharsetConverter by read().
+                *
+                * @return the lowercase charset name; any value without an explicit
+                * case falls back to "utf-8"
+                */
+               public string to_string () {
+                       switch (this) {
+                       case Encoding.UTF_32BE:
+                               return "utf-32be";
+                       case Encoding.UTF_32LE:
+                               return "utf-32le";
+                       case Encoding.UTF_16BE:
+                               return "utf-16be";
+                       case Encoding.UTF_16LE:
+                               return "utf-16le";
+                       default:
+                               return "utf-8";
+                       }
+               }
+       }
+
+       /**
+        * Detects the encoding of a stream from the byte order mark at its start.
+        *
+        * The stream is rewound to offset 0 before reading. Its position after
+        * the call is past the bytes that were inspected, so callers have to seek
+        * to the offset they need.
+        *
+        * @param stream the stream to inspect
+        * @param bom_length set to the length in bytes of the detected byte order
+        * mark, or to 0 if none was found
+        * @return the detected encoding, or UTF-8 if there is no byte order mark
+        * @throws Error if seeking or reading the stream fails
+        */
+       private Encoding parse_encoding (FileInputStream stream, out int bom_length) throws Error {
+               stream.seek (0, SeekType.SET);
+
+               // read() may return fewer bytes than requested before the end of the
+               // file is reached, read_all() only stops short at the end of the file.
+               uint8[4] header = { 0, 0, 0, 0 };
+               size_t size = 0;
+               stream.read_all (header, out size);
+
+               // Each mark is only considered when enough bytes were actually read,
+               // so that the zero padding of a short file can't be taken for the
+               // trailing bytes of the UTF-32LE mark.
+               // The 4 bytes marks are checked first as the UTF-32LE one starts with
+               // the UTF-16LE one.
+               if (size >= 4) {
+                       if (header[0] == 0x00 && header[1] == 0x00 && header[2] == 0xfe && header[3] == 0xff) {
+                               bom_length = 4;
+
+                               return Encoding.UTF_32BE;
+                       }
+
+                       if (header[0] == 0xff && header[1] == 0xfe && header[2] == 0x00 && header[3] == 0x00) {
+                               bom_length = 4;
+
+                               return Encoding.UTF_32LE;
+                       }
+               }
+
+               if (size >= 2) {
+                       if (header[0] == 0xfe && header[1] == 0xff) {
+                               bom_length = 2;
+
+                               return Encoding.UTF_16BE;
+                       }
+
+                       if (header[0] == 0xff && header[1] == 0xfe) {
+                               bom_length = 2;
+
+                               return Encoding.UTF_16LE;
+                       }
+               }
+
+               if (size >= 3 && header[0] == 0xef && header[1] == 0xbb && header[2] == 0xbf) {
+                       bom_length = 3;
+
+                       return Encoding.UTF_8;
+               }
+
+               bom_length = 0;
+
+               return Encoding.UTF_8;
+       }
+
+       /**
+        * Opens a Unicode text file and returns its content in the requested
+        * encoding, without the byte order mark.
+        *
+        * The encoding of the file is detected from its byte order mark, files
+        * without one are considered to be UTF-8.
+        *
+        * @param file the file to read
+        * @param encoding the encoding the returned stream should deliver
+        * @return a stream positioned right after the byte order mark, converting
+        * to //encoding// when the file uses another encoding
+        * @throws Error if the file can't be opened, read or sought, or if the
+        * converter can't be created
+        */
+       private InputStream read (File file, Encoding encoding) throws Error {
+               var stream = file.read ();
+               int bom_length = 0;
+               var src_encoding = parse_encoding (stream, out bom_length);
+               stream.seek (bom_length, SeekType.SET);
+
+               // The conversion can only be skipped when the file already uses the
+               // requested encoding, whichever encoding that is.
+               if (encoding == src_encoding)
+                       return stream;
+
+               var converter = new CharsetConverter (encoding.to_string (), src_encoding.to_string ());
+
+               return new ConverterInputStream (stream, converter);
+       }
+}


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]