[vte] utf8: Reformat data table



commit 765f59f1c3c5881b43292223a9cb7545c0764c95
Author: Christian Persch <chpe src gnome org>
Date:   Mon Sep 3 16:10:51 2018 +0200

    utf8: Reformat data table
    
    Make the table more readable and add comments.

 src/utf8.cc | 63 ++++++++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 48 insertions(+), 15 deletions(-)
---
diff --git a/src/utf8.cc b/src/utf8.cc
index 3a8f3087..b0d96218 100644
--- a/src/utf8.cc
+++ b/src/utf8.cc
@@ -28,20 +28,53 @@
 uint8_t const vte::base::UTF8Decoder::kTable[] = {
         // The first part of the table maps bytes to character classes that
         // to reduce the size of the transition table and create bitmasks.
-        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
-        7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
-        8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-        10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
+        // The classes are as follows:
+        // 0x00..0x7f: 0
+        // 0x80..0x8f: 1
+        // 0x90..0x9f: 9
+        // 0xa0..0xbf: 7
+        // 0xc0..0xc1: 8
+        // 0xc2..0xdf: 2
+        // 0xe0:       10
+        // 0xe1..0xec: 3
+        // 0xed:       4
+        // 0xee..0xef: 3
+        // 0xf0:       11
+        // 0xf1..0xf3: 6
+        // 0xf4:       5
+        // 0xf5..0xff: 8
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00..0x0f
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10..0x1f
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20..0x2f
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x30..0x3f
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40..0x4f
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x50..0x5f
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60..0x6f
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x70..0x7f
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80..0x8f
+        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 0x90..0x9f
+        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 0xa0..0xaf
+        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 0xb0..0xbf
+        8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0..0xcf
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0..0xdf
+        10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, // 0xe0..0xef
+        11, 6, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 0xf0..0xff
 
-        // The second part is a transition table that maps a combination
-        // of a state of the automaton and a character class to a state.
-        0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
-        12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
-        12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
-        12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
-        12,36,12,12,12,12,12,12,12,12,12,12,
+        // To understand this DFA, see the transition graph on the
+        // website linked above.
+        // For each state (row), the table records which state will
+        // be transitioned to when consuming a character of the class
+        // (column).
+        /*
+         0   1   2   3   4   5   6   7   8   9  10  11 // character class
+        */
+         0, 12, 24, 36, 60, 96, 84, 12, 12, 12, 48, 72, // state 0 (accept)
+        12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, // state 12 (reject)
+        12,  0, 12, 12, 12, 12, 12,  0, 12,  0, 12, 12, // state 24
+        12, 24, 12, 12, 12, 12, 12, 24, 12, 24, 12, 12, // state 36
+        12, 12, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, // state 48
+        12, 24, 12, 12, 12, 12, 12, 12, 12, 24, 12, 12, // state 60
+        12, 12, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12, // state 72
+        12, 36, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12, // state 84
+        12, 36, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, // state 96
 };


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]