[pango] [emoji] Shrink the emoji table by merging adjacent ranges



commit 32166a14a41c9183e2880459aceb79a6f56273e7
Author: Behdad Esfahbod <behdad behdad org>
Date:   Tue Nov 20 22:12:47 2018 -0500

    [emoji] Shrink the emoji table by merging adjacent ranges
    
    Patch by David Corbett in HarfBuzz.

 pango/pango-emoji-table.h | 231 +++++++++-------------------------------------
 tools/gen-emoji-table.py  |  13 ++-
 2 files changed, 50 insertions(+), 194 deletions(-)
---
diff --git a/pango/pango-emoji-table.h b/pango/pango-emoji-table.h
index da9ff4fc..77d99a71 100644
--- a/pango/pango-emoji-table.h
+++ b/pango/pango-emoji-table.h
@@ -7,13 +7,13 @@
  * on file with this header:
  *
  * # emoji-data.txt
- * # Date: 2018-02-07, 07:55:18 GMT
- * # © 2018 Unicode®, Inc.
+ * # Date: 2017-06-19, 11:13:24 GMT
+ * # © 2017 Unicode®, Inc.
  * # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
  * # For terms of use, see http://www.unicode.org/terms_of_use.html
  * #
- * # Emoji Data for UTS #51
- * # Version: 11.0
+ * # Emoji Data for UTR #51
+ * # Version: 5.0
  * #
  * # For documentation and usage, see http://www.unicode.org/reports/tr51
  */
@@ -65,12 +65,12 @@ static const struct Interval _pango_Emoji_table[] =
   {0x2640, 0x2640},
   {0x2642, 0x2642},
   {0x2648, 0x2653},
-  {0x265F, 0x2660},
+  {0x2660, 0x2660},
   {0x2663, 0x2663},
   {0x2665, 0x2666},
   {0x2668, 0x2668},
   {0x267B, 0x267B},
-  {0x267E, 0x267F},
+  {0x267F, 0x267F},
   {0x2692, 0x2697},
   {0x2699, 0x2699},
   {0x269B, 0x269C},
@@ -80,8 +80,7 @@ static const struct Interval _pango_Emoji_table[] =
   {0x26BD, 0x26BE},
   {0x26C4, 0x26C5},
   {0x26C8, 0x26C8},
-  {0x26CE, 0x26CE},
-  {0x26CF, 0x26CF},
+  {0x26CE, 0x26CF},
   {0x26D1, 0x26D1},
   {0x26D3, 0x26D4},
   {0x26E9, 0x26EA},
@@ -90,9 +89,7 @@ static const struct Interval _pango_Emoji_table[] =
   {0x26FD, 0x26FD},
   {0x2702, 0x2702},
   {0x2705, 0x2705},
-  {0x2708, 0x2709},
-  {0x270A, 0x270B},
-  {0x270C, 0x270D},
+  {0x2708, 0x270D},
   {0x270F, 0x270F},
   {0x2712, 0x2712},
   {0x2714, 0x2714},
@@ -124,8 +121,7 @@ static const struct Interval _pango_Emoji_table[] =
   {0x1F004, 0x1F004},
   {0x1F0CF, 0x1F0CF},
   {0x1F170, 0x1F171},
-  {0x1F17E, 0x1F17E},
-  {0x1F17F, 0x1F17F},
+  {0x1F17E, 0x1F17F},
   {0x1F18E, 0x1F18E},
   {0x1F191, 0x1F19A},
   {0x1F1E6, 0x1F1FF},
@@ -134,51 +130,23 @@ static const struct Interval _pango_Emoji_table[] =
   {0x1F22F, 0x1F22F},
   {0x1F232, 0x1F23A},
   {0x1F250, 0x1F251},
-  {0x1F300, 0x1F320},
-  {0x1F321, 0x1F321},
-  {0x1F324, 0x1F32C},
-  {0x1F32D, 0x1F32F},
-  {0x1F330, 0x1F335},
-  {0x1F336, 0x1F336},
-  {0x1F337, 0x1F37C},
-  {0x1F37D, 0x1F37D},
-  {0x1F37E, 0x1F37F},
-  {0x1F380, 0x1F393},
+  {0x1F300, 0x1F321},
+  {0x1F324, 0x1F393},
   {0x1F396, 0x1F397},
   {0x1F399, 0x1F39B},
-  {0x1F39E, 0x1F39F},
-  {0x1F3A0, 0x1F3C4},
-  {0x1F3C5, 0x1F3C5},
-  {0x1F3C6, 0x1F3CA},
-  {0x1F3CB, 0x1F3CE},
-  {0x1F3CF, 0x1F3D3},
-  {0x1F3D4, 0x1F3DF},
-  {0x1F3E0, 0x1F3F0},
+  {0x1F39E, 0x1F3F0},
   {0x1F3F3, 0x1F3F5},
-  {0x1F3F7, 0x1F3F7},
-  {0x1F3F8, 0x1F3FF},
-  {0x1F400, 0x1F43E},
-  {0x1F43F, 0x1F43F},
-  {0x1F440, 0x1F440},
-  {0x1F441, 0x1F441},
-  {0x1F442, 0x1F4F7},
-  {0x1F4F8, 0x1F4F8},
-  {0x1F4F9, 0x1F4FC},
-  {0x1F4FD, 0x1F4FD},
-  {0x1F4FF, 0x1F4FF},
-  {0x1F500, 0x1F53D},
-  {0x1F549, 0x1F54A},
-  {0x1F54B, 0x1F54E},
+  {0x1F3F7, 0x1F4FD},
+  {0x1F4FF, 0x1F53D},
+  {0x1F549, 0x1F54E},
   {0x1F550, 0x1F567},
   {0x1F56F, 0x1F570},
-  {0x1F573, 0x1F579},
-  {0x1F57A, 0x1F57A},
+  {0x1F573, 0x1F57A},
   {0x1F587, 0x1F587},
   {0x1F58A, 0x1F58D},
   {0x1F590, 0x1F590},
   {0x1F595, 0x1F596},
-  {0x1F5A4, 0x1F5A4},
-  {0x1F5A5, 0x1F5A5},
+  {0x1F5A4, 0x1F5A5},
   {0x1F5A8, 0x1F5A8},
   {0x1F5B1, 0x1F5B2},
   {0x1F5BC, 0x1F5BC},
@@ -190,73 +158,22 @@ static const struct Interval _pango_Emoji_table[] =
   {0x1F5E8, 0x1F5E8},
   {0x1F5EF, 0x1F5EF},
   {0x1F5F3, 0x1F5F3},
-  {0x1F5FA, 0x1F5FA},
-  {0x1F5FB, 0x1F5FF},
-  {0x1F600, 0x1F600},
-  {0x1F601, 0x1F610},
-  {0x1F611, 0x1F611},
-  {0x1F612, 0x1F614},
-  {0x1F615, 0x1F615},
-  {0x1F616, 0x1F616},
-  {0x1F617, 0x1F617},
-  {0x1F618, 0x1F618},
-  {0x1F619, 0x1F619},
-  {0x1F61A, 0x1F61A},
-  {0x1F61B, 0x1F61B},
-  {0x1F61C, 0x1F61E},
-  {0x1F61F, 0x1F61F},
-  {0x1F620, 0x1F625},
-  {0x1F626, 0x1F627},
-  {0x1F628, 0x1F62B},
-  {0x1F62C, 0x1F62C},
-  {0x1F62D, 0x1F62D},
-  {0x1F62E, 0x1F62F},
-  {0x1F630, 0x1F633},
-  {0x1F634, 0x1F634},
-  {0x1F635, 0x1F640},
-  {0x1F641, 0x1F642},
-  {0x1F643, 0x1F644},
-  {0x1F645, 0x1F64F},
+  {0x1F5FA, 0x1F64F},
   {0x1F680, 0x1F6C5},
-  {0x1F6CB, 0x1F6CF},
-  {0x1F6D0, 0x1F6D0},
-  {0x1F6D1, 0x1F6D2},
+  {0x1F6CB, 0x1F6D2},
   {0x1F6E0, 0x1F6E5},
   {0x1F6E9, 0x1F6E9},
   {0x1F6EB, 0x1F6EC},
   {0x1F6F0, 0x1F6F0},
-  {0x1F6F3, 0x1F6F3},
-  {0x1F6F4, 0x1F6F6},
-  {0x1F6F7, 0x1F6F8},
-  {0x1F6F9, 0x1F6F9},
-  {0x1F910, 0x1F918},
-  {0x1F919, 0x1F91E},
-  {0x1F91F, 0x1F91F},
-  {0x1F920, 0x1F927},
-  {0x1F928, 0x1F92F},
-  {0x1F930, 0x1F930},
-  {0x1F931, 0x1F932},
-  {0x1F933, 0x1F93A},
+  {0x1F6F3, 0x1F6F8},
+  {0x1F910, 0x1F93A},
   {0x1F93C, 0x1F93E},
   {0x1F940, 0x1F945},
-  {0x1F947, 0x1F94B},
-  {0x1F94C, 0x1F94C},
-  {0x1F94D, 0x1F94F},
-  {0x1F950, 0x1F95E},
-  {0x1F95F, 0x1F96B},
-  {0x1F96C, 0x1F970},
-  {0x1F973, 0x1F976},
-  {0x1F97A, 0x1F97A},
-  {0x1F97C, 0x1F97F},
-  {0x1F980, 0x1F984},
-  {0x1F985, 0x1F991},
-  {0x1F992, 0x1F997},
-  {0x1F998, 0x1F9A2},
-  {0x1F9B0, 0x1F9B9},
+  {0x1F947, 0x1F94C},
+  {0x1F950, 0x1F96B},
+  {0x1F980, 0x1F997},
   {0x1F9C0, 0x1F9C0},
-  {0x1F9C1, 0x1F9C2},
   {0x1F9D0, 0x1F9E6},
-  {0x1F9E7, 0x1F9FF},
 };
 
 static const struct Interval _pango_Emoji_Presentation_table[] =
@@ -306,92 +223,36 @@ static const struct Interval _pango_Emoji_Presentation_table[] =
   {0x1F238, 0x1F23A},
   {0x1F250, 0x1F251},
   {0x1F300, 0x1F320},
-  {0x1F32D, 0x1F32F},
-  {0x1F330, 0x1F335},
+  {0x1F32D, 0x1F335},
   {0x1F337, 0x1F37C},
-  {0x1F37E, 0x1F37F},
-  {0x1F380, 0x1F393},
-  {0x1F3A0, 0x1F3C4},
-  {0x1F3C5, 0x1F3C5},
-  {0x1F3C6, 0x1F3CA},
+  {0x1F37E, 0x1F393},
+  {0x1F3A0, 0x1F3CA},
   {0x1F3CF, 0x1F3D3},
   {0x1F3E0, 0x1F3F0},
   {0x1F3F4, 0x1F3F4},
-  {0x1F3F8, 0x1F3FF},
-  {0x1F400, 0x1F43E},
+  {0x1F3F8, 0x1F43E},
   {0x1F440, 0x1F440},
-  {0x1F442, 0x1F4F7},
-  {0x1F4F8, 0x1F4F8},
-  {0x1F4F9, 0x1F4FC},
-  {0x1F4FF, 0x1F4FF},
-  {0x1F500, 0x1F53D},
+  {0x1F442, 0x1F4FC},
+  {0x1F4FF, 0x1F53D},
   {0x1F54B, 0x1F54E},
   {0x1F550, 0x1F567},
   {0x1F57A, 0x1F57A},
   {0x1F595, 0x1F596},
   {0x1F5A4, 0x1F5A4},
-  {0x1F5FB, 0x1F5FF},
-  {0x1F600, 0x1F600},
-  {0x1F601, 0x1F610},
-  {0x1F611, 0x1F611},
-  {0x1F612, 0x1F614},
-  {0x1F615, 0x1F615},
-  {0x1F616, 0x1F616},
-  {0x1F617, 0x1F617},
-  {0x1F618, 0x1F618},
-  {0x1F619, 0x1F619},
-  {0x1F61A, 0x1F61A},
-  {0x1F61B, 0x1F61B},
-  {0x1F61C, 0x1F61E},
-  {0x1F61F, 0x1F61F},
-  {0x1F620, 0x1F625},
-  {0x1F626, 0x1F627},
-  {0x1F628, 0x1F62B},
-  {0x1F62C, 0x1F62C},
-  {0x1F62D, 0x1F62D},
-  {0x1F62E, 0x1F62F},
-  {0x1F630, 0x1F633},
-  {0x1F634, 0x1F634},
-  {0x1F635, 0x1F640},
-  {0x1F641, 0x1F642},
-  {0x1F643, 0x1F644},
-  {0x1F645, 0x1F64F},
+  {0x1F5FB, 0x1F64F},
   {0x1F680, 0x1F6C5},
   {0x1F6CC, 0x1F6CC},
-  {0x1F6D0, 0x1F6D0},
-  {0x1F6D1, 0x1F6D2},
+  {0x1F6D0, 0x1F6D2},
   {0x1F6EB, 0x1F6EC},
-  {0x1F6F4, 0x1F6F6},
-  {0x1F6F7, 0x1F6F8},
-  {0x1F6F9, 0x1F6F9},
-  {0x1F910, 0x1F918},
-  {0x1F919, 0x1F91E},
-  {0x1F91F, 0x1F91F},
-  {0x1F920, 0x1F927},
-  {0x1F928, 0x1F92F},
-  {0x1F930, 0x1F930},
-  {0x1F931, 0x1F932},
-  {0x1F933, 0x1F93A},
+  {0x1F6F4, 0x1F6F8},
+  {0x1F910, 0x1F93A},
   {0x1F93C, 0x1F93E},
   {0x1F940, 0x1F945},
-  {0x1F947, 0x1F94B},
-  {0x1F94C, 0x1F94C},
-  {0x1F94D, 0x1F94F},
-  {0x1F950, 0x1F95E},
-  {0x1F95F, 0x1F96B},
-  {0x1F96C, 0x1F970},
-  {0x1F973, 0x1F976},
-  {0x1F97A, 0x1F97A},
-  {0x1F97C, 0x1F97F},
-  {0x1F980, 0x1F984},
-  {0x1F985, 0x1F991},
-  {0x1F992, 0x1F997},
-  {0x1F998, 0x1F9A2},
-  {0x1F9B0, 0x1F9B9},
+  {0x1F947, 0x1F94C},
+  {0x1F950, 0x1F96B},
+  {0x1F980, 0x1F997},
   {0x1F9C0, 0x1F9C0},
-  {0x1F9C1, 0x1F9C2},
   {0x1F9D0, 0x1F9E6},
-  {0x1F9E7, 0x1F9FF},
 };
 
 static const struct Interval _pango_Emoji_Modifier_table[] =
@@ -403,13 +264,11 @@ static const struct Interval _pango_Emoji_Modifier_Base_table[] =
 {
   {0x261D, 0x261D},
   {0x26F9, 0x26F9},
-  {0x270A, 0x270B},
-  {0x270C, 0x270D},
+  {0x270A, 0x270D},
   {0x1F385, 0x1F385},
   {0x1F3C2, 0x1F3C4},
   {0x1F3C7, 0x1F3C7},
-  {0x1F3CA, 0x1F3CA},
-  {0x1F3CB, 0x1F3CC},
+  {0x1F3CA, 0x1F3CC},
   {0x1F442, 0x1F443},
   {0x1F446, 0x1F450},
   {0x1F466, 0x1F469},
@@ -429,17 +288,11 @@ static const struct Interval _pango_Emoji_Modifier_Base_table[] =
   {0x1F6B4, 0x1F6B6},
   {0x1F6C0, 0x1F6C0},
   {0x1F6CC, 0x1F6CC},
-  {0x1F918, 0x1F918},
-  {0x1F919, 0x1F91C},
-  {0x1F91E, 0x1F91E},
-  {0x1F91F, 0x1F91F},
+  {0x1F918, 0x1F91C},
+  {0x1F91E, 0x1F91F},
   {0x1F926, 0x1F926},
-  {0x1F930, 0x1F930},
-  {0x1F931, 0x1F932},
-  {0x1F933, 0x1F939},
+  {0x1F930, 0x1F939},
   {0x1F93D, 0x1F93E},
-  {0x1F9B5, 0x1F9B6},
-  {0x1F9B8, 0x1F9B9},
   {0x1F9D1, 0x1F9DD},
 };
 
diff --git a/tools/gen-emoji-table.py b/tools/gen-emoji-table.py
index b8018eee..48832913 100755
--- a/tools/gen-emoji-table.py
+++ b/tools/gen-emoji-table.py
@@ -12,7 +12,7 @@ if len (sys.argv) != 2:
 f = open(sys.argv[1])
 header = [f.readline () for _ in range(10)]
 
-sets = OrderedDict()
+ranges = OrderedDict()
 for line in f.readlines():
        line = line.strip()
        if not line or line[0] == '#':
@@ -25,9 +25,12 @@ for line in f.readlines():
        else:
                start = end = rang[0]
 
-       if typ not in sets:
-               sets[typ] = set()
-       sets[typ].add((start, end))
+       if typ not in ranges:
+               ranges[typ] = []
+       if ranges[typ] and ranges[typ][-1][1] == start - 1:
+               ranges[typ][-1] = (ranges[typ][-1][0], end)
+       else:
+               ranges[typ].append((start, end))
 
 
 
@@ -50,7 +53,7 @@ print("#include <glib.h>")
 print()
 print("struct Interval {\n  gunichar start, end;\n};")
 
-for typ,s in sets.items():
+for typ,s in ranges.items():
        if typ not in ['Emoji',
                       'Emoji_Presentation',
                       'Emoji_Modifier',


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]