[pango: 1/4] import gen-break-table.py



commit 053c4878ef5572a5f88f0ce48b75a8ab86f035a1
Author: Peng Wu <alexepico gmail com>
Date:   Tue Feb 19 15:05:05 2019 +0800

    import gen-break-table.py

 tools/gen-break-table.py | 156 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 156 insertions(+)
---
diff --git a/tools/gen-break-table.py b/tools/gen-break-table.py
new file mode 100755
index 00000000..2c8a7d70
--- /dev/null
+++ b/tools/gen-break-table.py
@@ -0,0 +1,156 @@
+#!/usr/bin/python
+
+from __future__ import print_function, division, absolute_import
+import sys
+import os.path
+from collections import OrderedDict
+
+
+header = []
+ranges = OrderedDict()
+
+def load_data(filename):
+        global header, ranges
+        f = open(filename)
+        lines = f.readlines()
+        for line in lines:
+                header.append(line)
+                if not line.startswith("#"):
+                        break
+
+        for line in lines:
+               line = line.strip()
+               if not line or line[0] == '#':
+                       continue
+               rang, typ = [s.strip() for s in line.split('#')[0].split(';')[:2]]
+
+               rang = [int(s, 16) for s in rang.split('..')]
+               if len(rang) > 1:
+                       start, end = rang
+               else:
+                       start = end = rang[0]
+
+               if typ not in ranges:
+                       ranges[typ] = []
+               if ranges[typ] and ranges[typ][-1][1] == start - 1:
+                       ranges[typ][-1] = (ranges[typ][-1][0], end)
+               else:
+                       ranges[typ].append((start, end))
+
+
+def onecondition(start, end):
+        condition = ''
+        if start == end:
+                condition = 'wc == 0x' + format(start, '04X')
+        elif start < end:
+                condition = '(' + 'wc >= 0x' + format(start, '04X') + ' && ' + 'wc <= 0x' + format(end, '04X') + ')'
+        return condition
+
+
+# print out the numbers in compact form
+def print_if_branch(ranges):
+        conditions = []
+        for start, end in ranges:
+                condition = onecondition(start, end)
+                conditions.append(condition)
+        statement = "||\n".join(conditions)
+        print("if (%s)" % statement)
+        print("\treturn TRUE;")
+        print("return FALSE;")
+
+
+def print_one_line(start, end):
+        if start < end:
+                outline = 'if (' + onecondition(start, end) + ')'
+                print(outline)
+
+def print_ranges(ranges):
+        if 4 >= len(ranges):
+                conditions = []
+                for start, end in ranges:
+                        conditions.append(onecondition(start, end))
+
+                statement = " ||\n".join(conditions)
+                print('if (' + statement + ')')
+                print('\treturn TRUE;')
+                return
+
+        start = ranges[0][0]
+        end = ranges[-1][1]
+        print_one_line(start, end)
+        print('{')
+        print_balanced_search(ranges)
+        print('}')
+
+
+# print if branch like 4-way balanced search
+def print_balanced_search(ranges):
+        if 4 >= len(ranges):
+                print_ranges(ranges)
+                return
+
+        length = len(ranges)
+        step = int(length / 4)
+        first = step
+        second = int(length * 2 / 4)
+        third = second + step
+
+        newranges = ranges[0:first]
+        print_ranges(newranges)
+
+        newranges = ranges[first:second]
+        print_ranges(newranges)
+
+        newranges = ranges[second:third]
+        print_ranges(newranges)
+
+        newranges = ranges[third:]
+        print_ranges(newranges)
+
+        print("return FALSE;")
+
+
+def print_table():
+        global header, ranges
+        print("/* == Start of generated table == */")
+        print("/*")
+        print(" * The following tables are generated by running:")
+        print(" *")
+        print(" *   ./gen-break-table.py SentenceBreakProperty.txt IndicSyllabicCategory.txt | indent")
+        print(" *")
+        print(" * on files with these headers:")
+        print(" *")
+        for l in header:
+               print(" * %s" % (l.strip()))
+        print(" */")
+        print()
+        print("#ifndef PANGO_BREAK_TABLE_H")
+        print("#define PANGO_BREAK_TABLE_H")
+        print()
+        print("#include <glib.h>")
+        print()
+
+        for typ,s in ranges.items():
+               if typ not in ['STerm',
+                              'Virama',
+                              'Vowel_Dependent']: continue
+               print()
+               print("static inline gboolean _pango_is_%s (gunichar wc)" % typ)
+               print("{")
+               print_balanced_search(sorted(s))
+               print("}")
+
+        print()
+        print("#endif /* PANGO_BREAK_TABLE_H */")
+        print()
+        print("/* == End of generated table == */")
+
+
+if __name__ == "__main__":
+        if len (sys.argv) != 3:
+               print("usage: ./gen-break-table.py SentenceBreakProperty.txt IndicSyllabicCategory.txt | indent", file=sys.stderr)
+               sys.exit (1)
+
+        load_data(sys.argv[1])
+        load_data(sys.argv[2])
+        print_table()


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]