aboutsummaryrefslogtreecommitdiff
path: root/contrib/unicode/gen-combining-chars.py
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/unicode/gen-combining-chars.py')
-rwxr-xr-xcontrib/unicode/gen-combining-chars.py75
1 files changed, 75 insertions, 0 deletions
diff --git a/contrib/unicode/gen-combining-chars.py b/contrib/unicode/gen-combining-chars.py
new file mode 100755
index 0000000..fb5ef50
--- /dev/null
+++ b/contrib/unicode/gen-combining-chars.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python3
+#
+# Script to generate libcpp/combining-chars.inc
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 3, or (at your option) any later
+# version.
+#
+# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+from pprint import pprint
+import unicodedata
+
+def is_combining_char(code_point) -> bool:
+    """Return True if code_point has a non-zero combining class.
+
+    code_point is an integer; it is converted to a one-character string
+    with chr() and looked up via unicodedata.combining().
+    """
+    combining_class = unicodedata.combining(chr(code_point))
+    return combining_class != 0
+
+class Range:
+    """A run of consecutive code points that all share one value.
+
+    As built by make_ranges, start and end are the first and last code
+    points of the run (both inclusive) and value is the result shared by
+    every code point in it.
+    """
+
+    def __init__(self, start, end, value):
+        self.start, self.end, self.value = start, end, value
+
+    def __repr__(self):
+        # The bounds are rendered in lower-case hex without a 0x prefix.
+        return 'Range({:x}, {:x}, {})'.format(
+            self.start, self.end, self.value)
+
+def make_ranges(value_callback, max_code_point=0x10FFFF):
+    """Partition code points 0..max_code_point into runs of equal value.
+
+    value_callback is called once for each code point, in ascending
+    order; consecutive code points for which it returns equal values are
+    merged into a single Range.
+
+    max_code_point is the last code point examined (inclusive).  It
+    defaults to U+10FFFF, the highest Unicode code point.
+
+    Returns a list of Range objects in ascending order.  Their inclusive
+    [start, end] spans cover every examined code point exactly once.
+    """
+    ranges = []
+    # range() excludes its stop value, hence the "+ 1": without it the
+    # final code point would never be classified.
+    for code_point in range(max_code_point + 1):
+        value = value_callback(code_point)
+        if ranges and ranges[-1].value == value:
+            # Same value as the previous code point: extend current range
+            ranges[-1].end = code_point
+        else:
+            # First code point, or the value changed: start a new range
+            ranges.append(Range(code_point, code_point, value))
+    return ranges
+
+# Classify every code point, then emit two parallel C tables on stdout:
+# the inclusive end of each range, and whether that range is combining.
+ranges = make_ranges(is_combining_char)
+if 0:
+    # Debugging aid: flip to 1 to dump the computed ranges.
+    pprint(ranges)
+
+print("/* Generated by contrib/unicode/gen-combining-chars.py")
+print(f" using version {unicodedata.unidata_version} of the Unicode standard. */")
+
+# Table 1: last code point of each range, eight entries per line.
+print("\nstatic const cppchar_t combining_range_ends[] = {", end="")
+for index, rng in enumerate(ranges):
+    lead = " " if index % 8 else "\n "
+    print(f"{lead}0x{rng.end:x},", end="")
+print("\n};\n")
+
+# Table 2: one 0/1 flag per range, twenty-four entries per line.
+print("static const bool is_combining[] = {", end="")
+for index, rng in enumerate(ranges):
+    lead = " " if index % 24 else "\n "
+    flag = "1," if rng.value else "0,"
+    print(lead + flag, end="")
+print("\n};")