diff options
Diffstat (limited to 'contrib/unicode/gen-combining-chars.py')
-rwxr-xr-x | contrib/unicode/gen-combining-chars.py | 75 |
1 files changed, 75 insertions, 0 deletions
diff --git a/contrib/unicode/gen-combining-chars.py b/contrib/unicode/gen-combining-chars.py new file mode 100755 index 0000000..fb5ef50 --- /dev/null +++ b/contrib/unicode/gen-combining-chars.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python3 +# +# Script to generate libcpp/combining-chars.inc +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 3, or (at your option) any later +# version. +# +# GCC is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. */ + +from pprint import pprint +import unicodedata + +def is_combining_char(code_point) -> bool: + return unicodedata.combining(chr(code_point)) != 0 + +class Range: + def __init__(self, start, end, value): + self.start = start + self.end = end + self.value = value + + def __repr__(self): + return f'Range({self.start:x}, {self.end:x}, {self.value})' + +def make_ranges(value_callback): + ranges = [] + for code_point in range(0x10FFFF): + value = is_combining_char(code_point) + if 0: + print(f'{code_point=:x} {value=}') + if ranges and ranges[-1].value == value: + # Extend current range + ranges[-1].end = code_point + else: + # Start a new range + ranges.append(Range(code_point, code_point, value)) + return ranges + +ranges = make_ranges(is_combining_char) +if 0: + pprint(ranges) + +print(f"/* Generated by contrib/unicode/gen-combining-chars.py") +print(f" using version {unicodedata.unidata_version}" + " of the Unicode standard. */") +print("\nstatic const cppchar_t combining_range_ends[] = {", end="") +for i, r in enumerate(ranges): + if i % 8: + print(" ", end="") + else: + print("\n ", end="") + print("0x%x," % r.end, end="") +print("\n};\n") +print("static const bool is_combining[] = {", end="") +for i, r in enumerate(ranges): + if i % 24: + print(" ", end="") + else: + print("\n ", end="") + if r.value: + print("1,", end="") + else: + print("0,", end="") +print("\n};") |