diff options
Diffstat (limited to 'contrib')
-rw-r--r-- | contrib/ChangeLog | 6 | ||||
-rwxr-xr-x | contrib/unicode/gen-box-drawing-chars.py | 94 | ||||
-rwxr-xr-x | contrib/unicode/gen-combining-chars.py | 75 | ||||
-rwxr-xr-x | contrib/unicode/gen-printable-chars.py | 77 |
4 files changed, 252 insertions, 0 deletions
diff --git a/contrib/ChangeLog b/contrib/ChangeLog index 9b73cdd..92af01d 100644 --- a/contrib/ChangeLog +++ b/contrib/ChangeLog @@ -1,3 +1,9 @@ +2023-06-22 David Malcolm <dmalcolm@redhat.com> + + * unicode/gen-box-drawing-chars.py: New file. + * unicode/gen-combining-chars.py: New file. + * unicode/gen-printable-chars.py: New file. + 2023-06-17 Thiago Jung Bauermann <thiago.bauermann@linaro.org> * testsuite-management/validate_failures.py (IsInterestingResult): diff --git a/contrib/unicode/gen-box-drawing-chars.py b/contrib/unicode/gen-box-drawing-chars.py new file mode 100755 index 0000000..9a55266 --- /dev/null +++ b/contrib/unicode/gen-box-drawing-chars.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python3 +# +# Script to generate gcc/text-art/box-drawing-chars.inc +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 3, or (at your option) any later +# version. +# +# GCC is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. 
import unicodedata


def get_box_drawing_char_name(up: bool,
                              down: bool,
                              left: bool,
                              right: bool) -> str:
    """Return the Unicode name of a light box-drawing character.

    The four arguments say in which directions a stroke leaves the
    centre of the cell.  They are only tested for truthiness, so masked
    bits such as (i & 8) are accepted as well as real booleans.

    Opposite strokes merge into one axis word: up + down is named
    'VERTICAL' and left + right is named 'HORIZONTAL'.  The vertical
    part is always named before the horizontal part, joined by ' AND ',
    for example 'BOX DRAWINGS LIGHT UP AND LEFT'.

    With no strokes at all the result is 'SPACE'.
    """
    if up and down:
        vertical = 'VERTICAL'
    elif up:
        vertical = 'UP'
    elif down:
        vertical = 'DOWN'
    else:
        vertical = None

    if left and right:
        horizontal = 'HORIZONTAL'
    elif left:
        horizontal = 'LEFT'
    elif right:
        horizontal = 'RIGHT'
    else:
        horizontal = None

    # At most one word per axis, vertical first.
    dirs = [word for word in (vertical, horizontal) if word]
    if not dirs:
        return 'SPACE'
    return 'BOX DRAWINGS LIGHT ' + ' AND '.join(dirs)


def main() -> None:
    """Print the 16-entry table of box-drawing code points.

    Entry i encodes the strokes as a bit mask:
    8 = up, 4 = down, 2 = left, 1 = right.
    """
    print('/* Generated by contrib/unicode/gen-box-drawing-chars.py. */')
    print()
    for i in range(16):
        name = get_box_drawing_char_name(bool(i & 8), bool(i & 4),
                                         bool(i & 2), bool(i & 1))
        # Every entry except the last is followed by a comma; the last
        # one gets a space instead so the comments stay aligned.
        trailing_comma = ',' if i < 15 else ' '
        unichar = unicodedata.lookup(name)
        print(f'0x{ord(unichar):04X}{trailing_comma}'
              f' /* "{unichar}": U+{ord(unichar):04X}: {name} */')


if __name__ == '__main__':
    main()

# diff --git a/contrib/unicode/gen-combining-chars.py b/contrib/unicode/gen-combining-chars.py
# new file mode 100755
# index 0000000..fb5ef50
# --- /dev/null
# +++ b/contrib/unicode/gen-combining-chars.py
# @@ -0,0 +1,75 @@
#!/usr/bin/env python3
#
# Script to generate libcpp/combining-chars.inc
#
# This file is part of GCC.
#
# GCC is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 3, or (at your option) any later
# version.
#
# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING3. If not see
# <http://www.gnu.org/licenses/>.

from pprint import pprint
import unicodedata

# Last code point of the Unicode code space (inclusive).
MAX_CODE_POINT = 0x10FFFF

# Set to True to dump the computed ranges before the tables are printed.
DEBUG = False


def is_combining_char(code_point: int) -> bool:
    """Return True if CODE_POINT has a non-zero canonical combining class."""
    return unicodedata.combining(chr(code_point)) != 0


class Range:
    """A run of consecutive code points START..END (both inclusive)
    that all map to the same VALUE."""

    def __init__(self, start, end, value):
        self.start = start
        self.end = end
        self.value = value

    def __repr__(self):
        return f'Range({self.start:x}, {self.end:x}, {self.value})'


def make_ranges(value_callback, max_code_point=MAX_CODE_POINT):
    """Partition 0..MAX_CODE_POINT into maximal runs of equal value.

    VALUE_CALLBACK is called once per code point, in increasing order;
    neighbouring code points for which it returns equal values are
    merged into a single Range.

    MAX_CODE_POINT is inclusive and defaults to U+10FFFF, so the final
    Range always ends at the last code point that was examined.

    Returns the list of Range objects in increasing code point order.
    """
    ranges = []
    for code_point in range(max_code_point + 1):
        value = value_callback(code_point)
        if ranges and ranges[-1].value == value:
            # Extend current range
            ranges[-1].end = code_point
        else:
            # Start a new range
            ranges.append(Range(code_point, code_point, value))
    return ranges


def _print_array(declaration, items, per_row):
    """Print 'DECLARATION = {' followed by ITEMS, PER_ROW to a row.

    Every item is followed by a comma.  The closing brace is left to
    the caller, so that it controls the blank lines that follow.
    """
    print(declaration + " = {", end="")
    for i, item in enumerate(items):
        # First item of a row starts a new line; the others are
        # separated from their predecessor by one space.
        print(" " if i % per_row else "\n ", end="")
        print(f"{item},", end="")


def main():
    """Print the two parallel tables (range ends and per-range flags)."""
    ranges = make_ranges(is_combining_char)
    if DEBUG:
        pprint(ranges)

    print("/* Generated by contrib/unicode/gen-combining-chars.py")
    print(f" using version {unicodedata.unidata_version}"
          " of the Unicode standard. */")
    _print_array("\nstatic const cppchar_t combining_range_ends[]",
                 (f"0x{r.end:x}" for r in ranges), 8)
    print("\n};\n")
    _print_array("static const bool is_combining[]",
                 ("1" if r.value else "0" for r in ranges), 24)
    print("\n};")


if __name__ == '__main__':
    main()

# diff --git a/contrib/unicode/gen-printable-chars.py b/contrib/unicode/gen-printable-chars.py
# new file mode 100755
# index 0000000..7684c08
# --- /dev/null
# +++ b/contrib/unicode/gen-printable-chars.py
# @@ -0,0 +1,77 @@
#!/usr/bin/env python3
#
# Script to generate libcpp/printable-chars.inc
#
# This file is part of GCC.
#
# GCC is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 3, or (at your option) any later
# version.
#
# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING3. If not see
# <http://www.gnu.org/licenses/>.
from pprint import pprint
import unicodedata

# Last code point of the Unicode code space (inclusive).
MAX_CODE_POINT = 0x10FFFF

# Set to True to dump the computed ranges before the tables are printed.
DEBUG = False


def is_printable_char(code_point: int) -> bool:
    """Return True unless CODE_POINT is in one of the "Other" categories.

    Every general category whose abbreviation starts with 'C' is
    rejected: "Cc" (control), "Cf" (format), "Cs" (surrogate),
    "Co" (private use) and "Cn" (unassigned).
    """
    category = unicodedata.category(chr(code_point))
    return category[0] != 'C'


class Range:
    """A run of consecutive code points START..END (both inclusive)
    that all map to the same VALUE."""

    def __init__(self, start, end, value):
        self.start = start
        self.end = end
        self.value = value

    def __repr__(self):
        return f'Range({self.start:x}, {self.end:x}, {self.value})'


def make_ranges(value_callback, max_code_point=MAX_CODE_POINT):
    """Partition 0..MAX_CODE_POINT into maximal runs of equal value.

    VALUE_CALLBACK is called once per code point, in increasing order;
    neighbouring code points for which it returns equal values are
    merged into a single Range.

    MAX_CODE_POINT is inclusive and defaults to U+10FFFF, so the final
    Range always ends at the last code point that was examined.

    Returns the list of Range objects in increasing code point order.
    """
    ranges = []
    for code_point in range(max_code_point + 1):
        value = value_callback(code_point)
        if ranges and ranges[-1].value == value:
            # Extend current range
            ranges[-1].end = code_point
        else:
            # Start a new range
            ranges.append(Range(code_point, code_point, value))
    return ranges


def _print_array(declaration, items, per_row):
    """Print 'DECLARATION = {' followed by ITEMS, PER_ROW to a row.

    Every item is followed by a comma.  The closing brace is left to
    the caller, so that it controls the blank lines that follow.
    """
    print(declaration + " = {", end="")
    for i, item in enumerate(items):
        # First item of a row starts a new line; the others are
        # separated from their predecessor by one space.
        print(" " if i % per_row else "\n ", end="")
        print(f"{item},", end="")


def main():
    """Print the two parallel tables (range ends and per-range flags)."""
    ranges = make_ranges(is_printable_char)
    if DEBUG:
        pprint(ranges)

    print("/* Generated by contrib/unicode/gen-printable-chars.py")
    print(f" using version {unicodedata.unidata_version}"
          " of the Unicode standard. */")
    _print_array("\nstatic const cppchar_t printable_range_ends[]",
                 (f"0x{r.end:x}" for r in ranges), 8)
    print("\n};\n")
    _print_array("static const bool is_printable[]",
                 ("1" if r.value else "0" for r in ranges), 24)
    print("\n};")


if __name__ == '__main__':
    main()