#!/usr/bin/env python3
#
# Script to generate libcpp/combining-chars.inc
#
# This file is part of GCC.
#
# GCC is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 3, or (at your option) any later
# version.
#
# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING3.  If not see
# <http://www.gnu.org/licenses/>.

from pprint import pprint
import unicodedata

# Highest valid Unicode code point (inclusive).
MAX_CODE_POINT = 0x10FFFF

# Set to True to dump per-code-point values and the computed ranges.
DEBUG = False


def is_combining_char(code_point: int) -> bool:
    """Return True if CODE_POINT has a non-zero canonical combining class."""
    return unicodedata.combining(chr(code_point)) != 0


class Range:
    """An inclusive run of code points [start, end] sharing one value."""

    def __init__(self, start, end, value):
        self.start = start
        self.end = end
        self.value = value

    def __repr__(self):
        # Code points are shown in hex, matching the generated table.
        return f'Range({self.start:x}, {self.end:x}, {self.value})'


def make_ranges(value_callback, *, max_code_point=MAX_CODE_POINT):
    """Partition code points 0..MAX_CODE_POINT into runs of equal value.

    VALUE_CALLBACK is called once per code point, in ascending order, and
    consecutive code points for which it returns equal values are merged
    into a single Range.

    MAX_CODE_POINT is the last code point to classify (inclusive); it
    defaults to U+10FFFF so that the whole Unicode code space is covered.

    Returns the list of Range objects in ascending order.
    """
    ranges = []
    # "+ 1" because range() excludes its stop value and the final code
    # point must be classified too.
    for code_point in range(max_code_point + 1):
        value = value_callback(code_point)
        if DEBUG:
            print(f'{code_point=:x} {value=}')
        if ranges and ranges[-1].value == value:
            # Extend current range
            ranges[-1].end = code_point
        else:
            # Start a new range
            ranges.append(Range(code_point, code_point, value))
    return ranges


def main() -> None:
    """Print the contents of libcpp/combining-chars.inc to stdout."""
    ranges = make_ranges(is_combining_char)
    if DEBUG:
        pprint(ranges)

    print("/* Generated by contrib/unicode/gen-combining-chars.py")
    print(f" using version {unicodedata.unidata_version}"
          " of the Unicode standard. */")

    # Table of the last code point of each range, eight entries per line.
    print("\nstatic const cppchar_t combining_range_ends[] = {", end="")
    for i, r in enumerate(ranges):
        print(" " if i % 8 else "\n ", end="")
        print("0x%x," % r.end, end="")
    print("\n};\n")

    # Parallel table of the value of each range, 24 entries per line.
    print("static const bool is_combining[] = {", end="")
    for i, r in enumerate(ranges):
        print(" " if i % 24 else "\n ", end="")
        print("1," if r.value else "0,", end="")
    print("\n};")


if __name__ == "__main__":
    main()