diff options
Diffstat (limited to 'gdb/ada-unicode.py')
-rwxr-xr-x | gdb/ada-unicode.py | 95 |
1 files changed, 95 insertions, 0 deletions
diff --git a/gdb/ada-unicode.py b/gdb/ada-unicode.py new file mode 100755 index 0000000..4c4986b --- /dev/null +++ b/gdb/ada-unicode.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python3 + +# Generate Unicode case-folding table for Ada. + +# Copyright (C) 2022 Free Software Foundation, Inc. + +# This file is part of GDB. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# This generates the ada-casefold.h header. +# Usage: +# python ada-unicode.py + +import gdbcopyright + +# The start of the current range of case-conversions we are +# processing. If RANGE_START is None, then we're outside of a range. +range_start = None +# End of the current range. +range_end = None +# The delta between RANGE_START and the upper-case variant of that +# character. +upper_delta = None +# The delta between RANGE_START and the lower-case variant of that +# character. +lower_delta = None + +# All the ranges found and completed so far. +# Each entry is a tuple of the form (START, END, UPPER_DELTA, LOWER_DELTA). +all_ranges = [] + + +def finish_range(): + global range_start + global range_end + global upper_delta + global lower_delta + if range_start is not None: + all_ranges.append((range_start, range_end, upper_delta, lower_delta)) + range_start = None + range_end = None + upper_delta = None + lower_delta = None + + +def process_codepoint(val): + global range_start + global range_end + global upper_delta + global lower_delta + c = chr(val) + low = c.lower() + up = c.upper() + # U+00DF ("LATIN SMALL LETTER SHARP S", aka eszsett) traditionally + # upper-cases to the two-character string "SS" (the capital form + # is a relatively recent addition -- 2017). Our simple scheme + # can't handle this, so we skip it. Also, because our approach + # just represents runs of characters with identical folding + # deltas, this change must terminate the current run. + if (c == low and c == up) or len(low) != 1 or len(up) != 1: + finish_range() + return + updelta = ord(up) - val + lowdelta = ord(low) - val + if range_start is not None and (updelta != upper_delta or lowdelta != lower_delta): + finish_range() + if range_start is None: + range_start = val + upper_delta = updelta + lower_delta = lowdelta + range_end = val + + +for c in range(0, 0x10FFFF): + process_codepoint(c) + +with open("ada-casefold.h", "w") as f: + print( + gdbcopyright.copyright("ada-unicode.py", "UTF-32 case-folding for GDB"), + file=f, + ) + for r in all_ranges: + print(f" {{{r[0]}, {r[1]}, {r[2]}, {r[3]}}},", file=f) |