aboutsummaryrefslogtreecommitdiff
path: root/gdb/ada-unicode.py
diff options
context:
space:
mode:
Diffstat (limited to 'gdb/ada-unicode.py')
-rwxr-xr-xgdb/ada-unicode.py95
1 files changed, 95 insertions, 0 deletions
diff --git a/gdb/ada-unicode.py b/gdb/ada-unicode.py
new file mode 100755
index 0000000..4c4986b
--- /dev/null
+++ b/gdb/ada-unicode.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python3
+
+# Generate Unicode case-folding table for Ada.
+
+# Copyright (C) 2022 Free Software Foundation, Inc.
+
+# This file is part of GDB.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+# This generates the ada-casefold.h header in the current directory.
+# Usage:
+# python3 ada-unicode.py
+
+import gdbcopyright
+
+# First code point of the run of case conversions currently being
+# built. None means no run is open; finish_range resets it to None.
+range_start = None
+# Last code point added to the current run (inclusive), or None.
+range_end = None
+# Upper-case code point minus the original code point; the same value
+# holds for every member of the current run.
+upper_delta = None
+# Lower-case code point minus the original code point; the same value
+# holds for every member of the current run.
+lower_delta = None
+
+# Every completed run, in the order finish_range appended it.
+# Each entry is a tuple of the form (START, END, UPPER_DELTA, LOWER_DELTA).
+all_ranges = []
+
+
+def finish_range():
+ global range_start
+ global range_end
+ global upper_delta
+ global lower_delta
+ if range_start is not None:
+ all_ranges.append((range_start, range_end, upper_delta, lower_delta))
+ range_start = None
+ range_end = None
+ upper_delta = None
+ lower_delta = None
+
+
+def process_codepoint(val):
+ global range_start
+ global range_end
+ global upper_delta
+ global lower_delta
+ c = chr(val)
+ low = c.lower()
+ up = c.upper()
+ # U+00DF ("LATIN SMALL LETTER SHARP S", aka eszsett) traditionally
+ # upper-cases to the two-character string "SS" (the capital form
+ # is a relatively recent addition -- 2017). Our simple scheme
+ # can't handle this, so we skip it. Also, because our approach
+ # just represents runs of characters with identical folding
+ # deltas, this change must terminate the current run.
+ if (c == low and c == up) or len(low) != 1 or len(up) != 1:
+ finish_range()
+ return
+ updelta = ord(up) - val
+ lowdelta = ord(low) - val
+ if range_start is not None and (updelta != upper_delta or lowdelta != lower_delta):
+ finish_range()
+ if range_start is None:
+ range_start = val
+ upper_delta = updelta
+ lower_delta = lowdelta
+ range_end = val
+
+
+for c in range(0, 0x10FFFF):
+ process_codepoint(c)
+
+with open("ada-casefold.h", "w") as f:
+ print(
+ gdbcopyright.copyright("ada-unicode.py", "UTF-32 case-folding for GDB"),
+ file=f,
+ )
+ for r in all_ranges:
+ print(f" {{{r[0]}, {r[1]}, {r[2]}, {r[3]}}},", file=f)