diff options
Diffstat (limited to 'libstdc++-v3/scripts/gen_text_encoding_data.py')
-rwxr-xr-x | libstdc++-v3/scripts/gen_text_encoding_data.py | 70 |
1 files changed, 70 insertions, 0 deletions
#!/usr/bin/env python3
#
# Script to generate tables for libstdc++ std::text_encoding.
#
# This file is part of GCC.
#
# GCC is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 3, or (at your option) any later
# version.
#
# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING3.  If not see
# <http://www.gnu.org/licenses/>.

# To update the Libstdc++ static data in <bits/text_encoding-data.h> download
# the latest:
# https://www.iana.org/assignments/character-sets/character-sets-1.csv
# Then run this script and save the output to
# include/bits/text_encoding-data.h

import sys
import csv


def read_charsets(path):
    """Parse the character-sets CSV at *path*.

    The first row is treated as a header and skipped.  For every other
    non-blank row, column 2 is the integer MIB enum, column 1 is the
    primary name and column 5 is a whitespace-separated list of aliases.

    Returns a dict mapping each MIB enum to a list of names, with the
    primary name first followed by the aliases.

    Raises ValueError if two rows share the same MIB enum.
    """
    charsets = {}
    # Explicit encoding so the result does not depend on the host locale.
    with open(path, newline='', encoding='utf-8') as f:
        reader = csv.reader(f)
        next(reader)  # skip header row
        for row in reader:
            if not row:
                # csv.reader yields [] for a blank line; nothing to record.
                continue
            mib = int(row[2])
            if mib in charsets:
                raise ValueError("Multiple rows for mibEnum={}".format(mib))
            name = row[1]
            aliases = row[5].split()
            # Ensure primary name comes first
            if name in aliases:
                aliases.remove(name)
            charsets[mib] = [name] + aliases
    return charsets


def print_table(charsets):
    """Print one table row per name, ordered by ascending MIB enum.

    Just before the rows of the entry whose primary name is "UTF-8", a
    #define is printed giving the number of rows emitted so far, i.e. the
    offset of the first UTF-8 row within the table.
    """
    count = 0
    for mib in sorted(charsets):
        names = charsets[mib]
        if names[0] == "UTF-8":
            print("#define _GLIBCXX_TEXT_ENCODING_UTF8_OFFSET {}".format(count))
        for name in names:
            # NOTE(review): the leading whitespace of this format string may
            # have been collapsed in transit -- confirm against upstream.
            print(' {{ {:4}, "{}" }},'.format(mib, name))
        count += len(names)


def main(argv):
    """Generate the text_encoding data header on standard output.

    *argv* is the full argument vector (program name followed by the path
    of the character sets CSV).  Returns the process exit status: 1 after
    printing a usage message to stderr if the argument count is wrong,
    otherwise 0.
    """
    if len(argv) != 2:
        print("Usage: %s <character sets csv>" % argv[0], file=sys.stderr)
        return 1

    print("// Generated by gen_text_encoding_data.py, do not edit.\n")
    print("#ifndef _GLIBCXX_GET_ENCODING_DATA")
    print('# error "This is not a public header, do not include it directly"')
    print("#endif\n")

    charsets = read_charsets(argv[1])

    # Remove "NATS-DANO" and "NATS-DANO-ADD"
    charsets.pop(33, None)
    charsets.pop(34, None)

    print_table(charsets)

    # <text_encoding> gives an error if this macro is left defined.
    # Do this last, so that the generated output is not usable unless we
    # reach here.
    print("\n#undef _GLIBCXX_GET_ENCODING_DATA")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))