aboutsummaryrefslogtreecommitdiff
path: root/libstdc++-v3/scripts/gen_text_encoding_data.py
diff options
context:
space:
mode:
Diffstat (limited to 'libstdc++-v3/scripts/gen_text_encoding_data.py')
-rwxr-xr-x libstdc++-v3/scripts/gen_text_encoding_data.py 70
1 files changed, 70 insertions, 0 deletions
diff --git a/libstdc++-v3/scripts/gen_text_encoding_data.py b/libstdc++-v3/scripts/gen_text_encoding_data.py
new file mode 100755
index 0000000..2d6f3e4
--- /dev/null
+++ b/libstdc++-v3/scripts/gen_text_encoding_data.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+#
+# Script to generate tables for libstdc++ std::text_encoding.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 3, or (at your option) any later
+# version.
+#
+# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+# To update the libstdc++ static data in <bits/text_encoding-data.h> download
+# the latest:
+# https://www.iana.org/assignments/character-sets/character-sets-1.csv
+# Then run this script and save the output to
+# include/bits/text_encoding-data.h
+
+import sys
+import csv
+
+if len(sys.argv) != 2:
+    # Passing a string to sys.exit prints it to stderr and exits with 1.
+    sys.exit("Usage: %s <character sets csv>" % sys.argv[0])
+
+print("// Generated by gen_text_encoding_data.py, do not edit.\n",
+      "#ifndef _GLIBCXX_GET_ENCODING_DATA",
+      '# error "This is not a public header, do not include it directly"',
+      "#endif\n", sep="\n")
+
+
+# Map each mibEnum value to its list of names, with the primary name first.
+# Decode as UTF-8 explicitly instead of relying on the locale's encoding.
+charsets = {}
+with open(sys.argv[1], newline='', encoding='utf-8') as f:
+    reader = csv.reader(f)
+    next(reader)  # skip header row
+    for row in reader:
+        mib = int(row[2])
+        if mib in charsets:
+            raise ValueError("Multiple rows for mibEnum={}".format(mib))
+        name = row[1]
+        # Ensure primary name comes first and is not repeated as an alias.
+        aliases = [alias for alias in row[5].split() if alias != name]
+        charsets[mib] = [name] + aliases
+
+# Remove "NATS-DANO" and "NATS-DANO-ADD"
+charsets.pop(33, None)
+charsets.pop(34, None)
+
+# Print one entry per name in mibEnum order; count is the entries so far.
+count = 0
+for mib, names in sorted(charsets.items()):
+    if names[0] == "UTF-8":
+        print("#define _GLIBCXX_TEXT_ENCODING_UTF8_OFFSET {}".format(count))
+    for name in names:
+        print(' {{ {:4}, "{}" }},'.format(mib, name))
+    count += len(names)
+
+# <text_encoding> gives an error if this macro is left defined.
+# Do this last, so that the generated output is not usable unless we reach here.
+print("\n#undef _GLIBCXX_GET_ENCODING_DATA")