1 files changed, 70 insertions, 0 deletions
diff --git a/libstdc++-v3/scripts/gen_text_encoding_data.py b/libstdc++-v3/scripts/gen_text_encoding_data.py
new file mode 100755
index 0000000..2d6f3e4
--- /dev/null
+++ b/libstdc++-v3/scripts/gen_text_encoding_data.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+#
+# Script to generate tables for libstdc++ std::text_encoding.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 3, or (at your option) any later
+# version.
+#
+# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+# To update the libstdc++ static data in <bits/text_encoding-data.h> download
+# the latest version of:
+# https://www.iana.org/assignments/character-sets/character-sets-1.csv
+# Then run this script with that file as its only argument and save the
+# output to include/bits/text_encoding-data.h
+
+import sys
+import csv
+
+if len(sys.argv) != 2: # exactly one argument: the CSV file to read
+    sys.exit("Usage: %s <character sets csv>" % sys.argv[0])
+
+print("// Generated by gen_text_encoding_data.py, do not edit.\n")
+# Guard: the output is an error unless the including file defines this.
+print("#ifndef _GLIBCXX_GET_ENCODING_DATA\n"
+      '# error "This is not a public header, do not include it directly"\n'
+      "#endif\n")
+
+
+# Map each MIB enum value to its names: primary name first, then aliases.
+charsets = {}
+# Decode as UTF-8 explicitly rather than using the locale's encoding.
+with open(sys.argv[1], newline='', encoding='utf-8') as f:
+    reader = csv.reader(f)
+    next(reader) # skip header row
+    for row in filter(None, reader): # csv.reader yields [] for blank lines
+        mib = int(row[2])
+        if mib in charsets:
+            raise ValueError("Multiple rows for mibEnum={}".format(mib))
+        name = row[1]
+        # Ensure primary name comes first, and is not repeated as an alias.
+        aliases = [a for a in row[5].split() if a != name]
+        charsets[mib] = [name] + aliases
+
+# Remove "NATS-DANO" and "NATS-DANO-ADD"
+charsets.pop(33, None)
+charsets.pop(34, None)
+
+# Emit one table entry per name, ordered by MIB enum; `offset` counts entries.
+offset = 0
+for mib, names in sorted(charsets.items()):
+    if names[0] == "UTF-8":
+        print("#define _GLIBCXX_TEXT_ENCODING_UTF8_OFFSET {}".format(offset))
+    for name in names:
+        print('  {{ {:4}, "{}" }},'.format(mib, name))
+        offset += 1
+
+# <text_encoding> reports an error if this macro is still defined, so
+# emit the #undef last: output cut short by a failure stays unusable.
+print("\n#undef _GLIBCXX_GET_ENCODING_DATA")