aboutsummaryrefslogtreecommitdiff
path: root/libjava/scripts/encodings.pl
diff options
context:
space:
mode:
Diffstat (limited to 'libjava/scripts/encodings.pl')
-rw-r--r-- libjava/scripts/encodings.pl 62
1 files changed, 62 insertions, 0 deletions
diff --git a/libjava/scripts/encodings.pl b/libjava/scripts/encodings.pl
new file mode 100644
index 0000000..f2f6499
--- /dev/null
+++ b/libjava/scripts/encodings.pl
@@ -0,0 +1,62 @@
+# encodings.pl - Download IANA text and compute alias list.
+# Assumes you are running this program from gnu/gcj/convert/.
+# Output suitable for direct inclusion in IOConverter.java.
+#
+# Usage: perl encodings.pl [FILE]
+# FILE is the IANA character-sets text.  Without it, ./character-sets is
+# read, and is first fetched with wget if it does not exist yet.
+
+use strict;
+use warnings;
+
+# Map IANA canonical names onto our canonical names.
+my %map = (
+    'ISO_8859-1:1987' => '8859_1',
+    'UTF-8' => 'UTF8',
+    'Shift_JIS' => 'SJIS',
+    'Extended_UNIX_Code_Packed_Format_for_Japanese' => 'EUCJIS'
+);
+
+my $file = $ARGV[0];
+if (! defined $file || $file eq '') {
+    $file = 'character-sets';
+    if (! -f $file) {
+        # Too painful to figure out how to get Perl to do it.  Stop here
+        # if the download fails instead of failing later in open.
+        system('wget', '-o', '.wget-log',
+               'http://www.isi.edu/in-notes/iana/assignments/character-sets') == 0
+            or die "wget failed (wait status $?); see .wget-log\n";
+    }
+}
+
+# Three-argument open: $file is never scanned for mode or pipe characters.
+open(my $input, '<', $file) or die "couldn't open $file: $!";
+
+my $body = 0;        # True once the first "Name:" line has been seen.
+my $current = '';    # Our name for the charset being read; false if unmapped.
+while (my $line = <$input>) {
+    # chomp, not chop: a final line without a newline must stay intact.
+    chomp $line;
+    $body = 1 if $line =~ /^Name:/;
+    next unless $body;
+
+    # A blank line ends the current charset entry.
+    if ($line eq '') {
+        $current = '';
+        next;
+    }
+
+    my ($type, $name) = split(/\s+/, $line);
+    # Missing fields become '' so the tests below never compare undef.
+    for ($type, $name) { $_ = '' unless defined $_; }
+    if ($type eq 'Name:') {
+        $current = $map{$name};
+        print " hash.put (\"$name\", \"$current\");\n" if $current;
+    }
+    # The IANA list has some ugliness.
+    elsif ($type eq 'Alias:' && $name ne '' && $name ne 'NONE' && $current) {
+        print " hash.put (\"$name\", \"$current\");\n";
+    }
+}
+
+close($input);