diff options
Diffstat (limited to 'libjava/scripts/encodings.pl')
-rw-r--r-- | libjava/scripts/encodings.pl | 62 |
1 files changed, 62 insertions, 0 deletions
# encodings.pl - Download IANA text and compute alias list.
# Assumes you are running this program from gnu/gcj/convert/.
# Output suitable for direct inclusion in IOConverter.java.
#
# Usage: perl encodings.pl [FILE]
#
# FILE is a copy of the IANA "character-sets" registry text.  When no
# (or an empty) argument is given, the file 'character-sets' in the
# current directory is used, and is fetched with wget if it is missing.
#
# For every registry entry whose "Name:" appears in %map, one
# hash.put line is printed for the name itself and one for each of
# its "Alias:" lines, all mapping onto our canonical name.

use strict;
use warnings;

# Map IANA canonical names onto our canonical names.
my %map = (
    'ISO_8859-1:1987' => '8859_1',
    'UTF-8' => 'UTF8',
    'Shift_JIS' => 'SJIS',
    'Extended_UNIX_Code_Packed_Format_for_Japanese' => 'EUCJIS',
);

# Print one Java statement registering $alias for converter $canonical.
sub print_put
{
    my ($alias, $canonical) = @_;
    print " hash.put (\"$alias\", \"$canonical\");\n";
    return;
}

my $file;
if (! defined $ARGV[0] || $ARGV[0] eq '')
{
    $file = 'character-sets';
    if (! -f $file)
    {
        # Too painful to figure out how to get Perl to do it.
        # List form avoids the shell; wget saves the document under the
        # last URL component, i.e. 'character-sets'.
        my $status = system ('wget', '-o', '.wget-log',
                             'http://www.isi.edu/in-notes/iana/assignments/character-sets');
        if ($status != 0)
        {
            # Fail here rather than with a confusing open error below.
            die "wget failed (status $status); see .wget-log\n";
        }
    }
}
else
{
    $file = $ARGV[0];
}

# Three-argument open: the file name is never interpreted as a mode
# or a pipe, whatever characters it contains.
open (my $input, '<', $file) || die "couldn't open $file: $!";

my $body = 0;       # Set once the first "Name:" line has been seen.
my $current = '';   # Our canonical name for the entry being read, if mapped.
while (my $line = <$input>)
{
    # chomp, not chop: only strip a real line terminator, so a final
    # line lacking a newline keeps its last character.
    chomp $line;

    # Skip the registry preamble; entries start at the first "Name:".
    $body = 1 if $line =~ /^Name:/;
    next unless $body;

    # A blank line ends the current entry.
    if ($line =~ /^$/)
    {
        $current = '';
        next;
    }

    # Only the first two whitespace-separated fields matter: the
    # "Name:"/"Alias:" tag and the charset name following it.
    my ($type, $name) = split (/\s+/, $line);
    next unless defined $type;

    if ($type eq 'Name:')
    {
        # Entries that are not in %map leave $current false, which also
        # suppresses their aliases below.
        $current = defined $name ? $map{$name} : undef;
        print_put ($name, $current) if $current;
    }
    elsif ($type eq 'Alias:')
    {
        # The IANA list has some ugliness.
        if (defined $name && $name ne '' && $name ne 'NONE' && $current)
        {
            print_put ($name, $current);
        }
    }
}

close ($input);