diff options
author | Tom Tromey <tromey@cygnus.com> | 2000-12-13 22:47:13 +0000 |
---|---|---|
committer | Tom Tromey <tromey@gcc.gnu.org> | 2000-12-13 22:47:13 +0000 |
commit | 3f27e3f86a86d69b43ed3572b979081e826b98bf (patch) | |
tree | 21ad640aa08f09f61043bd296ec51af6e339a547 /gcc/java/gen-table.pl | |
parent | 568aac9cf7c95ef4102838d126decf016d98ea46 (diff) | |
download | gcc-3f27e3f86a86d69b43ed3572b979081e826b98bf.zip gcc-3f27e3f86a86d69b43ed3572b979081e826b98bf.tar.gz gcc-3f27e3f86a86d69b43ed3572b979081e826b98bf.tar.bz2 |
Make-lang.in (JAVA_LEX_C): Added chartables.h.
* Make-lang.in (JAVA_LEX_C): Added chartables.h.
* lex.c (java_ignorable_control_p): Removed.
(java_letter_or_digit_p): Removed.
(java_start_char_p): New function.
(java_read_char): Return `int', not `unicode_t'. Changed
callers.
(java_read_unicode): Likewise.
(java_read_unicode_collapsing_terminators): Likewise.
(java_get_unicode): Likewise.
(java_new_lexer): Initialize hit_eof.
(java_parse_end_comment): Take `int' argument.
(java_parse_doc_section): Likewise.
(java_parse_escape_sequence): Don't allow backlash-newline.
Return `int'.
* lex.h (JAVA_DIGIT_P): Removed.
(_JAVA_LETTER_OR_DIGIT_P): Removed.
(_JAVA_IDENTIFIER_IGNORABLE): Removed.
(JAVA_START_CHAR_P): Renamed from JAVA_ID_CHAR_P.
(JAVA_PART_CHAR_P): New macro.
(UEOF): Now -1.
(JAVA_CHAR_ERROR): Now -2.
(java_lexer): New field `hit_eof'.
* chartables.h: New file.
* gen-table.pl: new file.
From-SVN: r38237
Diffstat (limited to 'gcc/java/gen-table.pl')
-rw-r--r-- | gcc/java/gen-table.pl | 256 |
1 files changed, 256 insertions, 0 deletions
diff --git a/gcc/java/gen-table.pl b/gcc/java/gen-table.pl new file mode 100644 index 0000000..d631ab3 --- /dev/null +++ b/gcc/java/gen-table.pl @@ -0,0 +1,256 @@ +#! /usr/bin/perl + +# Copyright (C) 2000 Free Software Foundation + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +# 02111-1307, USA. + +# gen-table.pl - Generate tables for gcj from Unicode data. +# Usage: perl gen-table.pl DATA-FILE + +# Names of fields in Unicode data table. +$CODE = 0; +$NAME = 1; +$CATEGORY = 2; +$COMBINING_CLASSES = 3; +$BIDI_CATEGORY = 4; +$DECOMPOSITION = 5; +$DECIMAL_VALUE = 6; +$DIGIT_VALUE = 7; +$NUMERIC_VALUE = 8; +$MIRRORED = 9; +$OLD_NAME = 10; +$COMMENT = 11; +$UPPER = 12; +$LOWER = 13; +$TITLE = 14; + +# Start of special-cased gaps in Unicode data table. +%gaps = ( + 0x4e00 => "CJK", + 0xac00 => "Hangul", + 0xd800 => "Unassigned High Surrogate", + 0xdb80 => "Private Use High Surrogate", + 0xdc00 => "Low Surrogate", + 0xe000 => "Private Use" + ); + +# This lists control characters which are also considered whitespace. +# This is a somewhat odd list, taken from the JCL definition of +# Character.isIdentifierIgnorable. +%whitespace_controls = + ( + 0x0009 => 1, + 0x000a => 1, + 0x000b => 1, + 0x000c => 1, + 0x000d => 1, + 0x001c => 1, + 0x001d => 1, + 0x001e => 1, + 0x001f => 1 + ); + +open (INPUT, "< $ARGV[0]") || exit 1; + +$last_code = -1; +while (<INPUT>) +{ + chop; + @fields = split (';', $_, 30); + if ($#fields != 14) + { + print STDERR "Entry for $fields[$CODE] has wrong number of fields\n"; + } + + $code = hex ($fields[$CODE]); + if ($code > $last_code + 1) + { + # Found a gap. + if (defined $gaps{$code}) + { + # Fill the gap with the last character read. + @gfields = @fields; + } + else + { + # The gap represents undefined characters. Only the type + # matters. + @gfields = ('', '', 'Cn', '0', '', '', '', '', '', '', '', + '', '', '', ''); + } + for (++$last_code; $last_code < $code; ++$last_code) + { + $gfields{$CODE} = sprintf ("%04x", $last_code); + &process_one ($last_code, @gfields); + } + } + &process_one ($code, @fields); + $last_code = $code; +} + +close (INPUT); + +@gfields = ('', '', 'Cn', '0', '', '', '', '', '', '', '', + '', '', '', ''); +for (++$last_code; $last_code < 0x10000; ++$last_code) +{ + $gfields{$CODE} = sprintf ("%04x", $last_code); + &process_one ($last_code, @gfields); +} +--$last_code; # Want last to be 0xFFFF. + +&print_tables ($last_code); + +exit 0; + +# Process a single character. +sub process_one +{ + my ($code, @fields) = @_; + + my $value = ''; + my $type = $fields[$CATEGORY]; + + # See if the character is a valid identifier start. + if ($type =~ /L./ # Letter + || $type eq 'Pc' # Connecting punctuation + || $type eq 'Sc') # Currency symbol + { + $value = 'LETTER_START'; + } + + # See if the character is a valid identifier member. + if ($type =~ /L./ # Letter + || $type eq 'Pc' # Connecting punctuation + || $type eq 'Sc' # Currency symbol + || $type =~ /N[dl]/ # Number: decimal or letter + || $type =~ /M[nc]/ # Mark: non-spacing or combining + || ($type eq 'Cc' # Certain controls + && ! defined $whitespace_controls{$code}) + || ($code >= 0x200c # Join controls + && $code <= 0x200f) + || ($code >= 0x202a # Bidi controls -- note that there + # is a typo in the JCL where these are + # concerned. + && $code <= 0x202e) + || ($code >= 0x206a # Format controls + && $code <= 0x206f) + || $code == 0xfeff) # ZWNBSP + { + if ($value eq '') + { + $value = 'LETTER_PART'; + } + else + { + $value = 'LETTER_PART | ' . $value; + } + } + + if ($value eq '') + { + $value = '0'; + } + else + { + $value = '(' . $value . ')'; + } + + $map[$code] = $value; +} + +sub print_tables +{ + my ($last) = @_; + + local ($bytes_out) = 0; + + open (OUT, "> chartables.h"); + + print OUT "/* This file is automatically generated. DO NOT EDIT!\n"; + print OUT " Instead, edit gen-table.pl and re-run. */\n\n"; + + print OUT "#ifndef CHARTABLES_H\n"; + print OUT "#define CHARTABLES_H\n\n"; + + print OUT "#define LETTER_START 1\n"; + print OUT "#define LETTER_PART 2\n\n"; + + for ($count = 0; $count <= $last; $count += 256) + { + $row[$count / 256] = &print_row ($count, '(char *) ', 'char', 1, + 'page'); + } + + print OUT "static char *type_table[256] = {\n"; + for ($count = 0; $count <= $last; $count += 256) + { + print OUT ",\n" if $count > 0; + print OUT " ", $row[$count / 256]; + $bytes_out += 4; + } + print OUT "\n};\n\n"; + + print OUT "#endif /* CHARTABLES_H */\n"; + + close (OUT); + + printf "Generated %d bytes\n", $bytes_out; +} + +# Print a single "row" of a two-level table. +sub print_row +{ + my ($start, $def_pfx, $typname, $typsize, $name) = @_; + + my ($i); + my (@values); + my ($flag) = 1; + my ($off); + for ($off = 0; $off < 256; ++$off) + { + $values[$off] = $map[$off + $start]; + if ($values[$off] ne $values[0]) + { + $flag = 0; + } + } + if ($flag) + { + return $def_pfx . $values[0]; + } + + printf OUT "static %s %s%d[256] = {\n ", $typname, $name, $start / 256; + my ($column) = 2; + for ($i = $start; $i < $start + 256; ++$i) + { + print OUT ", " + if $i > $start; + my ($text) = $values[$i - $start]; + if (length ($text) + $column + 2 > 78) + { + print OUT "\n "; + $column = 2; + } + print OUT $text; + $column += length ($text) + 2; + } + print OUT "\n};\n\n"; + + $bytes_out += 256 * $typsize; + + return sprintf "%s%d", $name, $start / 256; +} |