aboutsummaryrefslogtreecommitdiff
path: root/gcc/java/gen-table.pl
diff options
context:
space:
mode:
authorTom Tromey <tromey@cygnus.com>2000-12-13 22:47:13 +0000
committerTom Tromey <tromey@gcc.gnu.org>2000-12-13 22:47:13 +0000
commit3f27e3f86a86d69b43ed3572b979081e826b98bf (patch)
tree21ad640aa08f09f61043bd296ec51af6e339a547 /gcc/java/gen-table.pl
parent568aac9cf7c95ef4102838d126decf016d98ea46 (diff)
downloadgcc-3f27e3f86a86d69b43ed3572b979081e826b98bf.zip
gcc-3f27e3f86a86d69b43ed3572b979081e826b98bf.tar.gz
gcc-3f27e3f86a86d69b43ed3572b979081e826b98bf.tar.bz2
Make-lang.in (JAVA_LEX_C): Added chartables.h.
* Make-lang.in (JAVA_LEX_C): Added chartables.h. * lex.c (java_ignorable_control_p): Removed. (java_letter_or_digit_p): Removed. (java_start_char_p): New function. (java_read_char): Return `int', not `unicode_t'. Changed callers. (java_read_unicode): Likewise. (java_read_unicode_collapsing_terminators): Likewise. (java_get_unicode): Likewise. (java_new_lexer): Initialize hit_eof. (java_parse_end_comment): Take `int' argument. (java_parse_doc_section): Likewise. (java_parse_escape_sequence): Don't allow backslash-newline. Return `int'. * lex.h (JAVA_DIGIT_P): Removed. (_JAVA_LETTER_OR_DIGIT_P): Removed. (_JAVA_IDENTIFIER_IGNORABLE): Removed. (JAVA_START_CHAR_P): Renamed from JAVA_ID_CHAR_P. (JAVA_PART_CHAR_P): New macro. (UEOF): Now -1. (JAVA_CHAR_ERROR): Now -2. (java_lexer): New field `hit_eof'. * chartables.h: New file. * gen-table.pl: new file. From-SVN: r38237
Diffstat (limited to 'gcc/java/gen-table.pl')
-rw-r--r--gcc/java/gen-table.pl256
1 files changed, 256 insertions, 0 deletions
diff --git a/gcc/java/gen-table.pl b/gcc/java/gen-table.pl
new file mode 100644
index 0000000..d631ab3
--- /dev/null
+++ b/gcc/java/gen-table.pl
@@ -0,0 +1,256 @@
+#! /usr/bin/perl
+
+# Copyright (C) 2000 Free Software Foundation
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+# 02111-1307, USA.
+
+# gen-table.pl - Generate tables for gcj from Unicode data.
+# Usage: perl gen-table.pl DATA-FILE
+
+# Zero-based positions of the semicolon-separated fields in one line
+# of the Unicode data table, in the order they appear on the line.
+($CODE, $NAME, $CATEGORY, $COMBINING_CLASSES, $BIDI_CATEGORY,
+ $DECOMPOSITION, $DECIMAL_VALUE, $DIGIT_VALUE, $NUMERIC_VALUE,
+ $MIRRORED, $OLD_NAME, $COMMENT, $UPPER, $LOWER, $TITLE) = (0 .. 14);
+
+# Special-cased gaps in the Unicode data table.  Each key is the code
+# point at which such a gap starts; the value is only a descriptive
+# label for the range.
+%gaps = (
+    0x4E00 => 'CJK',
+    0xAC00 => 'Hangul',
+    0xD800 => 'Unassigned High Surrogate',
+    0xDB80 => 'Private Use High Surrogate',
+    0xDC00 => 'Low Surrogate',
+    0xE000 => 'Private Use',
+);
+
+# Control characters which are also considered whitespace: 0x0009
+# through 0x000d and 0x001c through 0x001f.  This is a somewhat odd
+# list, taken from the JCL definition of
+# Character.isIdentifierIgnorable.  Only the presence of a key
+# matters; every value is 1.
+%whitespace_controls =
+    map { ($_, 1) } (0x0009 .. 0x000d, 0x001c .. 0x001f);
+
+# Main program: read the Unicode data file named on the command line,
+# classify every code point from 0x0000 through 0xFFFF with
+# process_one, then write the tables with print_tables.
+
+if (! defined $ARGV[0])
+{
+    print STDERR "Usage: perl gen-table.pl DATA-FILE\n";
+    exit 1;
+}
+
+# Three-argument open so that the file name is never interpreted as
+# a mode.  Report the reason for a failure instead of exiting silently.
+if (! open (INPUT, '<', $ARGV[0]))
+{
+    print STDERR "gen-table.pl: cannot open $ARGV[0]: $!\n";
+    exit 1;
+}
+
+# Field values for a code point that has no entry in the data file.
+# Only the category (`Cn') matters to process_one.
+@unassigned = ('', '', 'Cn', '0', ('') x 11);
+
+$last_code = -1;
+while (<INPUT>)
+{
+    # chomp, not chop: a final line without a trailing newline must
+    # not lose its last character.
+    chomp;
+
+    # A blank line would otherwise be parsed as code point 0.
+    next if $_ eq '';
+
+    @fields = split (';', $_, 30);
+    if ($#fields != 14)
+    {
+        print STDERR "Entry for $fields[$CODE] has wrong number of fields\n";
+    }
+
+    $code = hex ($fields[$CODE]);
+
+    # The generated tables have exactly 256 rows of 256 entries, so
+    # anything above 0xFFFF cannot be represented.
+    last if $code > 0xffff;
+
+    if ($code > $last_code + 1)
+    {
+        # Found a gap.  %gaps is keyed by the code point at which a
+        # special-cased range starts, which is the entry read just
+        # before the gap ($last_code), not the entry that ends it.
+        # NOTE: this assumes the data file gives only the first and
+        # last entry of each such range, both with the same category.
+        if (defined $gaps{$last_code})
+        {
+            # Fill the gap with the entry that closes the range.
+            @gfields = @fields;
+        }
+        else
+        {
+            # The gap represents undefined characters.  Only the type
+            # matters.
+            @gfields = @unassigned;
+        }
+        for (++$last_code; $last_code < $code; ++$last_code)
+        {
+            # Element of the array @gfields (the original wrote to an
+            # unrelated hash, %gfields).
+            $gfields[$CODE] = sprintf ("%04x", $last_code);
+            process_one ($last_code, @gfields);
+        }
+    }
+    process_one ($code, @fields);
+    $last_code = $code;
+}
+
+close (INPUT);
+
+# Everything after the last entry read, up to 0xFFFF, is undefined.
+@gfields = @unassigned;
+for (++$last_code; $last_code < 0x10000; ++$last_code)
+{
+    $gfields[$CODE] = sprintf ("%04x", $last_code);
+    process_one ($last_code, @gfields);
+}
+--$last_code;   # Want last to be 0xFFFF.
+
+print_tables ($last_code);
+
+exit 0;
+
+# Classify a single character and record the result in @map.
+#
+# Arguments: the numeric code point, followed by the fields of its
+# data-table entry.  Only the category field is examined.
+#
+# $map[CODE] is set to `0' when the character can appear nowhere in
+# an identifier, and otherwise to a parenthesized C expression that
+# combines LETTER_PART and, where applicable, LETTER_START.
+sub process_one {
+    my ($code, @fields) = @_;
+
+    my $category = $fields[$CATEGORY];
+
+    # Letters, connecting punctuation and currency symbols may start
+    # an identifier.
+    my $is_start = ($category =~ /L./
+                    || $category eq 'Pc'
+                    || $category eq 'Sc');
+
+    # Code point ranges that are identifier members whatever their
+    # category.  Note that there is a typo in the JCL where the bidi
+    # controls are concerned.
+    my @member_ranges = (
+        [0x200c, 0x200f],       # Join controls
+        [0x202a, 0x202e],       # Bidi controls
+        [0x206a, 0x206f],       # Format controls
+        [0xfeff, 0xfeff],       # ZWNBSP
+    );
+
+    # Anything that may start an identifier may also continue one.
+    # Further members are decimal and letter numbers, non-spacing and
+    # combining marks, controls that are not whitespace, and the
+    # ranges listed above.
+    my $is_part = ($is_start
+                   || $category =~ /N[dl]/
+                   || $category =~ /M[nc]/
+                   || ($category eq 'Cc'
+                       && ! defined $whitespace_controls{$code})
+                   || scalar (grep { $code >= $_->[0] && $code <= $_->[1] }
+                              @member_ranges));
+
+    my @flags;
+    push (@flags, 'LETTER_PART') if $is_part;
+    push (@flags, 'LETTER_START') if $is_start;
+
+    $map[$code] = @flags ? '(' . join (' | ', @flags) . ')' : '0';
+}
+
+# Write chartables.h in the current directory.
+#
+# Argument: the highest code point to cover.  One row of 256 entries
+# is emitted (through print_row) for every multiple of 256 up to that
+# value, followed by the top-level `type_table' that points at the
+# rows.  Dies if the file cannot be created or fully written.  A
+# summary of the table size is printed on standard output.
+sub print_tables
+{
+    my ($last) = @_;
+
+    # `local', not `my': print_row adds the size of each row it emits
+    # to this total through the package variable.
+    local ($bytes_out) = 0;
+
+    my (@row);
+    my ($count);
+
+    # The bareword handle OUT is shared with print_row.
+    open (OUT, '>', 'chartables.h')
+        || die "gen-table.pl: cannot create chartables.h: $!\n";
+
+    print OUT "/* This file is automatically generated. DO NOT EDIT!\n";
+    print OUT " Instead, edit gen-table.pl and re-run. */\n\n";
+
+    print OUT "#ifndef CHARTABLES_H\n";
+    print OUT "#define CHARTABLES_H\n\n";
+
+    print OUT "#define LETTER_START 1\n";
+    print OUT "#define LETTER_PART 2\n\n";
+
+    # First pass: emit the non-uniform rows and remember, for each
+    # row, the expression that refers to it.
+    for ($count = 0; $count <= $last; $count += 256)
+    {
+        $row[$count / 256] = print_row ($count, '(char *) ', 'char', 1,
+                                        'page');
+    }
+
+    # Second pass: emit the table of rows.
+    print OUT "static char *type_table[256] = {\n";
+    for ($count = 0; $count <= $last; $count += 256)
+    {
+        print OUT ",\n" if $count > 0;
+        print OUT " ", $row[$count / 256];
+        $bytes_out += 4;        # Counts each pointer as 4 bytes.
+    }
+    print OUT "\n};\n\n";
+
+    print OUT "#endif /* CHARTABLES_H */\n";
+
+    # Buffered write errors only show up when the handle is closed.
+    close (OUT)
+        || die "gen-table.pl: error writing chartables.h: $!\n";
+
+    printf "Generated %d bytes\n", $bytes_out;
+}
+
+# Emit one 256-entry "row" of a two-level table.
+#
+# Arguments: the first code point of the row, the prefix to use when
+# the row is uniform, the C element type, the size of one element in
+# bytes, and the base name for generated arrays.
+#
+# When all 256 entries of @map in the row are the same, nothing is
+# written and the prefix followed by that entry is returned.
+# Otherwise a static array is written to OUT, its size is added to
+# $bytes_out, and the name of the array is returned.
+sub print_row {
+    my ($start, $def_pfx, $typname, $typsize, $name) = @_;
+
+    my @cells = @map[$start .. $start + 255];
+
+    # A row whose entries are all alike needs no array of its own.
+    my $uniform = ! grep { $_ ne $cells[0] } @cells;
+    return $def_pfx . $cells[0] if $uniform;
+
+    my $page = $start / 256;
+    printf OUT "static %s %s%d[256] = {\n ", $typname, $name, $page;
+
+    my $col = 2;
+    my $first = 1;
+    foreach my $cell (@cells) {
+        print OUT ", " unless $first;
+        $first = 0;
+
+        # Width of the entry plus its separator.
+        my $width = length ($cell) + 2;
+        if ($width + $col > 78) {
+            # Start a new output line rather than run past column 78.
+            print OUT "\n ";
+            $col = 2;
+        }
+        print OUT $cell;
+        $col += $width;
+    }
+    print OUT "\n};\n\n";
+
+    $bytes_out += 256 * $typsize;
+
+    return sprintf "%s%d", $name, $page;
+}