aboutsummaryrefslogtreecommitdiff
path: root/gcc/java/lex.c
diff options
context:
space:
mode:
authorTom Tromey <tromey@cygnus.com>2000-12-13 22:47:13 +0000
committerTom Tromey <tromey@gcc.gnu.org>2000-12-13 22:47:13 +0000
commit3f27e3f86a86d69b43ed3572b979081e826b98bf (patch)
tree21ad640aa08f09f61043bd296ec51af6e339a547 /gcc/java/lex.c
parent568aac9cf7c95ef4102838d126decf016d98ea46 (diff)
downloadgcc-3f27e3f86a86d69b43ed3572b979081e826b98bf.zip
gcc-3f27e3f86a86d69b43ed3572b979081e826b98bf.tar.gz
gcc-3f27e3f86a86d69b43ed3572b979081e826b98bf.tar.bz2
Make-lang.in (JAVA_LEX_C): Added chartables.h.
* Make-lang.in (JAVA_LEX_C): Added chartables.h. * lex.c (java_ignorable_control_p): Removed. (java_letter_or_digit_p): Removed. (java_start_char_p): New function. (java_read_char): Return `int', not `unicode_t'. Changed callers. (java_read_unicode): Likewise. (java_read_unicode_collapsing_terminators): Likewise. (java_get_unicode): Likewise. (java_new_lexer): Initialize hit_eof. (java_parse_end_comment): Take `int' argument. (java_parse_doc_section): Likewise. (java_parse_escape_sequence): Don't allow backlash-newline. Return `int'. * lex.h (JAVA_DIGIT_P): Removed. (_JAVA_LETTER_OR_DIGIT_P): Removed. (_JAVA_IDENTIFIER_IGNORABLE): Removed. (JAVA_START_CHAR_P): Renamed from JAVA_ID_CHAR_P. (JAVA_PART_CHAR_P): New macro. (UEOF): Now -1. (JAVA_CHAR_ERROR): Now -2. (java_lexer): New field `hit_eof'. * chartables.h: New file. * gen-table.pl: new file. From-SVN: r38237
Diffstat (limited to 'gcc/java/lex.c')
-rw-r--r--gcc/java/lex.c137
1 files changed, 89 insertions, 48 deletions
diff --git a/gcc/java/lex.c b/gcc/java/lex.c
index bb5e5b5..dddb3cd 100644
--- a/gcc/java/lex.c
+++ b/gcc/java/lex.c
@@ -36,6 +36,7 @@ The Free Software Foundation is independent of Sun Microsystems, Inc. */
#include "keyword.h"
#include "flags.h"
+#include "chartables.h"
/* Function declaration */
static char *java_sprint_unicode PARAMS ((struct java_line *, int));
@@ -46,17 +47,17 @@ static int java_is_eol PARAMS ((FILE *, int));
static tree build_wfl_node PARAMS ((tree));
#endif
static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
-static unicode_t java_parse_escape_sequence PARAMS ((void));
-static int java_letter_or_digit_p PARAMS ((unicode_t));
-static int java_ignorable_control_p PARAMS ((unicode_t));
-static int java_parse_doc_section PARAMS ((unicode_t));
-static void java_parse_end_comment PARAMS ((unicode_t));
-static unicode_t java_get_unicode PARAMS ((void));
-static unicode_t java_read_unicode PARAMS ((java_lexer *, int *));
-static unicode_t java_read_unicode_collapsing_terminators
- PARAMS ((java_lexer *, int *));
+static int java_parse_escape_sequence PARAMS ((void));
+static int java_start_char_p PARAMS ((unicode_t));
+static int java_part_char_p PARAMS ((unicode_t));
+static int java_parse_doc_section PARAMS ((int));
+static void java_parse_end_comment PARAMS ((int));
+static int java_get_unicode PARAMS ((void));
+static int java_read_unicode PARAMS ((java_lexer *, int *));
+static int java_read_unicode_collapsing_terminators PARAMS ((java_lexer *,
+ int *));
static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
-static unicode_t java_read_char PARAMS ((java_lexer *));
+static int java_read_char PARAMS ((java_lexer *));
static void java_allocate_new_line PARAMS ((void));
static void java_unget_unicode PARAMS ((void));
static unicode_t java_sneak_unicode PARAMS ((void));
@@ -217,6 +218,7 @@ java_new_lexer (finput, encoding)
lex->finput = finput;
lex->bs_count = 0;
lex->unget_value = 0;
+ lex->hit_eof = 0;
#ifdef HAVE_ICONV
lex->handle = iconv_open ("UCS-2", encoding);
@@ -298,7 +300,7 @@ java_destroy_lexer (lex)
free (lex);
}
-static unicode_t
+static int
java_read_char (lex)
java_lexer *lex;
{
@@ -496,12 +498,12 @@ java_store_unicode (l, c, unicode_escape_p)
l->unicode_escape_p [l->size++] = unicode_escape_p;
}
-static unicode_t
+static int
java_read_unicode (lex, unicode_escape_p)
java_lexer *lex;
int *unicode_escape_p;
{
- unicode_t c;
+ int c;
c = java_read_char (lex);
*unicode_escape_p = 0;
@@ -549,12 +551,12 @@ java_read_unicode (lex, unicode_escape_p)
return (unicode_t) '\\';
}
-static unicode_t
+static int
java_read_unicode_collapsing_terminators (lex, unicode_escape_p)
java_lexer *lex;
int *unicode_escape_p;
{
- unicode_t c = java_read_unicode (lex, unicode_escape_p);
+ int c = java_read_unicode (lex, unicode_escape_p);
if (c == '\r')
{
@@ -571,13 +573,18 @@ java_read_unicode_collapsing_terminators (lex, unicode_escape_p)
return c;
}
-static unicode_t
+static int
java_get_unicode ()
{
/* It's time to read a line when... */
if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
{
- unicode_t c;
+ int c;
+ int found_chars = 0;
+
+ if (ctxp->lexer->hit_eof)
+ return UEOF;
+
java_allocate_new_line ();
if (ctxp->c_line->line[0] != '\n')
{
@@ -586,15 +593,24 @@ java_get_unicode ()
int unicode_escape_p;
c = java_read_unicode_collapsing_terminators (ctxp->lexer,
&unicode_escape_p);
- java_store_unicode (ctxp->c_line, c, unicode_escape_p);
- if (ctxp->c_line->white_space_only
- && !JAVA_WHITE_SPACE_P (c)
- && c != '\n'
- && c != UEOF)
- ctxp->c_line->white_space_only = 0;
+ if (c != UEOF)
+ {
+ found_chars = 1;
+ java_store_unicode (ctxp->c_line, c, unicode_escape_p);
+ if (ctxp->c_line->white_space_only
+ && !JAVA_WHITE_SPACE_P (c)
+ && c != '\n')
+ ctxp->c_line->white_space_only = 0;
+ }
if ((c == '\n') || (c == UEOF))
break;
}
+
+ if (c == UEOF && ! found_chars)
+ {
+ ctxp->lexer->hit_eof = 1;
+ return UEOF;
+ }
}
}
ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
@@ -606,9 +622,8 @@ java_get_unicode ()
* C is the first character following the '/' and '*'. */
static void
java_parse_end_comment (c)
- unicode_t c;
+ int c;
{
-
for ( ;; c = java_get_unicode ())
{
switch (c)
@@ -637,7 +652,7 @@ java_parse_end_comment (c)
static int
java_parse_doc_section (c)
- unicode_t c;
+ int c;
{
int valid_tag = 0, seen_star = 0;
@@ -655,10 +670,10 @@ java_parse_doc_section (c)
}
c = java_get_unicode();
}
-
+
if (c == UEOF)
java_lex_error ("Comment not terminated at end of input", 0);
-
+
if (seen_star && (c == '/'))
return 1; /* Goto step1 in caller */
@@ -673,7 +688,7 @@ java_parse_doc_section (c)
c = java_get_unicode ();
tag [tag_index++] = c;
}
-
+
if (c == UEOF)
java_lex_error ("Comment not terminated at end of input", 0);
tag [tag_index] = '\0';
@@ -685,28 +700,51 @@ java_parse_doc_section (c)
return 0;
}
-/* This function to be used only by JAVA_ID_CHAR_P (), otherwise it
- will return a wrong result. */
+/* Return true if C is a valid start character for a Java identifier.
+ This is only called if C >= 128 -- smaller values are handled
+ inline. However, this function handles all values anyway. */
static int
-java_letter_or_digit_p (c)
+java_start_char_p (c)
unicode_t c;
{
- return _JAVA_LETTER_OR_DIGIT_P (c);
+ unsigned int hi = c / 256;
+ char *page = type_table[hi];
+ unsigned long val = (unsigned long) page;
+ int flags;
+
+ if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
+ flags = page[c & 255];
+ else
+ flags = val;
+
+ return flags & LETTER_START;
}
-/* This function to be used only by JAVA_ID_CHAR_P (). */
+/* Return true if C is a valid part character for a Java identifier.
+ This is only called if C >= 128 -- smaller values are handled
+ inline. However, this function handles all values anyway. */
static int
-java_ignorable_control_p (c)
+java_part_char_p (c)
unicode_t c;
{
- return _JAVA_IDENTIFIER_IGNORABLE (c);
+ unsigned int hi = c / 256;
+ char *page = type_table[hi];
+ unsigned long val = (unsigned long) page;
+ int flags;
+
+ if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
+ flags = page[c & 255];
+ else
+ flags = val;
+
+ return flags & LETTER_PART;
}
-static unicode_t
+static int
java_parse_escape_sequence ()
{
unicode_t char_lit;
- unicode_t c;
+ int c;
switch (c = java_get_unicode ())
{
@@ -754,8 +792,6 @@ java_parse_escape_sequence ()
return char_lit;
}
- case '\n':
- return '\n'; /* ULT, caught latter as a specific error */
default:
java_lex_error ("Invalid character in escape sequence", 0);
return JAVA_CHAR_ERROR;
@@ -840,7 +876,8 @@ java_lex (java_lval)
#endif
YYSTYPE *java_lval;
{
- unicode_t c, first_unicode;
+ int c;
+ unicode_t first_unicode;
int ascii_index, all_ascii;
char *string;
@@ -863,7 +900,7 @@ java_lex (java_lval)
if ((c = java_get_unicode ()) == UEOF)
return 0; /* Ok here */
else
- java_unget_unicode (); /* Caught latter at the end the function */
+ java_unget_unicode (); /* Caught later, at the end of the function */
}
/* Handle EOF here */
if (c == UEOF) /* Should probably do something here... */
@@ -1189,7 +1226,7 @@ java_lex (java_lval)
/* Character literals */
if (c == '\'')
{
- unicode_t char_lit;
+ int char_lit;
if ((c = java_get_unicode ()) == '\\')
char_lit = java_parse_escape_sequence ();
else
@@ -1206,7 +1243,7 @@ java_lex (java_lval)
if (c != '\'')
java_lex_error ("Syntax error in character literal", 0);
- if (c == JAVA_CHAR_ERROR)
+ if (char_lit == JAVA_CHAR_ERROR)
char_lit = 0; /* We silently convert it to zero */
JAVA_LEX_CHAR_LIT (char_lit);
@@ -1225,7 +1262,11 @@ java_lex (java_lval)
{
if (c == '\\')
c = java_parse_escape_sequence ();
- no_error &= (c != JAVA_CHAR_ERROR ? 1 : 0);
+ if (c == JAVA_CHAR_ERROR)
+ {
+ no_error = 0;
+ c = 0; /* We silently convert it to zero. */
+ }
java_unicode_2_utf8 (c);
}
if (c == '\n' || c == UEOF) /* ULT */
@@ -1469,7 +1510,7 @@ java_lex (java_lval)
/* Keyword, boolean literal or null literal */
for (first_unicode = c, all_ascii = 1, ascii_index = 0;
- JAVA_ID_CHAR_P (c); c = java_get_unicode ())
+ JAVA_PART_CHAR_P (c); c = java_get_unicode ())
{
java_unicode_2_utf8 (c);
if (all_ascii && c >= 128)
@@ -1554,8 +1595,8 @@ java_lex (java_lval)
}
}
- /* We may have and ID here */
- if (JAVA_ID_CHAR_P(first_unicode) && !JAVA_DIGIT_P (first_unicode))
+ /* We may have an ID here */
+ if (JAVA_START_CHAR_P (first_unicode))
{
JAVA_LEX_ID (string);
java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));