diff options
Diffstat (limited to 'gcc/java/lex.c')
-rw-r--r-- | gcc/java/lex.c | 137 |
1 files changed, 89 insertions, 48 deletions
diff --git a/gcc/java/lex.c b/gcc/java/lex.c index bb5e5b5..dddb3cd 100644 --- a/gcc/java/lex.c +++ b/gcc/java/lex.c @@ -36,6 +36,7 @@ The Free Software Foundation is independent of Sun Microsystems, Inc. */ #include "keyword.h" #include "flags.h" +#include "chartables.h" /* Function declaration */ static char *java_sprint_unicode PARAMS ((struct java_line *, int)); @@ -46,17 +47,17 @@ static int java_is_eol PARAMS ((FILE *, int)); static tree build_wfl_node PARAMS ((tree)); #endif static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int)); -static unicode_t java_parse_escape_sequence PARAMS ((void)); -static int java_letter_or_digit_p PARAMS ((unicode_t)); -static int java_ignorable_control_p PARAMS ((unicode_t)); -static int java_parse_doc_section PARAMS ((unicode_t)); -static void java_parse_end_comment PARAMS ((unicode_t)); -static unicode_t java_get_unicode PARAMS ((void)); -static unicode_t java_read_unicode PARAMS ((java_lexer *, int *)); -static unicode_t java_read_unicode_collapsing_terminators - PARAMS ((java_lexer *, int *)); +static int java_parse_escape_sequence PARAMS ((void)); +static int java_start_char_p PARAMS ((unicode_t)); +static int java_part_char_p PARAMS ((unicode_t)); +static int java_parse_doc_section PARAMS ((int)); +static void java_parse_end_comment PARAMS ((int)); +static int java_get_unicode PARAMS ((void)); +static int java_read_unicode PARAMS ((java_lexer *, int *)); +static int java_read_unicode_collapsing_terminators PARAMS ((java_lexer *, + int *)); static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int)); -static unicode_t java_read_char PARAMS ((java_lexer *)); +static int java_read_char PARAMS ((java_lexer *)); static void java_allocate_new_line PARAMS ((void)); static void java_unget_unicode PARAMS ((void)); static unicode_t java_sneak_unicode PARAMS ((void)); @@ -217,6 +218,7 @@ java_new_lexer (finput, encoding) lex->finput = finput; lex->bs_count = 0; lex->unget_value = 0; + lex->hit_eof = 0; #ifdef HAVE_ICONV lex->handle = iconv_open ("UCS-2", encoding); @@ -298,7 +300,7 @@ java_destroy_lexer (lex) free (lex); } -static unicode_t +static int java_read_char (lex) java_lexer *lex; { @@ -496,12 +498,12 @@ java_store_unicode (l, c, unicode_escape_p) l->unicode_escape_p [l->size++] = unicode_escape_p; } -static unicode_t +static int java_read_unicode (lex, unicode_escape_p) java_lexer *lex; int *unicode_escape_p; { - unicode_t c; + int c; c = java_read_char (lex); *unicode_escape_p = 0; @@ -549,12 +551,12 @@ java_read_unicode (lex, unicode_escape_p) return (unicode_t) '\\'; } -static unicode_t +static int java_read_unicode_collapsing_terminators (lex, unicode_escape_p) java_lexer *lex; int *unicode_escape_p; { - unicode_t c = java_read_unicode (lex, unicode_escape_p); + int c = java_read_unicode (lex, unicode_escape_p); if (c == '\r') { @@ -571,13 +573,18 @@ java_read_unicode_collapsing_terminators (lex, unicode_escape_p) return c; } -static unicode_t +static int java_get_unicode () { /* It's time to read a line when... */ if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size) { - unicode_t c; + int c; + int found_chars = 0; + + if (ctxp->lexer->hit_eof) + return UEOF; + java_allocate_new_line (); if (ctxp->c_line->line[0] != '\n') { @@ -586,15 +593,24 @@ java_get_unicode () int unicode_escape_p; c = java_read_unicode_collapsing_terminators (ctxp->lexer, &unicode_escape_p); - java_store_unicode (ctxp->c_line, c, unicode_escape_p); - if (ctxp->c_line->white_space_only - && !JAVA_WHITE_SPACE_P (c) - && c != '\n' - && c != UEOF) - ctxp->c_line->white_space_only = 0; + if (c != UEOF) + { + found_chars = 1; + java_store_unicode (ctxp->c_line, c, unicode_escape_p); + if (ctxp->c_line->white_space_only + && !JAVA_WHITE_SPACE_P (c) + && c != '\n') + ctxp->c_line->white_space_only = 0; + } if ((c == '\n') || (c == UEOF)) break; } + + if (c == UEOF && ! found_chars) + { + ctxp->lexer->hit_eof = 1; + return UEOF; + } } } ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0); @@ -606,9 +622,8 @@ java_get_unicode () * C is the first character following the '/' and '*'. */ static void java_parse_end_comment (c) - unicode_t c; + int c; { - for ( ;; c = java_get_unicode ()) { switch (c) @@ -637,7 +652,7 @@ java_parse_end_comment (c) static int java_parse_doc_section (c) - unicode_t c; + int c; { int valid_tag = 0, seen_star = 0; @@ -655,10 +670,10 @@ java_parse_doc_section (c) } c = java_get_unicode(); } - + if (c == UEOF) java_lex_error ("Comment not terminated at end of input", 0); - + if (seen_star && (c == '/')) return 1; /* Goto step1 in caller */ @@ -673,7 +688,7 @@ java_parse_doc_section (c) c = java_get_unicode (); tag [tag_index++] = c; } - + if (c == UEOF) java_lex_error ("Comment not terminated at end of input", 0); tag [tag_index] = '\0'; @@ -685,28 +700,51 @@ java_parse_doc_section (c) return 0; } -/* This function to be used only by JAVA_ID_CHAR_P (), otherwise it - will return a wrong result. */ +/* Return true if C is a valid start character for a Java identifier. + This is only called if C >= 128 -- smaller values are handled + inline. However, this function handles all values anyway. */ static int -java_letter_or_digit_p (c) +java_start_char_p (c) unicode_t c; { - return _JAVA_LETTER_OR_DIGIT_P (c); + unsigned int hi = c / 256; + char *page = type_table[hi]; + unsigned long val = (unsigned long) page; + int flags; + + if ((val & ~ (LETTER_PART | LETTER_START)) != 0) + flags = page[c & 255]; + else + flags = val; + + return flags & LETTER_START; } -/* This function to be used only by JAVA_ID_CHAR_P (). */ +/* Return true if C is a valid part character for a Java identifier. + This is only called if C >= 128 -- smaller values are handled + inline. However, this function handles all values anyway. */ static int -java_ignorable_control_p (c) +java_part_char_p (c) unicode_t c; { - return _JAVA_IDENTIFIER_IGNORABLE (c); + unsigned int hi = c / 256; + char *page = type_table[hi]; + unsigned long val = (unsigned long) page; + int flags; + + if ((val & ~ (LETTER_PART | LETTER_START)) != 0) + flags = page[c & 255]; + else + flags = val; + + return flags & LETTER_PART; } -static unicode_t +static int java_parse_escape_sequence () { unicode_t char_lit; - unicode_t c; + int c; switch (c = java_get_unicode ()) { @@ -754,8 +792,6 @@ java_parse_escape_sequence () return char_lit; } - case '\n': - return '\n'; /* ULT, caught latter as a specific error */ default: java_lex_error ("Invalid character in escape sequence", 0); return JAVA_CHAR_ERROR; @@ -840,7 +876,8 @@ java_lex (java_lval) #endif YYSTYPE *java_lval; { - unicode_t c, first_unicode; + int c; + unicode_t first_unicode; int ascii_index, all_ascii; char *string; @@ -863,7 +900,7 @@ java_lex (java_lval) if ((c = java_get_unicode ()) == UEOF) return 0; /* Ok here */ else - java_unget_unicode (); /* Caught latter at the end the function */ + java_unget_unicode (); /* Caught later, at the end of the function */ } /* Handle EOF here */ if (c == UEOF) /* Should probably do something here... */ @@ -1189,7 +1226,7 @@ java_lex (java_lval) /* Character literals */ if (c == '\'') { - unicode_t char_lit; + int char_lit; if ((c = java_get_unicode ()) == '\\') char_lit = java_parse_escape_sequence (); else @@ -1206,7 +1243,7 @@ java_lex (java_lval) if (c != '\'') java_lex_error ("Syntax error in character literal", 0); - if (c == JAVA_CHAR_ERROR) + if (char_lit == JAVA_CHAR_ERROR) char_lit = 0; /* We silently convert it to zero */ JAVA_LEX_CHAR_LIT (char_lit); @@ -1225,7 +1262,11 @@ java_lex (java_lval) { if (c == '\\') c = java_parse_escape_sequence (); - no_error &= (c != JAVA_CHAR_ERROR ? 1 : 0); + if (c == JAVA_CHAR_ERROR) + { + no_error = 0; + c = 0; /* We silently convert it to zero. */ + } java_unicode_2_utf8 (c); } if (c == '\n' || c == UEOF) /* ULT */ @@ -1469,7 +1510,7 @@ java_lex (java_lval) /* Keyword, boolean literal or null literal */ for (first_unicode = c, all_ascii = 1, ascii_index = 0; - JAVA_ID_CHAR_P (c); c = java_get_unicode ()) + JAVA_PART_CHAR_P (c); c = java_get_unicode ()) { java_unicode_2_utf8 (c); if (all_ascii && c >= 128) @@ -1554,8 +1595,8 @@ java_lex (java_lval) } } - /* We may have and ID here */ - if (JAVA_ID_CHAR_P(first_unicode) && !JAVA_DIGIT_P (first_unicode)) + /* We may have an ID here */ + if (JAVA_START_CHAR_P (first_unicode)) { JAVA_LEX_ID (string); java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string)); |