diff options
author | Per Bothner <per@bothner.com> | 2004-09-30 16:35:07 -0700 |
---|---|---|
committer | Per Bothner <bothner@gcc.gnu.org> | 2004-09-30 16:35:07 -0700 |
commit | 9ec819d49ca815551b3fd8cd4e1202e898bac8d5 (patch) | |
tree | d3e8024e8ba3df366d03490e5511ea3378e8ed60 /gcc/java/lex.h | |
parent | cb3a14307a07a4f9d719f229caff8c47aed42624 (diff) | |
download | gcc-9ec819d49ca815551b3fd8cd4e1202e898bac8d5.zip gcc-9ec819d49ca815551b3fd8cd4e1202e898bac8d5.tar.gz gcc-9ec819d49ca815551b3fd8cd4e1202e898bac8d5.tar.bz2 |
Simplify lexer. Implement --enable-mapped-location support.
* jcf-parse.c (parse_class_file): Use linemap_line_start.
(parse_source_file_1): Pass filename as extra parameter, so we can call
linemap_add and set input_location here, rather than in both callers.
(read_class): Pass copied filename to parse_source_file_1.
Don't initialize wfl_operator - only needed for source compilation.
(read_class, jcf_parse): Call linemap_add with LC_LEAVE.
* lex.h: Remove a bunch of debugging macros.
* lex.h (struct java_line, struct java_error): Remove types.
(JAVA_COLUMN_DELTA): Remove - use java_lexer.next_columns instead.
(struct java_lc_s): Remove prev_col field.
(struct java_lexer): New fields next_unicode, next_columns, and
avail_unicode. New position field, and maybe token_start field.
Don't need hit_eof field - use next_unicode == -1 instead.
(JAVA_INTEGRAL_RANGE_ERROR): Rename to JAVA_RANGE_ERROR.
(JAVA_RANGE_ERROR, JAVA_FLOAT_RANGE_ERROR): Update accordingly.
* parse.h: Various changes for USE_MAPPED_LOCATION.
(EXPR_WFL_EMIT_LINE_NOTE): XXX
(BUILD_EXPR_WFL, EXPR_WFL_ADD_COL): Remove no-longer-used macros.
(struct parser_ctxt): New file_start_location field.
Remove p_line, c_line fields since we no longer save lines.
Remove elc, lineno, and current_jcf fields - no longer used.
* parse.y: Updates for USE_MAPPED_LOCATION and new lexer.
Don't use EXPR_WFL_ADD_COL since that isn't trivial with
source_location and is probably not needed anymore anyway.
Use new expr_add_location function.
(SET_EXPR_LOCATION_FROM_TOKEN): New convenience macro.
(java_pop_parser_context): Minor cleanup.
(java_parser_context_save_global, java_parser_context_restore_global,
java_pop_parser_context): Save/restore input_location as a unit.
(issue_warning_error_from_context): If USE_MAPPED_LOCATION take
a source_location instead of a wfl context node.
(check_class_interface_creation): input_filename is not addressable.
(create_artificial_method): Calling java_parser_context_save_global
and java_parser_context_restore_global is overkill. Instead,
temporarily set input_location from class decl.
(java_layout_seen_class_methods): Set input_location from method decl.
(fix_constructors): Make more robust if no EXPR_WITH_FILE_LOCATION.
(finish_loop_body): Likewise.
* lex.c: Updates for USE_MAPPED_LOCATION. Use build_unknown_wfl.
(java_sprint_unicode): Take a character, not index in line.
(java_sneak_unicode): Replaced by java_peek_unicode.
(java_unget_unicode): No longer used.
(java_allocate_new_line, java_store_unicode): Removed, since we
no longer remember "lines".
(java_new_lexer): Update for new data structures.
(java_read_char): Move unget_value checking to java_read_unicode.
(java_get_unicode, java_peek_unicode, java_next_unicode): New more
efficient functions that are used directly when lexing.
(java_read_unicode_collapsing_terminators): No longer needed.
(java_parse_end_comment, java_parse_escape_sequence, do_java_lex):
Re-organize to use java_peek_unicode to avoid java_unget_unicode.
(java_parse_escape_sequence): Rewrite to be simpler / more efficient.
(do_java_lex): Lots of movings around to avoid java_unget_unicode,
combine switch branches, and test for common token kinds earlier.
(java_lex_error): Rewrite.
* jv-scan.c (expand_location): New function, copied from tree.c.
(main): Set ctxp->filename instead of setting input_filename directly.
From-SVN: r88367
Diffstat (limited to 'gcc/java/lex.h')
-rw-r--r-- | gcc/java/lex.h | 121 |
1 files changed, 43 insertions, 78 deletions
diff --git a/gcc/java/lex.h b/gcc/java/lex.h index bae5047..c9d5ac6 100644 --- a/gcc/java/lex.h +++ b/gcc/java/lex.h @@ -42,65 +42,9 @@ typedef unsigned short unicode_t; /* Default encoding to use if no encoding is specified. */ #define DEFAULT_ENCODING "UTF-8" -/* Debug macro to print-out what we match */ -#ifdef JAVA_LEX_DEBUG -#ifdef JAVA_LEX_DEBUG_CHAR -#define JAVA_LEX_CHAR(c) printf ("java_lex:%d: char '%c'.%d\n", \ - lineno, (c < 128 ? c : '.'), c); -#else -#define JAVA_LEX_CHAR(c) -#endif -#define JAVA_LEX_KW(c) printf ("java_lex:%d: keyword: '%s'\n", lineno,c) -#define JAVA_LEX_ID(s) printf ("java_lex:%d: ID: '%s'\n", \ - lineno, \ - (all_ascii ? s : "<U>")) -#define JAVA_LEX_LIT(s, r) printf ("java_lex:%d: literal '%s'_%d\n", \ - lineno, s, r) -#define JAVA_LEX_CHAR_LIT(s) printf ("java_lex:%d: literal '%d'\n", lineno, s) -#define JAVA_LEX_STR_LIT(s) { \ - int i; \ - printf ("java_lex:%d: literal '%s'\n", \ - lineno, s); \ - } -#define JAVA_LEX_SEP(c) printf ("java_lex:%d: separator '%c'\n",lineno,c) -#define JAVA_LEX_OP(c) printf ("java_lex:%d: operator '%s'\n", lineno,c) -#else -#define JAVA_LEX_CHAR(c) -#define JAVA_LEX_KW(c) -#define JAVA_LEX_ID(s) -#define JAVA_LEX_LIT(s,r) -#define JAVA_LEX_CHAR_LIT(s) -#define JAVA_LEX_STR_LIT(s) -#define JAVA_LEX_SEP(c) -#define JAVA_LEX_OP(s) -#endif - -/* Line information containers */ -struct java_line { - unicode_t *line; /* The line's unicode */ - char *unicode_escape_p; /* The matching char was a unicode escape */ - unicode_t ahead[1]; /* Character ahead */ - char unicode_escape_ahead_p; /* Character ahead is a unicode escape */ - int max; /* buffer's max size */ - int size; /* number of unicodes */ - int current; /* Current position, unicode based */ - int char_col; /* Current position, input char based */ - int lineno; /* Its line number */ - int white_space_only; /* If it contains only white spaces */ -}; -#define JAVA_COLUMN_DELTA(p) \ - (ctxp->c_line->unicode_escape_p [ctxp->c_line->current+(p)] ? 
6 : \ - (ctxp->c_line->line [ctxp->c_line->current+(p)] == '\t' ? 8 : 1)) - -struct java_error { - struct java_line *line; - int error; -}; - typedef struct java_lc_s GTY(()) { - int line; - int prev_col; - int col; + int line; /* line number (1-based) */ + int col; /* column number number (1-based) */ } java_lc; struct java_lexer @@ -111,15 +55,33 @@ struct java_lexer /* Number of consecutive backslashes we've read. */ int bs_count; - /* If nonzero, a value that was pushed back. */ + /* Next available Unicode character. + * This is post-Unicode-escape-processing. -1 if EOF. */ + int next_unicode; + + /* True if next_unicode is next available character, or EOF. */ + bool avail_unicode; + + /* Number of source columns of the previous Unicode character (next_unicode). + If next_unicode==-2, then this is the number of columns of the previous + Unicode character (most recent result of java_{get,peek}_unicode). */ + int next_columns; + + /* If nonzero, a value that was pushed back. This is a unicode character, + but (unlike next_unicode) is pre-'\uXXXX'-processing. It is also used + when a '\r' is *not* followed by a '\n'. */ unicode_t unget_value; - /* If nonzero, we've hit EOF. Used only by java_get_unicode(). */ - unsigned int hit_eof : 1; - /* Name of the character encoding we're using. */ const char *encoding; + /* Current source position. */ + java_lc position; + +#ifndef USE_MAPPED_LOCATION + java_lc token_start; /* Error's line column info */ +#endif + #ifdef HAVE_ICONV /* Nonzero if we've read any bytes. We only recognize the byte-order-marker (BOM) as the first word. 
*/ @@ -168,7 +130,12 @@ extern void java_destroy_lexer (java_lexer *); #define JAVA_LINE_MAX 80 /* Build a location compound integer */ -#define BUILD_LOCATION() ((ctxp->elc.line << 12) | (ctxp->elc.col & 0xfff)) +#ifdef USE_MAPPED_LOCATION +#define BUILD_LOCATION() input_location +#else +#define BUILD_LOCATION() ((ctxp->lexer->token_start.line << 12) \ + | (ctxp->lexer->token_start.col & 0xfff)) +#endif /* Those macros are defined differently if we compile jc1-lite (JC1_LITE defined) or jc1. */ @@ -190,7 +157,7 @@ extern void java_destroy_lexer (java_lexer *); #define SET_LVAL_NODE(NODE) #define BUILD_ID_WFL(EXP) (EXP) #define JAVA_FLOAT_RANGE_ERROR(S) {} -#define JAVA_INTEGRAL_RANGE_ERROR(S) do { } while (0) +#define JAVA_RANGE_ERROR(S) do { } while (0) #else @@ -227,21 +194,19 @@ extern void java_destroy_lexer (java_lexer *); /* Wrap identifier around a wfl */ #define BUILD_ID_WFL(EXP) build_wfl_node ((EXP)) /* Special ways to report error on numeric literals */ -#define JAVA_FLOAT_RANGE_ERROR(m) \ - { \ - char msg [1024]; \ - int i = ctxp->c_line->current; \ - ctxp->c_line->current = number_beginning; \ - sprintf (msg, "Floating point literal exceeds range of `%s'", (m)); \ - java_lex_error (msg, 0); \ - ctxp->c_line->current = i; \ +#define JAVA_FLOAT_RANGE_ERROR(m) \ + { \ + char *msg = xmalloc (100 + strlen (m)); \ + sprintf (msg, "Floating point literal exceeds range of `%s'", (m)); \ + JAVA_RANGE_ERROR(msg); \ + free (msg); \ } -#define JAVA_INTEGRAL_RANGE_ERROR(m) \ - do { \ - int i = ctxp->c_line->current; \ - ctxp->c_line->current = number_beginning; \ - java_lex_error (m, 0); \ - ctxp->c_line->current = i; \ +#define JAVA_RANGE_ERROR(msg) \ + do { \ + int save_col = ctxp->lexer->position.col; \ + ctxp->lexer->position.col = number_beginning; \ + java_lex_error (msg, 0); \ + ctxp->lexer->position.col = save_col; \ } while (0) #endif /* Definitions for jc1 compilation only */ |