Simplify lexer. Implement --enable-mapped-location support.

* jcf-parse.c (parse_class_file): Use linemap_line_start. (parse_source_file_1): Pass filename as extra parameter, so we can call linemap_add and set input_location here, rather than in both callers. (read_class): Pass copied filename to parse_source_file_1. Don't initialize wfl_operator - only needed for source compilation. (read_class, jcf_parse): Call linemap_add with LC_LEAVE. * lex.h: Remove a bunch of debugging macros. * lex.h (struct java_line, struct java_error): Remove types. (JAVA_COLUMN_DELTA): Remove - use java_lexer.next_columns instead. (struct java_lc_s): Remove prev_col field. (struct java_lexer): New fields next_unicode, next_columns, and avail_unicode. New position field, and maybe token_start field. Don't need hit_eof field - use next_unicode == -1 instead. (JAVA_INTEGRAL_RANGE_ERROR): Rename to JAVA_RANGE_ERROR. (JAVA_RANGE_ERROR, JAVA_FLOAT_RANGE_ERROR): Update accordingly. * parse.h: Various changes for USE_MAPPED_LOCATION. (EXPR_WFL_EMIT_LINE_NOTE): XXX (BUILD_EXPR_WFL, EXPR_WFL_ADD_COL): Remove no-longer-used macros. (struct parser_ctxt): New file_start_location field. Remove p_line, c_line fields since we no longer save lines. Remove elc, lineno, and current_jcf fields - no longer used. * parse.y: Updates for USE_MAPPED_LOCATION and new lexer. Don't use EXPR_WFL_ADD_COL since that isn't trivial with source_location and is probably not needed anymore anyway. Use new expr_add_Location function. (SET_EXPR_LOCATION_FROM_TOKEN): New convenience macro. (java_pop_parser_context): Minor cleanup. (java_parser_context_save_global, java_parser_context_restore_global, java_pop_parser_context): Save/restore input_location as a unit. (issue_warning_error_from_context): If USE_MAPPED_LOCATION take a source_location instead of a wfl context node. (check_class_interface_creation): input_filename is not addressable. (create_artificial_method): Calling java_parser_context_save_global and java_parser_context_restore_global is overkill. 
Instead, temporarily set input_location from class decl. (java_layout_seen_class_methods): Set input_location from method decl. (fix_constructors): Make more robust if no EXPR_WITH_FILE_LOCATION. (finish_loop_body): Likewise. * lex.c: Updates for USE_MAPPED_LOCATION. Use build_unknown_wfl. (java_sprint_unicode): Take a character, not index in line. (java_sneak_unicode): Replaced by java_peek_unicode. (java_unget_unicode): No longer used. (java_allocate_new_line, java_store_unicode): Removed, since we no longer remember "lines". (java_new_lexer): Update for new data structures. (java_read_char): Move unget_value checking to java_read_unicode. (java_get_unicode, java_peek_unicode, java_next_unicode): New more efficient functions that are used directly when lexing. (java_read_unicode_collapsing_terminators): No longer needed. (java_parse_end_comment, java_parse_escape_sequence, do_java_lex): Re-organize to use java_peek_unicode to avoid java_unget_unicode. (java_parse_escape_sequence): Rewrite to be simpler / more efficient. (do_java_lex): Lots of moving around to avoid java_unget_unicode, combine switch branches, and test for common token kinds earlier. (java_lex_error): Rewrite. * jv-scan.c (expand_location): New function, copied from tree.c. (main): Set ctxp->filename instead of setting input_filename directly. From-SVN: r88367
author: Per Bothner <per@bothner.com> 2004-09-30 16:35:07 -0700
committer: Per Bothner <bothner@gcc.gnu.org> 2004-09-30 16:35:07 -0700
commit: 9ec819d49ca815551b3fd8cd4e1202e898bac8d5 (patch)
tree: d3e8024e8ba3df366d03490e5511ea3378e8ed60 /gcc/java/lex.h
parent: cb3a14307a07a4f9d719f229caff8c47aed42624 (diff)
download: gcc-9ec819d49ca815551b3fd8cd4e1202e898bac8d5.zip
gcc-9ec819d49ca815551b3fd8cd4e1202e898bac8d5.tar.gz
gcc-9ec819d49ca815551b3fd8cd4e1202e898bac8d5.tar.bz2
1 files changed, 43 insertions, 78 deletions
diff --git a/gcc/java/lex.h b/gcc/java/lex.h
index bae5047..c9d5ac6 100644
--- a/gcc/java/lex.h
+++ b/gcc/java/lex.h
@@ -42,65 +42,9 @@ typedef unsigned short unicode_t;
 /* Default encoding to use if no encoding is specified.  */
 #define DEFAULT_ENCODING "UTF-8"
 
-/* Debug macro to print-out what we match  */
-#ifdef JAVA_LEX_DEBUG
-#ifdef JAVA_LEX_DEBUG_CHAR
-#define JAVA_LEX_CHAR(c)      printf ("java_lex:%d: char '%c'.%d\n", 	\
-				      lineno, (c < 128 ? c : '.'), c);
-#else
-#define JAVA_LEX_CHAR(c)
-#endif
-#define JAVA_LEX_KW(c)        printf ("java_lex:%d: keyword: '%s'\n", lineno,c)
-#define JAVA_LEX_ID(s)        printf ("java_lex:%d: ID: '%s'\n",	\
-				      lineno,				\
-				      (all_ascii ? s : "<U>"))
-#define JAVA_LEX_LIT(s, r)    printf ("java_lex:%d: literal '%s'_%d\n",	\
-				      lineno, s, r)
-#define JAVA_LEX_CHAR_LIT(s)  printf ("java_lex:%d: literal '%d'\n", lineno, s)
-#define JAVA_LEX_STR_LIT(s)   {						 \
-				 int i;					 \
-				 printf ("java_lex:%d: literal '%s'\n",  \
-					 lineno, s);			 \
-			       }
-#define JAVA_LEX_SEP(c)       printf ("java_lex:%d: separator '%c'\n",lineno,c)
-#define JAVA_LEX_OP(c)        printf ("java_lex:%d: operator '%s'\n", lineno,c)
-#else
-#define JAVA_LEX_CHAR(c)
-#define JAVA_LEX_KW(c)
-#define JAVA_LEX_ID(s)
-#define JAVA_LEX_LIT(s,r)
-#define JAVA_LEX_CHAR_LIT(s)
-#define JAVA_LEX_STR_LIT(s)
-#define JAVA_LEX_SEP(c)
-#define JAVA_LEX_OP(s)
-#endif
-
-/* Line information containers  */
-struct java_line {
-  unicode_t *line;		/* The line's unicode */
-  char      *unicode_escape_p;	/* The matching char was a unicode escape */
-  unicode_t ahead[1];		/* Character ahead */
-  char unicode_escape_ahead_p;	/* Character ahead is a unicode escape */
-  int max;			/* buffer's max size */
-  int size;			/* number of unicodes */
-  int current;			/* Current position, unicode based */
-  int char_col;			/* Current position, input char based */
-  int lineno;			/* Its line number */
-  int white_space_only;		/* If it contains only white spaces */
-};
-#define JAVA_COLUMN_DELTA(p)						\
-  (ctxp->c_line->unicode_escape_p [ctxp->c_line->current+(p)] ? 6 : 	\
-   (ctxp->c_line->line [ctxp->c_line->current+(p)] == '\t' ? 8 : 1))
-
-struct java_error {
-  struct java_line *line;
-  int error;
-};
-
 typedef struct java_lc_s GTY(()) {
-  int line;
-  int prev_col;
-  int col;
+  int line;		/* line number (1-based) */
+  int col;		/* column number (1-based) */
 } java_lc;
 
 struct java_lexer
@@ -111,15 +55,33 @@ struct java_lexer
   /* Number of consecutive backslashes we've read.  */
   int bs_count;
 
-  /* If nonzero, a value that was pushed back.  */
+  /* Next available Unicode character.
+   * This is post-Unicode-escape-processing. -1 if EOF. */
+  int next_unicode;
+
+  /* True if next_unicode is next available character, or EOF. */
+  bool avail_unicode;
+
+  /* Number of source columns of the previous Unicode character (next_unicode).
+     If next_unicode==-2, then this is the number of columns of the previous
+     Unicode character (most recent result of java_{get,peek}_unicode). */
+  int next_columns;
+
+  /* If nonzero, a value that was pushed back.  This is a unicode character,
+     but (unlike next_unicode) is pre-'\uXXXX'-processing.  It is also used
+     when a '\r' is *not* followed by a '\n'. */
   unicode_t unget_value;
 
-  /* If nonzero, we've hit EOF.  Used only by java_get_unicode().  */
-  unsigned int hit_eof : 1;
-  
   /* Name of the character encoding we're using.  */
   const char *encoding;
 
+  /* Current source position. */
+  java_lc position;
+
+#ifndef USE_MAPPED_LOCATION
+  java_lc token_start;		     /* Error's line column info */
+#endif
+
 #ifdef HAVE_ICONV
   /* Nonzero if we've read any bytes.  We only recognize the
      byte-order-marker (BOM) as the first word.  */
@@ -168,7 +130,12 @@ extern void java_destroy_lexer (java_lexer *);
 #define JAVA_LINE_MAX 80
 
 /* Build a location compound integer */
-#define BUILD_LOCATION() ((ctxp->elc.line << 12) | (ctxp->elc.col & 0xfff))
+#ifdef USE_MAPPED_LOCATION
+#define BUILD_LOCATION() input_location
+#else
+#define BUILD_LOCATION() ((ctxp->lexer->token_start.line << 12) \
+			  | (ctxp->lexer->token_start.col & 0xfff))
+#endif
 
 /* Those macros are defined differently if we compile jc1-lite
    (JC1_LITE defined) or jc1.  */
@@ -190,7 +157,7 @@ extern void java_destroy_lexer (java_lexer *);
 #define SET_LVAL_NODE(NODE)
 #define BUILD_ID_WFL(EXP) (EXP)
 #define JAVA_FLOAT_RANGE_ERROR(S) {}
-#define JAVA_INTEGRAL_RANGE_ERROR(S) do { } while (0)
+#define JAVA_RANGE_ERROR(S) do { } while (0)
 
 #else
 
@@ -227,21 +194,19 @@ extern void java_destroy_lexer (java_lexer *);
 /* Wrap identifier around a wfl */
 #define BUILD_ID_WFL(EXP) build_wfl_node ((EXP))
 /* Special ways to report error on numeric literals  */
-#define JAVA_FLOAT_RANGE_ERROR(m)					  \
-  {									  \
-    char msg [1024];							  \
-    int i = ctxp->c_line->current;					  \
-    ctxp->c_line->current = number_beginning;				  \
-    sprintf (msg, "Floating point literal exceeds range of `%s'", (m)); \
-    java_lex_error (msg, 0);						  \
-    ctxp->c_line->current = i;						  \
+#define JAVA_FLOAT_RANGE_ERROR(m)					\
+  {									\
+    char *msg = xmalloc (100 + strlen (m));				\
+    sprintf (msg, "Floating point literal exceeds range of `%s'", (m));	\
+    JAVA_RANGE_ERROR(msg);						\
+    free (msg);								\
   }
-#define JAVA_INTEGRAL_RANGE_ERROR(m)		\
-  do {						\
-    int i = ctxp->c_line->current;		\
-    ctxp->c_line->current = number_beginning;	\
-    java_lex_error (m, 0);			\
-    ctxp->c_line->current = i;			\
+#define JAVA_RANGE_ERROR(msg)						\
+  do {									\
+    int save_col = ctxp->lexer->position.col;				\
+    ctxp->lexer->position.col = number_beginning;			\
+    java_lex_error (msg, 0);						\
+    ctxp->lexer->position.col = save_col;				\
   } while (0)
 
 #endif /* Definitions for jc1 compilation only */
author	Per Bothner <per@bothner.com>	2004-09-30 16:35:07 -0700
committer	Per Bothner <bothner@gcc.gnu.org>	2004-09-30 16:35:07 -0700
commit	9ec819d49ca815551b3fd8cd4e1202e898bac8d5 (patch)
tree	d3e8024e8ba3df366d03490e5511ea3378e8ed60 /gcc/java/lex.h
parent	cb3a14307a07a4f9d719f229caff8c47aed42624 (diff)
download	gcc-9ec819d49ca815551b3fd8cd4e1202e898bac8d5.zip gcc-9ec819d49ca815551b3fd8cd4e1202e898bac8d5.tar.gz gcc-9ec819d49ca815551b3fd8cd4e1202e898bac8d5.tar.bz2