lex.h (_JAVA_IDENTIFIER_IGNORABLE): New macro.

* lex.h (_JAVA_IDENTIFIER_IGNORABLE): New macro.
(JAVA_ID_CHAR_P): Also try java_ignorable_control_p.
* lex.c (java_read_unicode): Removed `term_context' argument.
Recognize any number of `u' in `\u'.
(java_read_unicode_collapsing_terminators): New function.
(java_get_unicode): Use it.
(java_lineterminator): Removed.
(yylex): Produce error if character literal is newline or single
quote.  Return if eof found in middle of `//' comment.  EOF in `//'
comment is only an error if pedantic.
(java_ignorable_control_p): New function.
(java_parse_end_comment): Return if eof found in middle of comment.
Include flags.h.
* jv-scan.c (pedantic): New global.

From-SVN: r37232
author: Tom Tromey <tromey@cygnus.com> 2000-11-03 20:27:07 +0000
committer: Tom Tromey <tromey@gcc.gnu.org> 2000-11-03 20:27:07 +0000
commit: 747800ee8a9782cea34d002fe6a53439da75b25c (patch)
tree: 3ec9a05402bdb242bd150aacc46596d394cd2d1d /gcc/java/lex.c
parent: 35e9340fc9994dbd2025f8770593fa0136e33740 (diff)
download: gcc-747800ee8a9782cea34d002fe6a53439da75b25c.zip
gcc-747800ee8a9782cea34d002fe6a53439da75b25c.tar.gz
gcc-747800ee8a9782cea34d002fe6a53439da75b25c.tar.bz2
1 files changed, 78 insertions, 56 deletions
diff --git a/gcc/java/lex.c b/gcc/java/lex.c
index b26499b..2c123ce 100644
--- a/gcc/java/lex.c
+++ b/gcc/java/lex.c
@@ -35,9 +35,9 @@ The Free Software Foundation is independent of Sun Microsystems, Inc.  */
    Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
 
 #include "keyword.h"
+#include "flags.h"
 
 /* Function declaration  */
-static int java_lineterminator PARAMS ((unicode_t));
 static char *java_sprint_unicode PARAMS ((struct java_line *, int));
 static void java_unicode_2_utf8 PARAMS ((unicode_t));
 static void java_lex_error PARAMS ((const char *, int));
@@ -48,10 +48,13 @@ static tree build_wfl_node PARAMS ((tree));
 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
 static unicode_t java_parse_escape_sequence PARAMS ((void));
 static int java_letter_or_digit_p PARAMS ((unicode_t));
+static int java_ignorable_control_p PARAMS ((unicode_t));
 static int java_parse_doc_section PARAMS ((unicode_t));
 static void java_parse_end_comment PARAMS ((unicode_t));
 static unicode_t java_get_unicode PARAMS ((void));
-static unicode_t java_read_unicode PARAMS ((java_lexer *, int, int *));
+static unicode_t java_read_unicode PARAMS ((java_lexer *, int *));
+static unicode_t java_read_unicode_collapsing_terminators
+    PARAMS ((java_lexer *, int *));
 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
 static unicode_t java_read_char PARAMS ((java_lexer *));
 static void java_allocate_new_line PARAMS ((void));
@@ -494,9 +497,8 @@ java_store_unicode (l, c, unicode_escape_p)
 }
 
 static unicode_t
-java_read_unicode (lex, term_context, unicode_escape_p)
+java_read_unicode (lex, unicode_escape_p)
      java_lexer *lex;
-     int term_context;
      int *unicode_escape_p;
 {
   unicode_t c;
@@ -507,9 +509,7 @@ java_read_unicode (lex, term_context, unicode_escape_p)
   if (c != '\\')
     {
       lex->bs_count = 0;
-      return (term_context ? c : (java_lineterminator (c)
-				  ? '\n'
-				  : (unicode_t) c));
+      return c;
     }
 
   ++lex->bs_count;
@@ -532,13 +532,17 @@ java_read_unicode (lex, term_context, unicode_escape_p)
 		unicode |= (unicode_t)((c-'0') << shift);
 	      else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
 	        unicode |= (unicode_t)((10+(c | 0x20)-'a') << shift);
+	      else if (c == 'u')
+		{
+		  /* Recognize any number of u in \u.  */
+		  shift += 4;
+		}
 	      else
 		java_lex_error ("Non hex digit in Unicode escape sequence", 0);
 	    }
 	  lex->bs_count = 0;
 	  *unicode_escape_p = 1;
-	  return (term_context
-		  ? unicode : (java_lineterminator (c) ? '\n' : unicode));
+	  return unicode;
 	}
       lex->unget_value = c;
     }
@@ -546,6 +550,28 @@ java_read_unicode (lex, term_context, unicode_escape_p)
 }
 
 static unicode_t
+java_read_unicode_collapsing_terminators (lex, unicode_escape_p)
+     java_lexer *lex;
+     int *unicode_escape_p;
+{
+  unicode_t c = java_read_unicode (lex, unicode_escape_p);
+
+  if (c == '\r')
+    {
+      /* We have to read ahead to see if we got \r\n.  In that case we
+	 return a single line terminator.  */
+      int dummy;
+      c = java_read_unicode (lex, &dummy);
+      if (c != '\n')
+	lex->unget_value = c;
+      /* In either case we must return a newline.  */
+      c = '\n';
+    }
+
+  return c;
+}
+
+static unicode_t
 java_get_unicode ()
 {
   /* It's time to read a line when... */
@@ -554,54 +580,28 @@ java_get_unicode ()
       unicode_t c;
       java_allocate_new_line ();
       if (ctxp->c_line->line[0] != '\n')
-	for (;;)
-	  {
-	    int unicode_escape_p;
-	    c = java_read_unicode (ctxp->lexer, 0, &unicode_escape_p);
-	    java_store_unicode (ctxp->c_line, c, unicode_escape_p);
-	    if (ctxp->c_line->white_space_only 
-		&& !JAVA_WHITE_SPACE_P (c) && c!='\n')
-	      ctxp->c_line->white_space_only = 0;
-	    if ((c == '\n') || (c == UEOF))
-	      break;
-	  }
+	{
+	  for (;;)
+	    {
+	      int unicode_escape_p;
+	      c = java_read_unicode_collapsing_terminators (ctxp->lexer,
+							    &unicode_escape_p);
+	      java_store_unicode (ctxp->c_line, c, unicode_escape_p);
+	      if (ctxp->c_line->white_space_only 
+		  && !JAVA_WHITE_SPACE_P (c)
+		  && c != '\n'
+		  && c != UEOF)
+		ctxp->c_line->white_space_only = 0;
+	      if ((c == '\n') || (c == UEOF))
+		break;
+	    }
+	}
     }
   ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
   JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
   return ctxp->c_line->line [ctxp->c_line->current++];
 }
 
-static int
-java_lineterminator (c)
-     unicode_t c;
-{
-  if (c == '\n')		/* LF */
-    return 1;
-  else if (c == '\r')		/* CR */
-    {
-      int unicode_escape_p;
-      c = java_read_unicode (ctxp->lexer, 1, &unicode_escape_p);
-      if (c == '\r')
-	{
-	  /* In this case we will have another terminator.  For some
-	     reason the lexer has several different unget methods.  We
-	     can't use the `ahead' method because then the \r will end
-	     up in the actual text of the line, causing an error.  So
-	     instead we choose a very low-level method.  FIXME: this
-	     is incredibly ugly.  */
-	  ctxp->lexer->unget_value = c;
-	}
-      else if (c != '\n')
-	{
-	  ctxp->c_line->ahead [0] = c;
-	  ctxp->c_line->unicode_escape_ahead_p = unicode_escape_p;
-	}
-      return 1;
-    }
-  else 
-    return 0;
-}
-
 /* Parse the end of a C style comment.
  * C is the first character following the '/' and '*'. */
 static void
@@ -615,11 +615,13 @@ java_parse_end_comment (c)
 	{
 	case UEOF:
 	  java_lex_error ("Comment not terminated at end of input", 0);
+	  return;
 	case '*':
 	  switch (c = java_get_unicode ())
 	    {
 	    case UEOF:
 	      java_lex_error ("Comment not terminated at end of input", 0);
+	      return;
 	    case '/':
 	      return;
 	    case '*':	/* reparse only '*' */
@@ -692,6 +694,14 @@ java_letter_or_digit_p (c)
   return _JAVA_LETTER_OR_DIGIT_P (c);
 }
 
+/* This function to be used only by JAVA_ID_CHAR_P ().  */
+static int
+java_ignorable_control_p (c)
+     unicode_t c;
+{
+  return _JAVA_IDENTIFIER_IGNORABLE (c);
+}
+
 static unicode_t
 java_parse_escape_sequence ()
 {
@@ -747,7 +757,7 @@ java_parse_escape_sequence ()
     case '\n':
       return '\n';		/* ULT, caught latter as a specific error */
     default:
-      java_lex_error ("Illegal character in escape sequence", 0);
+      java_lex_error ("Invalid character in escape sequence", 0);
       return JAVA_CHAR_ERROR;
     }
 }
@@ -839,7 +849,14 @@ java_lex (java_lval)
 	    {
 	      c = java_get_unicode ();
 	      if (c == UEOF)
-		java_lex_error ("Comment not terminated at end of input", 0);
+		{
+		  /* It is ok to end a `//' comment with EOF, unless
+		     we're being pedantic.  */
+		  if (pedantic)
+		    java_lex_error ("Comment not terminated at end of input",
+				    0);
+		  return 0;
+		}
 	      if (c == '\n')	/* ULT */
 		goto step1;
 	    }
@@ -1134,6 +1151,7 @@ java_lex (java_lval)
     }
 
   ctxp->minus_seen = 0;
+
   /* Character literals */
   if (c == '\'')
     {
@@ -1141,10 +1159,14 @@ java_lex (java_lval)
       if ((c = java_get_unicode ()) == '\\')
 	char_lit = java_parse_escape_sequence ();
       else
-	char_lit = c;
+	{
+	  if (c == '\n' || c == '\'')
+	    java_lex_error ("Invalid character literal", 0);
+	  char_lit = c;
+	}
 
       c = java_get_unicode ();
-      
+
       if ((c == '\n') || (c == UEOF))
 	java_lex_error ("Character literal not terminated at end of line", 0);
       if (c != '\'')
@@ -1509,7 +1531,7 @@ java_lex (java_lval)
   /* Everything else is an invalid character in the input */
   {
     char lex_error_buffer [128];
-    sprintf (lex_error_buffer, "Invalid character '%s' in input", 
+    sprintf (lex_error_buffer, "Invalid character `%s' in input", 
 	     java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
     java_lex_error (lex_error_buffer, 1);
   }
author	Tom Tromey <tromey@cygnus.com>	2000-11-03 20:27:07 +0000
committer	Tom Tromey <tromey@gcc.gnu.org>	2000-11-03 20:27:07 +0000
commit	747800ee8a9782cea34d002fe6a53439da75b25c (patch)
tree	3ec9a05402bdb242bd150aacc46596d394cd2d1d /gcc/java/lex.c
parent	35e9340fc9994dbd2025f8770593fa0136e33740 (diff)
download	gcc-747800ee8a9782cea34d002fe6a53439da75b25c.zip gcc-747800ee8a9782cea34d002fe6a53439da75b25c.tar.gz gcc-747800ee8a9782cea34d002fe6a53439da75b25c.tar.bz2