Makefile.in (c-lex.o, [...]): Update.

* Makefile.in (c-lex.o, LIBCPP_OBJS, cpplex.o): Update. * c-lex.c (MULTIBYTE_CHARS): Remove conditionals. (lex_string): Take cpp_string with full spelling. (cb_ident): Update. (c_lex): Update diagnostics. * cpplex.c (SPELL_NUMBER, SPELL_STRING): Combine into SPELL_LITERAL. (create_literal): New. (lex_string): Unterminated literals have type CPP_OTHER. (_cpp_lex_direct): Update calls to lex_string. Use create_literal for CPP_OTHER. (cpp_token_len, cpp_spell_token, cpp_output_token): Simplify. (_cpp_equiv_tokens, cpp_interpret_charconst): Update. * cpplib.c (parse_include, do_line, do_linemarker, destringize_and_run): Update for token storing full spelling. * cpplib.h: Update token spelling types. * cppmacro.c (stringify_arg, check_trad_stringification): Update for token storing full spelling. cp: * Make-lang.in (lex.o): Remove mbchar.h. * lex.c (MULTIBYTE_CHARS): Lose. * parser.c (cp_lexer_get_preprocessor_token): CPP_OTHER handled in c-lex.c. testsuite: * gcc.dg/cpp/include2.c: Update. * gcc.dg/cpp/multiline-2.c: New. * gcc.dg/cpp/multiline.c: Update. * gcc.dg/cpp/strify2.c: Update. * gcc.dg/cpp/trad/literals-2.c: Update. From-SVN: r66019
author: Neil Booth <neil@daikokuya.co.uk> 2003-04-23 22:44:06 +0000
committer: Neil Booth <neil@gcc.gnu.org> 2003-04-23 22:44:06 +0000
commit: 6338b35872d465cf27fdbbc43b5a146363c8f246 (patch)
tree: e4f819e101d1dc188ae9d2012e0cb8ab2239160d /gcc/cpplex.c
parent: 06f5e63748eeb66140858914bbffb149406789a9 (diff)
download: gcc-6338b35872d465cf27fdbbc43b5a146363c8f246.zip
gcc-6338b35872d465cf27fdbbc43b5a146363c8f246.tar.gz
gcc-6338b35872d465cf27fdbbc43b5a146363c8f246.tar.bz2
1 files changed, 71 insertions, 114 deletions
diff --git a/gcc/cpplex.c b/gcc/cpplex.c
index d6c617d3..c9c0641 100644
--- a/gcc/cpplex.c
+++ b/gcc/cpplex.c
@@ -26,14 +26,11 @@ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
 #include "cpplib.h"
 #include "cpphash.h"
 
-/* Tokens with SPELL_STRING store their spelling in the token list,
-   and it's length in the token->val.name.len.  */
 enum spell_type
 {
   SPELL_OPERATOR = 0,
   SPELL_IDENT,
-  SPELL_NUMBER,
-  SPELL_STRING,
+  SPELL_LITERAL,
   SPELL_NONE
 };
 
@@ -61,9 +58,11 @@ static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
 static cpp_hashnode *lex_identifier PARAMS ((cpp_reader *, const uchar *));
 static void lex_number PARAMS ((cpp_reader *, cpp_string *));
 static bool forms_identifier_p PARAMS ((cpp_reader *, int));
-static void lex_string PARAMS ((cpp_reader *, cpp_token *));
+static void lex_string PARAMS ((cpp_reader *, cpp_token *, const uchar *));
 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
 				  cppchar_t));
+static void create_literal PARAMS ((cpp_reader *, cpp_token *, const uchar *,
+				    unsigned int, enum cpp_ttype));
 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
 static cppchar_t maybe_read_ucn PARAMS ((cpp_reader *, const uchar **));
 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
@@ -468,63 +467,77 @@ lex_number (pfile, number)
   number->text = dest;
 }
 
+/* Create a token of type TYPE with a literal spelling.  */
+static void
+create_literal (pfile, token, base, len, type)
+     cpp_reader *pfile;
+     cpp_token *token;
+     const uchar *base;
+     unsigned int len;
+     enum cpp_ttype type;
+{
+  uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
+
+  memcpy (dest, base, len);
+  dest[len] = '\0';
+  token->type = type;
+  token->val.str.len = len;
+  token->val.str.text = dest;
+}
+
 /* Lexes a string, character constant, or angle-bracketed header file
-   name.  The stored string is guaranteed NUL-terminated, but it is
-   not guaranteed that this is the first NUL since embedded NULs are
-   preserved.  */
+   name.  The stored string contains the spelling, including opening
+   quote and any leading 'L'.  The token's type is set to that of the
+   literal, or to CPP_OTHER if it was not properly terminated.
+
+   The spelling is NUL-terminated, but it is not guaranteed that this
+   is the first NUL since embedded NULs are preserved.  */
 static void
-lex_string (pfile, token)
+lex_string (pfile, token, base)
      cpp_reader *pfile;
      cpp_token *token;
+     const uchar *base;
 {
-  cpp_buffer *buffer = pfile->buffer;
-  bool warned_nulls = false;
-  const uchar *base;
-  uchar *dest;
+  bool saw_NUL = false;
+  const uchar *cur;
   cppchar_t terminator;
-
-  base = buffer->cur;
-  terminator = base[-1];
-  if (terminator == '<')
-    terminator = '>';
+  enum cpp_ttype type;
+
+  cur = base;
+  terminator = *cur++;
+  if (terminator == 'L')
+    terminator = *cur++;
+  if (terminator == '\"')
+    type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
+  else if (terminator == '\'')
+    type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
+  else
+    terminator = '>', type = CPP_HEADER_NAME;
 
   for (;;)
     {
-      cppchar_t c = *buffer->cur++;
+      cppchar_t c = *cur++;
 
       /* In #include-style directives, terminators are not escapable.  */
-      if (c == '\\' && !pfile->state.angled_headers && *buffer->cur != '\n')
-	buffer->cur++;
-      else if (c == terminator || c == '\n')
+      if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
+	cur++;
+      else if (c == terminator)
 	break;
-      else if (c == '\0')
+      else if (c == '\n')
 	{
-	  if (!warned_nulls)
-	    {
-	      warned_nulls = true;
-	      cpp_error (pfile, DL_WARNING,
-			 "null character(s) preserved in literal");
-	    }
+	  cur--;
+	  type = CPP_OTHER;
+	  break;
 	}
+      else if (c == '\0')
+	saw_NUL = true;
     }
 
-  token->val.str.len = buffer->cur - base - 1;
-  dest = _cpp_unaligned_alloc (pfile, token->val.str.len + 1);
-  memcpy (dest, base, token->val.str.len);
-  dest[token->val.str.len] = '\0';
-  token->val.str.text = dest;
+  if (saw_NUL && !pfile->state.skipping)
+    cpp_error (pfile, DL_WARNING, "null character(s) preserved in literal");
 
-  if (buffer->cur[-1] == '\n')
-    {
-      /* No string literal may extend over multiple lines.  In
-	 assembly language, suppress the error except for <>
-	 includes.  This is a kludge around not knowing where
-	 comments are.  */
-      if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>')
-	cpp_error (pfile, DL_ERROR, "missing terminating %c character",
-		   (int) terminator);
-      buffer->cur--;
-    }
+  pfile->buffer->cur = cur;
+  create_literal (pfile, token, base, cur - base, type);
 }
 
 /* The stored comment includes the comment start and any terminator.  */
@@ -817,9 +830,7 @@ _cpp_lex_direct (pfile)
       /* 'L' may introduce wide characters or strings.  */
       if (*buffer->cur == '\'' || *buffer->cur == '"')
 	{
-	  result->type = (*buffer->cur == '"' ? CPP_WSTRING: CPP_WCHAR);
-	  buffer->cur++;
-	  lex_string (pfile, result);
+	  lex_string (pfile, result, buffer->cur - 1);
 	  break;
 	}
       /* Fall through.  */
@@ -848,8 +859,7 @@ _cpp_lex_direct (pfile)
 
     case '\'':
     case '"':
-      result->type = c == '"' ? CPP_STRING: CPP_CHAR;
-      lex_string (pfile, result);
+      lex_string (pfile, result, buffer->cur - 1);
       break;
 
     case '/':
@@ -905,8 +915,7 @@ _cpp_lex_direct (pfile)
     case '<':
       if (pfile->state.angled_headers)
 	{
-	  result->type = CPP_HEADER_NAME;
-	  lex_string (pfile, result);
+	  lex_string (pfile, result, buffer->cur - 1);
 	  break;
 	}
 
@@ -1078,15 +1087,8 @@ _cpp_lex_direct (pfile)
       }
 
     default:
-      {
-	uchar *dest = _cpp_unaligned_alloc (pfile, 1 + 1);
-	dest[0] = c;
-	dest[1] = '\0';
-	result->type = CPP_OTHER;
-	result->val.str.len = 1;
-	result->val.str.text = dest;
-	break;
-      }
+      create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
+      break;
     }
 
   return result;
@@ -1103,8 +1105,7 @@ cpp_token_len (token)
   switch (TOKEN_SPELL (token))
     {
     default:		len = 0;				break;
-    case SPELL_NUMBER:
-    case SPELL_STRING:	len = token->val.str.len;		break;
+    case SPELL_LITERAL:	len = token->val.str.len;		break;
     case SPELL_IDENT:	len = NODE_LEN (token->val.node);	break;
     }
   /* 1 for whitespace, 4 for comment delimiters.  */
@@ -1147,34 +1148,11 @@ cpp_spell_token (pfile, token, buffer)
       buffer += NODE_LEN (token->val.node);
       break;
 
-    case SPELL_NUMBER:
+    case SPELL_LITERAL:
       memcpy (buffer, token->val.str.text, token->val.str.len);
       buffer += token->val.str.len;
       break;
 
-    case SPELL_STRING:
-      {
-	int left, right, tag;
-	switch (token->type)
-	  {
-	  case CPP_STRING:	left = '"';  right = '"';  tag = '\0'; break;
-	  case CPP_WSTRING:	left = '"';  right = '"';  tag = 'L';  break;
-	  case CPP_CHAR:	left = '\''; right = '\''; tag = '\0'; break;
-    	  case CPP_WCHAR:	left = '\''; right = '\''; tag = 'L';  break;
-	  case CPP_HEADER_NAME:	left = '<';  right = '>';  tag = '\0'; break;
-	  default:
-	    cpp_error (pfile, DL_ICE, "unknown string token %s\n",
-		       TOKEN_NAME (token));
-	    return buffer;
-	  }
-	if (tag) *buffer++ = tag;
-	*buffer++ = left;
-	memcpy (buffer, token->val.str.text, token->val.str.len);
-	buffer += token->val.str.len;
-	*buffer++ = right;
-      }
-      break;
-
     case SPELL_NONE:
       cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
       break;
@@ -1243,31 +1221,10 @@ cpp_output_token (token, fp)
       fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
     break;
 
-    case SPELL_NUMBER:
+    case SPELL_LITERAL:
       fwrite (token->val.str.text, 1, token->val.str.len, fp);
       break;
 
-    case SPELL_STRING:
-      {
-	int left, right, tag;
-	switch (token->type)
-	  {
-	  case CPP_STRING:	left = '"';  right = '"';  tag = '\0'; break;
-	  case CPP_WSTRING:	left = '"';  right = '"';  tag = 'L';  break;
-	  case CPP_CHAR:	left = '\''; right = '\''; tag = '\0'; break;
-    	  case CPP_WCHAR:	left = '\''; right = '\''; tag = 'L';  break;
-	  case CPP_HEADER_NAME:	left = '<';  right = '>';  tag = '\0'; break;
-	  default:
-	    fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
-	    return;
-	  }
-	if (tag) putc (tag, fp);
-	putc (left, fp);
-	fwrite (token->val.str.text, 1, token->val.str.len, fp);
-	putc (right, fp);
-      }
-      break;
-
     case SPELL_NONE:
       /* An error, most probably.  */
       break;
@@ -1289,8 +1246,7 @@ _cpp_equiv_tokens (a, b)
 	return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
       case SPELL_IDENT:
 	return a->val.node == b->val.node;
-      case SPELL_NUMBER:
-      case SPELL_STRING:
+      case SPELL_LITERAL:
 	return (a->val.str.len == b->val.str.len
 		&& !memcmp (a->val.str.text, b->val.str.text,
 			    a->val.str.len));
@@ -1588,14 +1544,15 @@ cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp)
      unsigned int *pchars_seen;
      int *unsignedp;
 {
-  const unsigned char *str = token->val.str.text;
-  const unsigned char *limit = str + token->val.str.len;
+  const unsigned char *str, *limit;
   unsigned int chars_seen = 0;
   size_t width, max_chars;
   cppchar_t c, mask, result = 0;
   bool unsigned_p;
 
-  /* Width in bits.  */
+  str = token->val.str.text + 1 + (token->type == CPP_WCHAR);
+  limit = token->val.str.text + token->val.str.len - 1;
+
   if (token->type == CPP_CHAR)
     {
       width = CPP_OPTION (pfile, char_precision);
author	Neil Booth <neil@daikokuya.co.uk>	2003-04-23 22:44:06 +0000
committer	Neil Booth <neil@gcc.gnu.org>	2003-04-23 22:44:06 +0000
commit	6338b35872d465cf27fdbbc43b5a146363c8f246 (patch)
tree	e4f819e101d1dc188ae9d2012e0cb8ab2239160d /gcc/cpplex.c
parent	06f5e63748eeb66140858914bbffb149406789a9 (diff)
download	gcc-6338b35872d465cf27fdbbc43b5a146363c8f246.zip gcc-6338b35872d465cf27fdbbc43b5a146363c8f246.tar.gz gcc-6338b35872d465cf27fdbbc43b5a146363c8f246.tar.bz2