cpplib.h (CPP_AT_NAME, [...]): New token types.

* cpplib.h (CPP_AT_NAME, CPP_OBJC_STRING): New token types. (struct cpp_options): Add narrow_charset, wide_charset, bytes_big_endian fields. Remove EBCDIC field. (cpp_init_iconv, cpp_interpret_string): New external interfaces. * cpphash.h: Include <iconv.h> if we have it, otherwise provide a dummy definition of iconv_t. (struct cpp_reader): Add narrow_cset_desc and wide_cset_desc fields. (_cpp_valid_ucn): Update prototype. (_cpp_destroy_iconv): New prototype. * doc/cpp.texi: Document character set handling. * doc/cppopts.texi: Document -fexec-charset= and -fexec-wide-charset=. * doc/extend.texi: Delete entire section on multiline strings. Rewrite section on __FUNCTION__ etc now that these are variables in C. * cppucnid.tab, cppucnid.pl: New files. * cppucnid.h: New generated file. * cppcharset.c: Include cppucnid.h. Lots of commentary added. (iconv_open, iconv, iconv_close): Provide dummy definitions if !HAVE_ICONV. (SOURCE_CHARSET, struct strbuf, init_iconv_desc, cpp_init_iconv, _cpp_destroy_iconv, convert_cset, width_to_mask, convert_ucn, emit_numeric_escape, convert_hex, convert_oct, convert_escape, cpp_interpret_string, narrow_str_to_charconst, wide_str_to_charconst): New. (ucn_valid_in_identifier): Use a binary search through the ucnranges table defined in cppucnid.h, not a long chain of if statements. (_cpp_valid_ucn): Add a limit pointer. Downgrade "universal character names are only valid in C++ and C99" to a warning. Issue the "meaning of \[uU] is different in traditional C" warning here. Take care not to let iconv see an invalid UCS value if we get a malformed UCN. Issue an error if we don't have iconv. (cpp_interpret_charconst): Moved here from cpplex.c. Use cpp_interpret_string to do the heavy lifting. * cppinit.c (cpp_create_reader): Initialize bytes_big_endian, narrow_charset, wide_charset fields of options structure. (cpp_destroy): Call _cpp_destroy_iconv. * cpplex.c (forms_identifier_p): Adjust call to _cpp_valid_ucn. (maybe_read_ucn, hex_digit_value, cpp_parse_escape): Delete. (cpp_interpret_charconst): Moved to cppcharset.c. * cpplib.c (dequote_string): Delete. (interpret_string_notranslate): New. (do_line, do_linemarker): Use interpret_string_notranslate. * Makefile.in (cppcharset.o): Depend on cppucnid.h. * c-common.c (fname_string, combine_strings): Delete. * c-common.h (fname_string, combine_strings): Delete prototypes. * c-lex.c (ignore_escape_flag): Delete. (cb_ident): Use cpp_interpret_string, not lex_string. (get_nonpadding_token): New function. (c_lex): Handle Objective-C @-prefixed identifiers and strings here. Adjust calls to lex_string. Don't write *value twice. (lex_string): Now handles string constant concatenation. Most of the work handed off to cpp_interpret_string. Call fix_string_type here. * c-parse.in (STRING_FUNC_NAME, VAR_FUNC_NAME): Replace with FUNC_NAME, throughout. (OBJC_STRING): New token type. (primary:STRING): No need to call fix_string_type here. (primary:objc_string): Make that OBJC_STRING. (objc_string nonterminal): Delete. (yylexname): Delete code to handle fake string constants. (yylexstring): Delete entirely. (_yylex): Handle CPP_AT_NAME and CPP_OBJC_STRING. No need to handle CPP_ATSIGN. * c.opt (-fexec-charset=, -fwide-exec-charset=): New options. * c-opts.c (missing_arg, c_common_handle_option): Handle OPT_fexec_charset_ and OPT_fwide_exec_charset_. (c_common_init): Set cpp_opts->bytes_big_endian, not cpp_opts->EBCDIC. Call cpp_init_iconv. (print_help): Document -fexec-charset= and -fexec-wide-charset=. (TARGET_EBCDIC): Delete default definition. * objc/objc-act.c (build_objc_string_object): No need to handle string constant concatenation. cp: * parser.c (cp_lexer_read_token): No need to handle string constant concatenation. testsuite: * gcc.c-torture/execute/wchar_t-1.x: New file; XFAIL wchar_t-1.c everywhere. * gcc.dg/concat.c: Concatenation of string constants with __FUNCTION__ / __PRETTY_FUNCTION__ is now a hard error. * gcc.dg/wtr-strcat-1.c: Loosen dg-warning regexp. * gcc.dg/cpp/escape-2.c: Use wide character constants where necessary to avoid multi-character character constant warning. * gcc.dg/cpp/escape.c: Likewise. * gcc.dg/cpp/ucs.c: Likewise. Remove backslashes from dg-bogus comments, as they confuse Tcl. Fix a typo. libstdc++-v3: * testsuite/22_locale/collate/compare/wchar_t/2.cc * testsuite/22_locale/collate/compare/wchar_t/wrapped_env.cc * testsuite/22_locale/collate/compare/wchar_t/wrapped_locale.cc * testsuite/22_locale/collate/hash/wchar_t/2.cc * testsuite/22_locale/collate/hash/wchar_t/wrapped_env.cc * testsuite/22_locale/collate/hash/wchar_t/wrapped_locale.cc * testsuite/22_locale/collate/transform/wchar_t/2.cc * testsuite/22_locale/collate/transform/wchar_t/wrapped_env.cc * testsuite/22_locale/collate/transform/wchar_t/wrapped_locale.cc: XFAIL on all targets. From-SVN: r68952
author: Zack Weinberg <zack@gcc.gnu.org> 2003-07-05 00:24:00 +0000
committer: Zack Weinberg <zack@gcc.gnu.org> 2003-07-05 00:24:00 +0000
commit: e6cc3a24c237713413070f4a5dc35b55dc2715b8 (patch)
tree: 34c7734f7acee49beff2b3d99cbdf53576456697 /gcc/cpplex.c
parent: 61aeb06fe596bd822b665d65a271804efdaf0053 (diff)
download: gcc-e6cc3a24c237713413070f4a5dc35b55dc2715b8.zip
gcc-e6cc3a24c237713413070f4a5dc35b55dc2715b8.tar.gz
gcc-e6cc3a24c237713413070f4a5dc35b55dc2715b8.tar.bz2
1 files changed, 1 insertions, 288 deletions
diff --git a/gcc/cpplex.c b/gcc/cpplex.c
index c536c76..edb765d 100644
--- a/gcc/cpplex.c
+++ b/gcc/cpplex.c
@@ -64,10 +64,8 @@ static void create_literal (cpp_reader *, cpp_token *, const uchar *,
 			    unsigned int, enum cpp_ttype);
 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
 static int name_p (cpp_reader *, const cpp_string *);
-static cppchar_t maybe_read_ucn (cpp_reader *, const uchar **);
 static tokenrun *next_tokenrun (tokenrun *);
 
-static unsigned int hex_digit_value (unsigned int);
 static _cpp_buff *new_buff (size_t);
 
 
@@ -397,7 +395,7 @@ forms_identifier_p (cpp_reader *pfile, int first)
       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
     {
       buffer->cur += 2;
-      if (_cpp_valid_ucn (pfile, &buffer->cur, 1 + !first))
+      if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first))
 	return true;
       buffer->cur -= 2;
     }
@@ -1316,291 +1314,6 @@ cpp_output_line (cpp_reader *pfile, FILE *fp)
   putc ('\n', fp);
 }
 
-/* Returns the value of a hexadecimal digit.  */
-static unsigned int
-hex_digit_value (unsigned int c)
-{
-  if (hex_p (c))
-    return hex_value (c);
-  else
-    abort ();
-}
-
-/* Read a possible universal character name starting at *PSTR.  */
-static cppchar_t
-maybe_read_ucn (cpp_reader *pfile, const uchar **pstr)
-{
-  cppchar_t result, c = (*pstr)[-1];
-
-  result = _cpp_valid_ucn (pfile, pstr, false);
-  if (result)
-    {
-      if (CPP_WTRADITIONAL (pfile))
-	cpp_error (pfile, DL_WARNING,
-		   "the meaning of '\\%c' is different in traditional C",
-		   (int) c);
-
-      if (CPP_OPTION (pfile, EBCDIC))
-	{
-	  cpp_error (pfile, DL_ERROR,
-		     "universal character with an EBCDIC target");
-	  result = 0x3f;  /* EBCDIC invalid character */
-	}
-    }
-
-  return result;
-}
-
-/* Returns the value of an escape sequence, truncated to the correct
-   target precision.  PSTR points to the input pointer, which is just
-   after the backslash.  LIMIT is how much text we have.  WIDE is true
-   if the escape sequence is part of a wide character constant or
-   string literal.  Handles all relevant diagnostics.  */
-cppchar_t
-cpp_parse_escape (cpp_reader *pfile, const unsigned char **pstr,
-		  const unsigned char *limit, int wide)
-{
-  /* Values of \a \b \e \f \n \r \t \v respectively.  */
-  static const uchar ascii[]  = {  7,  8, 27, 12, 10, 13,  9, 11 };
-  static const uchar ebcdic[] = { 47, 22, 39, 12, 21, 13,  5, 11 };
-
-  int unknown = 0;
-  const unsigned char *str = *pstr, *charconsts;
-  cppchar_t c, ucn, mask;
-  unsigned int width;
-
-  if (CPP_OPTION (pfile, EBCDIC))
-    charconsts = ebcdic;
-  else
-    charconsts = ascii;
-
-  if (wide)
-    width = CPP_OPTION (pfile, wchar_precision);
-  else
-    width = CPP_OPTION (pfile, char_precision);
-  if (width < BITS_PER_CPPCHAR_T)
-    mask = ((cppchar_t) 1 << width) - 1;
-  else
-    mask = ~0;
-
-  c = *str++;
-  switch (c)
-    {
-    case '\\': case '\'': case '"': case '?': break;
-    case 'b': c = charconsts[1];  break;
-    case 'f': c = charconsts[3];  break;
-    case 'n': c = charconsts[4];  break;
-    case 'r': c = charconsts[5];  break;
-    case 't': c = charconsts[6];  break;
-    case 'v': c = charconsts[7];  break;
-
-    case '(': case '{': case '[': case '%':
-      /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
-	 '\%' is used to prevent SCCS from getting confused.  */
-      unknown = CPP_PEDANTIC (pfile);
-      break;
-
-    case 'a':
-      if (CPP_WTRADITIONAL (pfile))
-	cpp_error (pfile, DL_WARNING,
-		   "the meaning of '\\a' is different in traditional C");
-      c = charconsts[0];
-      break;
-
-    case 'e': case 'E':
-      if (CPP_PEDANTIC (pfile))
-	cpp_error (pfile, DL_PEDWARN,
-		   "non-ISO-standard escape sequence, '\\%c'", (int) c);
-      c = charconsts[2];
-      break;
-
-    case 'u': case 'U':
-      ucn = maybe_read_ucn (pfile, &str);
-      if (ucn)
-	c = ucn;
-      else
-	unknown = true;
-      break;
-
-    case 'x':
-      if (CPP_WTRADITIONAL (pfile))
-	cpp_error (pfile, DL_WARNING,
-		   "the meaning of '\\x' is different in traditional C");
-
-      {
-	cppchar_t i = 0, overflow = 0;
-	int digits_found = 0;
-
-	while (str < limit)
-	  {
-	    c = *str;
-	    if (! ISXDIGIT (c))
-	      break;
-	    str++;
-	    overflow |= i ^ (i << 4 >> 4);
-	    i = (i << 4) + hex_digit_value (c);
-	    digits_found = 1;
-	  }
-
-	if (!digits_found)
-	  cpp_error (pfile, DL_ERROR,
-		       "\\x used with no following hex digits");
-
-	if (overflow | (i != (i & mask)))
-	  {
-	    cpp_error (pfile, DL_PEDWARN,
-		       "hex escape sequence out of range");
-	    i &= mask;
-	  }
-	c = i;
-      }
-      break;
-
-    case '0':  case '1':  case '2':  case '3':
-    case '4':  case '5':  case '6':  case '7':
-      {
-	size_t count = 0;
-	cppchar_t i = c - '0';
-
-	while (str < limit && ++count < 3)
-	  {
-	    c = *str;
-	    if (c < '0' || c > '7')
-	      break;
-	    str++;
-	    i = (i << 3) + c - '0';
-	  }
-
-	if (i != (i & mask))
-	  {
-	    cpp_error (pfile, DL_PEDWARN,
-		       "octal escape sequence out of range");
-	    i &= mask;
-	  }
-	c = i;
-      }
-      break;
-
-    default:
-      unknown = 1;
-      break;
-    }
-
-  if (unknown)
-    {
-      if (ISGRAPH (c))
-	cpp_error (pfile, DL_PEDWARN,
-		   "unknown escape sequence '\\%c'", (int) c);
-      else
-	cpp_error (pfile, DL_PEDWARN,
-		   "unknown escape sequence: '\\%03o'", (int) c);
-    }
-
-  if (c > mask)
-    {
-      cpp_error (pfile, DL_PEDWARN,
-		 "escape sequence out of range for its type");
-      c &= mask;
-    }
-
-  *pstr = str;
-  return c;
-}
-
-/* Interpret a (possibly wide) character constant in TOKEN.
-   WARN_MULTI warns about multi-character charconsts.  PCHARS_SEEN
-   points to a variable that is filled in with the number of
-   characters seen, and UNSIGNEDP to a variable that indicates whether
-   the result has signed type.  */
-cppchar_t
-cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token,
-			 unsigned int *pchars_seen, int *unsignedp)
-{
-  const unsigned char *str, *limit;
-  unsigned int chars_seen = 0;
-  size_t width, max_chars;
-  cppchar_t c, mask, result = 0;
-  bool unsigned_p;
-
-  str = token->val.str.text + 1 + (token->type == CPP_WCHAR);
-  limit = token->val.str.text + token->val.str.len - 1;
-
-  if (token->type == CPP_CHAR)
-    {
-      width = CPP_OPTION (pfile, char_precision);
-      max_chars = CPP_OPTION (pfile, int_precision) / width;
-      unsigned_p = CPP_OPTION (pfile, unsigned_char);
-    }
-  else
-    {
-      width = CPP_OPTION (pfile, wchar_precision);
-      max_chars = 1;
-      unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
-    }
-
-  if (width < BITS_PER_CPPCHAR_T)
-    mask = ((cppchar_t) 1 << width) - 1;
-  else
-    mask = ~0;
-
-  while (str < limit)
-    {
-      c = *str++;
-
-      if (c == '\\')
-	c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
-
-#ifdef MAP_CHARACTER
-      if (ISPRINT (c))
-	c = MAP_CHARACTER (c);
-#endif
-
-      chars_seen++;
-
-      /* Truncate the character, scale the result and merge the two.  */
-      c &= mask;
-      if (width < BITS_PER_CPPCHAR_T)
-	result = (result << width) | c;
-      else
-	result = c;
-    }
-
-  if (chars_seen == 0)
-    cpp_error (pfile, DL_ERROR, "empty character constant");
-  else if (chars_seen > 1)
-    {
-      /* Multichar charconsts are of type int and therefore signed.  */
-      unsigned_p = 0;
-
-      if (chars_seen > max_chars)
-	{
-	  chars_seen = max_chars;
-	  cpp_error (pfile, DL_WARNING,
-		     "character constant too long for its type");
-	}
-      else if (CPP_OPTION (pfile, warn_multichar))
-	cpp_error (pfile, DL_WARNING, "multi-character character constant");
-    }
-
-  /* Sign-extend or truncate the constant to cppchar_t.  The value is
-     in WIDTH bits, but for multi-char charconsts it's value is the
-     full target type's width.  */
-  if (chars_seen > 1)
-    width *= max_chars;
-  if (width < BITS_PER_CPPCHAR_T)
-    {
-      mask = ((cppchar_t) 1 << width) - 1;
-      if (unsigned_p || !(result & (1 << (width - 1))))
-	result &= mask;
-      else
-	result |= ~mask;
-    }
-
-  *pchars_seen = chars_seen;
-  *unsignedp = unsigned_p;
-  return result;
-}
-
 /* Memory buffers.  Changing these three constants can have a dramatic
    effect on performance.  The values here are reasonable defaults,
    but might be tuned.  If you adjust them, be sure to test across a
author	Zack Weinberg <zack@gcc.gnu.org>	2003-07-05 00:24:00 +0000
committer	Zack Weinberg <zack@gcc.gnu.org>	2003-07-05 00:24:00 +0000
commit	e6cc3a24c237713413070f4a5dc35b55dc2715b8 (patch)
tree	34c7734f7acee49beff2b3d99cbdf53576456697 /gcc/cpplex.c
parent	61aeb06fe596bd822b665d65a271804efdaf0053 (diff)
download	gcc-e6cc3a24c237713413070f4a5dc35b55dc2715b8.zip gcc-e6cc3a24c237713413070f4a5dc35b55dc2715b8.tar.gz gcc-e6cc3a24c237713413070f4a5dc35b55dc2715b8.tar.bz2