about | summary | refs | log | tree | commit | diff
path: root/gcc/cpplex.c
diff options
context:
space:
mode:
author Zack Weinberg <zack@gcc.gnu.org> 2003-07-05 00:24:00 +0000
committer Zack Weinberg <zack@gcc.gnu.org> 2003-07-05 00:24:00 +0000
commit e6cc3a24c237713413070f4a5dc35b55dc2715b8 (patch)
tree 34c7734f7acee49beff2b3d99cbdf53576456697 /gcc/cpplex.c
parent 61aeb06fe596bd822b665d65a271804efdaf0053 (diff)
download gcc-e6cc3a24c237713413070f4a5dc35b55dc2715b8.zip
gcc-e6cc3a24c237713413070f4a5dc35b55dc2715b8.tar.gz
gcc-e6cc3a24c237713413070f4a5dc35b55dc2715b8.tar.bz2
cpplib.h (CPP_AT_NAME, [...]): New token types.
* cpplib.h (CPP_AT_NAME, CPP_OBJC_STRING): New token types. (struct cpp_options): Add narrow_charset, wide_charset, bytes_big_endian fields. Remove EBCDIC field. (cpp_init_iconv, cpp_interpret_string): New external interfaces. * cpphash.h: Include <iconv.h> if we have it, otherwise provide a dummy definition of iconv_t. (struct cpp_reader): Add narrow_cset_desc and wide_cset_desc fields. (_cpp_valid_ucn): Update prototype. (_cpp_destroy_iconv): New prototype. * doc/cpp.texi: Document character set handling. * doc/cppopts.texi: Document -fexec-charset= and -fexec-wide-charset=. * doc/extend.texi: Delete entire section on multiline strings. Rewrite section on __FUNCTION__ etc now that these are variables in C. * cppucnid.tab, cppucnid.pl: New files. * cppucnid.h: New generated file. * cppcharset.c: Include cppucnid.h. Lots of commentary added. (iconv_open, iconv, iconv_close): Provide dummy definitions if !HAVE_ICONV. (SOURCE_CHARSET, struct strbuf, init_iconv_desc, cpp_init_iconv, _cpp_destroy_iconv, convert_cset, width_to_mask, convert_ucn, emit_numeric_escape, convert_hex, convert_oct, convert_escape, cpp_interpret_string, narrow_str_to_charconst, wide_str_to_charconst): New. (ucn_valid_in_identifier): Use a binary search through the ucnranges table defined in cppucnid.h, not a long chain of if statements. (_cpp_valid_ucn): Add a limit pointer. Downgrade "universal character names are only valid in C++ and C99" to a warning. Issue the "meaning of \[uU] is different in traditional C" warning here. Take care not to let iconv see an invalid UCS value if we get a malformed UCN. Issue an error if we don't have iconv. (cpp_interpret_charconst): Moved here from cpplex.c. Use cpp_interpret_string to do the heavy lifting. * cppinit.c (cpp_create_reader): Initialize bytes_big_endian, narrow_charset, wide_charset fields of options structure. (cpp_destroy): Call _cpp_destroy_iconv. * cpplex.c (forms_identifier_p): Adjust call to _cpp_valid_ucn. 
(maybe_read_ucn, hex_digit_value, cpp_parse_escape): Delete. (cpp_interpret_charconst): Moved to cppcharset.c. * cpplib.c (dequote_string): Delete. (interpret_string_notranslate): New. (do_line, do_linemarker): Use interpret_string_notranslate. * Makefile.in (cppcharset.o): Depend on cppucnid.h. * c-common.c (fname_string, combine_strings): Delete. * c-common.h (fname_string, combine_strings): Delete prototypes. * c-lex.c (ignore_escape_flag): Delete. (cb_ident): Use cpp_interpret_string, not lex_string. (get_nonpadding_token): New function. (c_lex): Handle Objective-C @-prefixed identifiers and strings here. Adjust calls to lex_string. Don't write *value twice. (lex_string): Now handles string constant concatenation. Most of the work handed off to cpp_interpret_string. Call fix_string_type here. * c-parse.in (STRING_FUNC_NAME, VAR_FUNC_NAME): Replace with FUNC_NAME, throughout. (OBJC_STRING): New token type. (primary:STRING): No need to call fix_string_type here. (primary:objc_string): Make that OBJC_STRING. (objc_string nonterminal): Delete. (yylexname): Delete code to handle fake string constants. (yylexstring): Delete entirely. (_yylex): Handle CPP_AT_NAME and CPP_OBJC_STRING. No need to handle CPP_ATSIGN. * c.opt (-fexec-charset=, -fwide-exec-charset=): New options. * c-opts.c (missing_arg, c_common_handle_option): Handle OPT_fexec_charset_ and OPT_fwide_exec_charset_. (c_common_init): Set cpp_opts->bytes_big_endian, not cpp_opts->EBCDIC. Call cpp_init_iconv. (print_help): Document -fexec-charset= and -fexec-wide-charset=. (TARGET_EBCDIC): Delete default definition. * objc/objc-act.c (build_objc_string_object): No need to handle string constant concatenation. cp: * parser.c (cp_lexer_read_token): No need to handle string constant concatenation. testsuite: * gcc.c-torture/execute/wchar_t-1.x: New file; XFAIL wchar_t-1.c everywhere. * gcc.dg/concat.c: Concatenation of string constants with __FUNCTION__ / __PRETTY_FUNCTION__ is now a hard error. 
* gcc.dg/wtr-strcat-1.c: Loosen dg-warning regexp. * gcc.dg/cpp/escape-2.c: Use wide character constants where necessary to avoid multi-character character constant warning. * gcc.dg/cpp/escape.c: Likewise. * gcc.dg/cpp/ucs.c: Likewise. Remove backslashes from dg-bogus comments, as they confuse Tcl. Fix a typo. libstdc++-v3: * testsuite/22_locale/collate/compare/wchar_t/2.cc * testsuite/22_locale/collate/compare/wchar_t/wrapped_env.cc * testsuite/22_locale/collate/compare/wchar_t/wrapped_locale.cc * testsuite/22_locale/collate/hash/wchar_t/2.cc * testsuite/22_locale/collate/hash/wchar_t/wrapped_env.cc * testsuite/22_locale/collate/hash/wchar_t/wrapped_locale.cc * testsuite/22_locale/collate/transform/wchar_t/2.cc * testsuite/22_locale/collate/transform/wchar_t/wrapped_env.cc * testsuite/22_locale/collate/transform/wchar_t/wrapped_locale.cc: XFAIL on all targets. From-SVN: r68952
Diffstat (limited to 'gcc/cpplex.c')
-rw-r--r-- gcc/cpplex.c 289
1 files changed, 1 insertions, 288 deletions
diff --git a/gcc/cpplex.c b/gcc/cpplex.c
index c536c76..edb765d 100644
--- a/gcc/cpplex.c
+++ b/gcc/cpplex.c
@@ -64,10 +64,8 @@ static void create_literal (cpp_reader *, cpp_token *, const uchar *,
unsigned int, enum cpp_ttype);
static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
static int name_p (cpp_reader *, const cpp_string *);
-static cppchar_t maybe_read_ucn (cpp_reader *, const uchar **);
static tokenrun *next_tokenrun (tokenrun *);
-static unsigned int hex_digit_value (unsigned int);
static _cpp_buff *new_buff (size_t);
@@ -397,7 +395,7 @@ forms_identifier_p (cpp_reader *pfile, int first)
&& (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
{
buffer->cur += 2;
- if (_cpp_valid_ucn (pfile, &buffer->cur, 1 + !first))
+ if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first))
return true;
buffer->cur -= 2;
}
@@ -1316,291 +1314,6 @@ cpp_output_line (cpp_reader *pfile, FILE *fp)
putc ('\n', fp);
}
-/* Returns the value of a hexadecimal digit. */
-static unsigned int
-hex_digit_value (unsigned int c)
-{
- if (hex_p (c))
- return hex_value (c);
- else
- abort ();
-}
-
-/* Read a possible universal character name starting at *PSTR. */
-static cppchar_t
-maybe_read_ucn (cpp_reader *pfile, const uchar **pstr)
-{
- cppchar_t result, c = (*pstr)[-1];
-
- result = _cpp_valid_ucn (pfile, pstr, false);
- if (result)
- {
- if (CPP_WTRADITIONAL (pfile))
- cpp_error (pfile, DL_WARNING,
- "the meaning of '\\%c' is different in traditional C",
- (int) c);
-
- if (CPP_OPTION (pfile, EBCDIC))
- {
- cpp_error (pfile, DL_ERROR,
- "universal character with an EBCDIC target");
- result = 0x3f; /* EBCDIC invalid character */
- }
- }
-
- return result;
-}
-
-/* Returns the value of an escape sequence, truncated to the correct
- target precision. PSTR points to the input pointer, which is just
- after the backslash. LIMIT is how much text we have. WIDE is true
- if the escape sequence is part of a wide character constant or
- string literal. Handles all relevant diagnostics. */
-cppchar_t
-cpp_parse_escape (cpp_reader *pfile, const unsigned char **pstr,
- const unsigned char *limit, int wide)
-{
- /* Values of \a \b \e \f \n \r \t \v respectively. */
- static const uchar ascii[] = { 7, 8, 27, 12, 10, 13, 9, 11 };
- static const uchar ebcdic[] = { 47, 22, 39, 12, 21, 13, 5, 11 };
-
- int unknown = 0;
- const unsigned char *str = *pstr, *charconsts;
- cppchar_t c, ucn, mask;
- unsigned int width;
-
- if (CPP_OPTION (pfile, EBCDIC))
- charconsts = ebcdic;
- else
- charconsts = ascii;
-
- if (wide)
- width = CPP_OPTION (pfile, wchar_precision);
- else
- width = CPP_OPTION (pfile, char_precision);
- if (width < BITS_PER_CPPCHAR_T)
- mask = ((cppchar_t) 1 << width) - 1;
- else
- mask = ~0;
-
- c = *str++;
- switch (c)
- {
- case '\\': case '\'': case '"': case '?': break;
- case 'b': c = charconsts[1]; break;
- case 'f': c = charconsts[3]; break;
- case 'n': c = charconsts[4]; break;
- case 'r': c = charconsts[5]; break;
- case 't': c = charconsts[6]; break;
- case 'v': c = charconsts[7]; break;
-
- case '(': case '{': case '[': case '%':
- /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
- '\%' is used to prevent SCCS from getting confused. */
- unknown = CPP_PEDANTIC (pfile);
- break;
-
- case 'a':
- if (CPP_WTRADITIONAL (pfile))
- cpp_error (pfile, DL_WARNING,
- "the meaning of '\\a' is different in traditional C");
- c = charconsts[0];
- break;
-
- case 'e': case 'E':
- if (CPP_PEDANTIC (pfile))
- cpp_error (pfile, DL_PEDWARN,
- "non-ISO-standard escape sequence, '\\%c'", (int) c);
- c = charconsts[2];
- break;
-
- case 'u': case 'U':
- ucn = maybe_read_ucn (pfile, &str);
- if (ucn)
- c = ucn;
- else
- unknown = true;
- break;
-
- case 'x':
- if (CPP_WTRADITIONAL (pfile))
- cpp_error (pfile, DL_WARNING,
- "the meaning of '\\x' is different in traditional C");
-
- {
- cppchar_t i = 0, overflow = 0;
- int digits_found = 0;
-
- while (str < limit)
- {
- c = *str;
- if (! ISXDIGIT (c))
- break;
- str++;
- overflow |= i ^ (i << 4 >> 4);
- i = (i << 4) + hex_digit_value (c);
- digits_found = 1;
- }
-
- if (!digits_found)
- cpp_error (pfile, DL_ERROR,
- "\\x used with no following hex digits");
-
- if (overflow | (i != (i & mask)))
- {
- cpp_error (pfile, DL_PEDWARN,
- "hex escape sequence out of range");
- i &= mask;
- }
- c = i;
- }
- break;
-
- case '0': case '1': case '2': case '3':
- case '4': case '5': case '6': case '7':
- {
- size_t count = 0;
- cppchar_t i = c - '0';
-
- while (str < limit && ++count < 3)
- {
- c = *str;
- if (c < '0' || c > '7')
- break;
- str++;
- i = (i << 3) + c - '0';
- }
-
- if (i != (i & mask))
- {
- cpp_error (pfile, DL_PEDWARN,
- "octal escape sequence out of range");
- i &= mask;
- }
- c = i;
- }
- break;
-
- default:
- unknown = 1;
- break;
- }
-
- if (unknown)
- {
- if (ISGRAPH (c))
- cpp_error (pfile, DL_PEDWARN,
- "unknown escape sequence '\\%c'", (int) c);
- else
- cpp_error (pfile, DL_PEDWARN,
- "unknown escape sequence: '\\%03o'", (int) c);
- }
-
- if (c > mask)
- {
- cpp_error (pfile, DL_PEDWARN,
- "escape sequence out of range for its type");
- c &= mask;
- }
-
- *pstr = str;
- return c;
-}
-
-/* Interpret a (possibly wide) character constant in TOKEN.
- WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN
- points to a variable that is filled in with the number of
- characters seen, and UNSIGNEDP to a variable that indicates whether
- the result has signed type. */
-cppchar_t
-cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token,
- unsigned int *pchars_seen, int *unsignedp)
-{
- const unsigned char *str, *limit;
- unsigned int chars_seen = 0;
- size_t width, max_chars;
- cppchar_t c, mask, result = 0;
- bool unsigned_p;
-
- str = token->val.str.text + 1 + (token->type == CPP_WCHAR);
- limit = token->val.str.text + token->val.str.len - 1;
-
- if (token->type == CPP_CHAR)
- {
- width = CPP_OPTION (pfile, char_precision);
- max_chars = CPP_OPTION (pfile, int_precision) / width;
- unsigned_p = CPP_OPTION (pfile, unsigned_char);
- }
- else
- {
- width = CPP_OPTION (pfile, wchar_precision);
- max_chars = 1;
- unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
- }
-
- if (width < BITS_PER_CPPCHAR_T)
- mask = ((cppchar_t) 1 << width) - 1;
- else
- mask = ~0;
-
- while (str < limit)
- {
- c = *str++;
-
- if (c == '\\')
- c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
-
-#ifdef MAP_CHARACTER
- if (ISPRINT (c))
- c = MAP_CHARACTER (c);
-#endif
-
- chars_seen++;
-
- /* Truncate the character, scale the result and merge the two. */
- c &= mask;
- if (width < BITS_PER_CPPCHAR_T)
- result = (result << width) | c;
- else
- result = c;
- }
-
- if (chars_seen == 0)
- cpp_error (pfile, DL_ERROR, "empty character constant");
- else if (chars_seen > 1)
- {
- /* Multichar charconsts are of type int and therefore signed. */
- unsigned_p = 0;
-
- if (chars_seen > max_chars)
- {
- chars_seen = max_chars;
- cpp_error (pfile, DL_WARNING,
- "character constant too long for its type");
- }
- else if (CPP_OPTION (pfile, warn_multichar))
- cpp_error (pfile, DL_WARNING, "multi-character character constant");
- }
-
- /* Sign-extend or truncate the constant to cppchar_t. The value is
- in WIDTH bits, but for multi-char charconsts it's value is the
- full target type's width. */
- if (chars_seen > 1)
- width *= max_chars;
- if (width < BITS_PER_CPPCHAR_T)
- {
- mask = ((cppchar_t) 1 << width) - 1;
- if (unsigned_p || !(result & (1 << (width - 1))))
- result &= mask;
- else
- result |= ~mask;
- }
-
- *pchars_seen = chars_seen;
- *unsignedp = unsigned_p;
- return result;
-}
-
/* Memory buffers. Changing these three constants can have a dramatic
effect on performance. The values here are reasonable defaults,
but might be tuned. If you adjust them, be sure to test across a