diff options
author | Zack Weinberg <zack@gcc.gnu.org> | 2003-07-05 00:24:00 +0000 |
---|---|---|
committer | Zack Weinberg <zack@gcc.gnu.org> | 2003-07-05 00:24:00 +0000 |
commit | e6cc3a24c237713413070f4a5dc35b55dc2715b8 (patch) | |
tree | 34c7734f7acee49beff2b3d99cbdf53576456697 /gcc/cpplex.c | |
parent | 61aeb06fe596bd822b665d65a271804efdaf0053 (diff) | |
download | gcc-e6cc3a24c237713413070f4a5dc35b55dc2715b8.zip gcc-e6cc3a24c237713413070f4a5dc35b55dc2715b8.tar.gz gcc-e6cc3a24c237713413070f4a5dc35b55dc2715b8.tar.bz2 |
cpplib.h (CPP_AT_NAME, [...]): New token types.
* cpplib.h (CPP_AT_NAME, CPP_OBJC_STRING): New token types.
(struct cpp_options): Add narrow_charset, wide_charset,
bytes_big_endian fields. Remove EBCDIC field.
(cpp_init_iconv, cpp_interpret_string): New external interfaces.
* cpphash.h: Include <iconv.h> if we have it, otherwise
provide a dummy definition of iconv_t.
(struct cpp_reader): Add narrow_cset_desc and wide_cset_desc fields.
(_cpp_valid_ucn): Update prototype.
(_cpp_destroy_iconv): New prototype.
* doc/cpp.texi: Document character set handling.
* doc/cppopts.texi: Document -fexec-charset= and -fwide-exec-charset=.
* doc/extend.texi: Delete entire section on multiline strings.
Rewrite section on __FUNCTION__ etc now that these are
variables in C.
* cppucnid.tab, cppucnid.pl: New files.
* cppucnid.h: New generated file.
* cppcharset.c: Include cppucnid.h. Lots of commentary added.
(iconv_open, iconv, iconv_close): Provide dummy definitions
if !HAVE_ICONV.
(SOURCE_CHARSET, struct strbuf, init_iconv_desc, cpp_init_iconv,
_cpp_destroy_iconv, convert_cset, width_to_mask, convert_ucn,
emit_numeric_escape, convert_hex, convert_oct, convert_escape,
cpp_interpret_string, narrow_str_to_charconst,
wide_str_to_charconst): New.
(ucn_valid_in_identifier): Use a binary search through the
ucnranges table defined in cppucnid.h, not a long chain of if
statements.
(_cpp_valid_ucn): Add a limit pointer. Downgrade "universal
character names are only valid in C++ and C99" to a warning.
Issue the "meaning of \[uU] is different in traditional C"
warning here. Take care not to let iconv see an invalid UCS
value if we get a malformed UCN. Issue an error if we don't
have iconv.
(cpp_interpret_charconst): Moved here from cpplex.c. Use
cpp_interpret_string to do the heavy lifting.
* cppinit.c (cpp_create_reader): Initialize bytes_big_endian,
narrow_charset, wide_charset fields of options structure.
(cpp_destroy): Call _cpp_destroy_iconv.
* cpplex.c (forms_identifier_p): Adjust call to _cpp_valid_ucn.
(maybe_read_ucn, hex_digit_value, cpp_parse_escape): Delete.
(cpp_interpret_charconst): Moved to cppcharset.c.
* cpplib.c (dequote_string): Delete.
(interpret_string_notranslate): New.
(do_line, do_linemarker): Use interpret_string_notranslate.
* Makefile.in (cppcharset.o): Depend on cppucnid.h.
* c-common.c (fname_string, combine_strings): Delete.
* c-common.h (fname_string, combine_strings): Delete prototypes.
* c-lex.c (ignore_escape_flag): Delete.
(cb_ident): Use cpp_interpret_string, not lex_string.
(get_nonpadding_token): New function.
(c_lex): Handle Objective-C @-prefixed identifiers and strings here.
Adjust calls to lex_string. Don't write *value twice.
(lex_string): Now handles string constant concatenation.
Most of the work handed off to cpp_interpret_string.
Call fix_string_type here.
* c-parse.in (STRING_FUNC_NAME, VAR_FUNC_NAME): Replace with
FUNC_NAME, throughout.
(OBJC_STRING): New token type.
(primary:STRING): No need to call fix_string_type here.
(primary:objc_string): Make that OBJC_STRING.
(objc_string nonterminal): Delete.
(yylexname): Delete code to handle fake string constants.
(yylexstring): Delete entirely.
(_yylex): Handle CPP_AT_NAME and CPP_OBJC_STRING. No need
to handle CPP_ATSIGN.
* c.opt (-fexec-charset=, -fwide-exec-charset=): New options.
* c-opts.c (missing_arg, c_common_handle_option): Handle
OPT_fexec_charset_ and OPT_fwide_exec_charset_.
(c_common_init): Set cpp_opts->bytes_big_endian, not
cpp_opts->EBCDIC. Call cpp_init_iconv.
(print_help): Document -fexec-charset= and -fwide-exec-charset=.
(TARGET_EBCDIC): Delete default definition.
* objc/objc-act.c (build_objc_string_object): No need to
handle string constant concatenation.
cp:
* parser.c (cp_lexer_read_token): No need to handle string
constant concatenation.
testsuite:
* gcc.c-torture/execute/wchar_t-1.x: New file; XFAIL wchar_t-1.c
everywhere.
* gcc.dg/concat.c: Concatenation of string constants with
__FUNCTION__ / __PRETTY_FUNCTION__ is now a hard error.
* gcc.dg/wtr-strcat-1.c: Loosen dg-warning regexp.
* gcc.dg/cpp/escape-2.c: Use wide character constants where
necessary to avoid multi-character character constant warning.
* gcc.dg/cpp/escape.c: Likewise.
* gcc.dg/cpp/ucs.c: Likewise.
Remove backslashes from dg-bogus comments, as they confuse Tcl.
Fix a typo.
libstdc++-v3:
* testsuite/22_locale/collate/compare/wchar_t/2.cc
* testsuite/22_locale/collate/compare/wchar_t/wrapped_env.cc
* testsuite/22_locale/collate/compare/wchar_t/wrapped_locale.cc
* testsuite/22_locale/collate/hash/wchar_t/2.cc
* testsuite/22_locale/collate/hash/wchar_t/wrapped_env.cc
* testsuite/22_locale/collate/hash/wchar_t/wrapped_locale.cc
* testsuite/22_locale/collate/transform/wchar_t/2.cc
* testsuite/22_locale/collate/transform/wchar_t/wrapped_env.cc
* testsuite/22_locale/collate/transform/wchar_t/wrapped_locale.cc:
XFAIL on all targets.
From-SVN: r68952
Diffstat (limited to 'gcc/cpplex.c')
-rw-r--r-- | gcc/cpplex.c | 289 |
1 file changed, 1 insertion, 288 deletions
diff --git a/gcc/cpplex.c b/gcc/cpplex.c index c536c76..edb765d 100644 --- a/gcc/cpplex.c +++ b/gcc/cpplex.c @@ -64,10 +64,8 @@ static void create_literal (cpp_reader *, cpp_token *, const uchar *, unsigned int, enum cpp_ttype); static bool warn_in_comment (cpp_reader *, _cpp_line_note *); static int name_p (cpp_reader *, const cpp_string *); -static cppchar_t maybe_read_ucn (cpp_reader *, const uchar **); static tokenrun *next_tokenrun (tokenrun *); -static unsigned int hex_digit_value (unsigned int); static _cpp_buff *new_buff (size_t); @@ -397,7 +395,7 @@ forms_identifier_p (cpp_reader *pfile, int first) && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U')) { buffer->cur += 2; - if (_cpp_valid_ucn (pfile, &buffer->cur, 1 + !first)) + if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first)) return true; buffer->cur -= 2; } @@ -1316,291 +1314,6 @@ cpp_output_line (cpp_reader *pfile, FILE *fp) putc ('\n', fp); } -/* Returns the value of a hexadecimal digit. */ -static unsigned int -hex_digit_value (unsigned int c) -{ - if (hex_p (c)) - return hex_value (c); - else - abort (); -} - -/* Read a possible universal character name starting at *PSTR. */ -static cppchar_t -maybe_read_ucn (cpp_reader *pfile, const uchar **pstr) -{ - cppchar_t result, c = (*pstr)[-1]; - - result = _cpp_valid_ucn (pfile, pstr, false); - if (result) - { - if (CPP_WTRADITIONAL (pfile)) - cpp_error (pfile, DL_WARNING, - "the meaning of '\\%c' is different in traditional C", - (int) c); - - if (CPP_OPTION (pfile, EBCDIC)) - { - cpp_error (pfile, DL_ERROR, - "universal character with an EBCDIC target"); - result = 0x3f; /* EBCDIC invalid character */ - } - } - - return result; -} - -/* Returns the value of an escape sequence, truncated to the correct - target precision. PSTR points to the input pointer, which is just - after the backslash. LIMIT is how much text we have. WIDE is true - if the escape sequence is part of a wide character constant or - string literal. 
Handles all relevant diagnostics. */ -cppchar_t -cpp_parse_escape (cpp_reader *pfile, const unsigned char **pstr, - const unsigned char *limit, int wide) -{ - /* Values of \a \b \e \f \n \r \t \v respectively. */ - static const uchar ascii[] = { 7, 8, 27, 12, 10, 13, 9, 11 }; - static const uchar ebcdic[] = { 47, 22, 39, 12, 21, 13, 5, 11 }; - - int unknown = 0; - const unsigned char *str = *pstr, *charconsts; - cppchar_t c, ucn, mask; - unsigned int width; - - if (CPP_OPTION (pfile, EBCDIC)) - charconsts = ebcdic; - else - charconsts = ascii; - - if (wide) - width = CPP_OPTION (pfile, wchar_precision); - else - width = CPP_OPTION (pfile, char_precision); - if (width < BITS_PER_CPPCHAR_T) - mask = ((cppchar_t) 1 << width) - 1; - else - mask = ~0; - - c = *str++; - switch (c) - { - case '\\': case '\'': case '"': case '?': break; - case 'b': c = charconsts[1]; break; - case 'f': c = charconsts[3]; break; - case 'n': c = charconsts[4]; break; - case 'r': c = charconsts[5]; break; - case 't': c = charconsts[6]; break; - case 'v': c = charconsts[7]; break; - - case '(': case '{': case '[': case '%': - /* '\(', etc, are used at beginning of line to avoid confusing Emacs. - '\%' is used to prevent SCCS from getting confused. */ - unknown = CPP_PEDANTIC (pfile); - break; - - case 'a': - if (CPP_WTRADITIONAL (pfile)) - cpp_error (pfile, DL_WARNING, - "the meaning of '\\a' is different in traditional C"); - c = charconsts[0]; - break; - - case 'e': case 'E': - if (CPP_PEDANTIC (pfile)) - cpp_error (pfile, DL_PEDWARN, - "non-ISO-standard escape sequence, '\\%c'", (int) c); - c = charconsts[2]; - break; - - case 'u': case 'U': - ucn = maybe_read_ucn (pfile, &str); - if (ucn) - c = ucn; - else - unknown = true; - break; - - case 'x': - if (CPP_WTRADITIONAL (pfile)) - cpp_error (pfile, DL_WARNING, - "the meaning of '\\x' is different in traditional C"); - - { - cppchar_t i = 0, overflow = 0; - int digits_found = 0; - - while (str < limit) - { - c = *str; - if (! 
ISXDIGIT (c)) - break; - str++; - overflow |= i ^ (i << 4 >> 4); - i = (i << 4) + hex_digit_value (c); - digits_found = 1; - } - - if (!digits_found) - cpp_error (pfile, DL_ERROR, - "\\x used with no following hex digits"); - - if (overflow | (i != (i & mask))) - { - cpp_error (pfile, DL_PEDWARN, - "hex escape sequence out of range"); - i &= mask; - } - c = i; - } - break; - - case '0': case '1': case '2': case '3': - case '4': case '5': case '6': case '7': - { - size_t count = 0; - cppchar_t i = c - '0'; - - while (str < limit && ++count < 3) - { - c = *str; - if (c < '0' || c > '7') - break; - str++; - i = (i << 3) + c - '0'; - } - - if (i != (i & mask)) - { - cpp_error (pfile, DL_PEDWARN, - "octal escape sequence out of range"); - i &= mask; - } - c = i; - } - break; - - default: - unknown = 1; - break; - } - - if (unknown) - { - if (ISGRAPH (c)) - cpp_error (pfile, DL_PEDWARN, - "unknown escape sequence '\\%c'", (int) c); - else - cpp_error (pfile, DL_PEDWARN, - "unknown escape sequence: '\\%03o'", (int) c); - } - - if (c > mask) - { - cpp_error (pfile, DL_PEDWARN, - "escape sequence out of range for its type"); - c &= mask; - } - - *pstr = str; - return c; -} - -/* Interpret a (possibly wide) character constant in TOKEN. - WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN - points to a variable that is filled in with the number of - characters seen, and UNSIGNEDP to a variable that indicates whether - the result has signed type. 
*/ -cppchar_t -cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token, - unsigned int *pchars_seen, int *unsignedp) -{ - const unsigned char *str, *limit; - unsigned int chars_seen = 0; - size_t width, max_chars; - cppchar_t c, mask, result = 0; - bool unsigned_p; - - str = token->val.str.text + 1 + (token->type == CPP_WCHAR); - limit = token->val.str.text + token->val.str.len - 1; - - if (token->type == CPP_CHAR) - { - width = CPP_OPTION (pfile, char_precision); - max_chars = CPP_OPTION (pfile, int_precision) / width; - unsigned_p = CPP_OPTION (pfile, unsigned_char); - } - else - { - width = CPP_OPTION (pfile, wchar_precision); - max_chars = 1; - unsigned_p = CPP_OPTION (pfile, unsigned_wchar); - } - - if (width < BITS_PER_CPPCHAR_T) - mask = ((cppchar_t) 1 << width) - 1; - else - mask = ~0; - - while (str < limit) - { - c = *str++; - - if (c == '\\') - c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR); - -#ifdef MAP_CHARACTER - if (ISPRINT (c)) - c = MAP_CHARACTER (c); -#endif - - chars_seen++; - - /* Truncate the character, scale the result and merge the two. */ - c &= mask; - if (width < BITS_PER_CPPCHAR_T) - result = (result << width) | c; - else - result = c; - } - - if (chars_seen == 0) - cpp_error (pfile, DL_ERROR, "empty character constant"); - else if (chars_seen > 1) - { - /* Multichar charconsts are of type int and therefore signed. */ - unsigned_p = 0; - - if (chars_seen > max_chars) - { - chars_seen = max_chars; - cpp_error (pfile, DL_WARNING, - "character constant too long for its type"); - } - else if (CPP_OPTION (pfile, warn_multichar)) - cpp_error (pfile, DL_WARNING, "multi-character character constant"); - } - - /* Sign-extend or truncate the constant to cppchar_t. The value is - in WIDTH bits, but for multi-char charconsts it's value is the - full target type's width. 
*/ - if (chars_seen > 1) - width *= max_chars; - if (width < BITS_PER_CPPCHAR_T) - { - mask = ((cppchar_t) 1 << width) - 1; - if (unsigned_p || !(result & (1 << (width - 1)))) - result &= mask; - else - result |= ~mask; - } - - *pchars_seen = chars_seen; - *unsignedp = unsigned_p; - return result; -} - /* Memory buffers. Changing these three constants can have a dramatic effect on performance. The values here are reasonable defaults, but might be tuned. If you adjust them, be sure to test across a |