diff options
author | Neil Booth <neil@daikokuya.co.uk> | 2003-04-23 22:44:06 +0000 |
---|---|---|
committer | Neil Booth <neil@gcc.gnu.org> | 2003-04-23 22:44:06 +0000 |
commit | 6338b35872d465cf27fdbbc43b5a146363c8f246 (patch) | |
tree | e4f819e101d1dc188ae9d2012e0cb8ab2239160d /gcc/cpplex.c | |
parent | 06f5e63748eeb66140858914bbffb149406789a9 (diff) | |
download | gcc-6338b35872d465cf27fdbbc43b5a146363c8f246.zip gcc-6338b35872d465cf27fdbbc43b5a146363c8f246.tar.gz gcc-6338b35872d465cf27fdbbc43b5a146363c8f246.tar.bz2 |
Makefile.in (c-lex.o, [...]): Update.
* Makefile.in (c-lex.o, LIBCPP_OBJS, cpplex.o): Update.
* c-lex.c (MULTIBYTE_CHARS): Remove conditionals.
(lex_string): Take cpp_string with full spelling.
(cb_ident): Update.
(c_lex): Update diagnostics.
* cpplex.c (SPELL_NUMBER, SPELL_STRING): Combine into SPELL_LITERAL.
(create_literal): New.
(lex_string): Unterminated literals have type CPP_OTHER.
(_cpp_lex_direct): Update calls to lex_string. Use create_literal
for CPP_OTHER.
(cpp_token_len, cpp_spell_token, cpp_output_token): Simplify.
(_cpp_equiv_tokens, cpp_interpret_charconst): Update.
* cpplib.c (parse_include, do_line, do_linemarker,
destringize_and_run): Update for token storing full spelling.
* cpplib.h: Update token spelling types.
* cppmacro.c (stringify_arg, check_trad_stringification):
Update for token storing full spelling.
cp:
* Make-lang.in (lex.o): Remove mbchar.h.
* lex.c (MULTIBYTE_CHARS): Lose.
* parser.c (cp_lexer_get_preprocessor_token): CPP_OTHER handled
in c-lex.c.
testsuite:
* gcc.dg/cpp/include2.c: Update.
* gcc.dg/cpp/multiline-2.c: New.
* gcc.dg/cpp/multiline.c: Update.
* gcc.dg/cpp/strify2.c: Update.
* gcc.dg/cpp/trad/literals-2.c: Update.
From-SVN: r66019
Diffstat (limited to 'gcc/cpplex.c')
-rw-r--r-- | gcc/cpplex.c | 185 |
1 file changed, 71 insertions, 114 deletions
diff --git a/gcc/cpplex.c b/gcc/cpplex.c index d6c617d3..c9c0641 100644 --- a/gcc/cpplex.c +++ b/gcc/cpplex.c @@ -26,14 +26,11 @@ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #include "cpplib.h" #include "cpphash.h" -/* Tokens with SPELL_STRING store their spelling in the token list, - and it's length in the token->val.name.len. */ enum spell_type { SPELL_OPERATOR = 0, SPELL_IDENT, - SPELL_NUMBER, - SPELL_STRING, + SPELL_LITERAL, SPELL_NONE }; @@ -61,9 +58,11 @@ static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t)); static cpp_hashnode *lex_identifier PARAMS ((cpp_reader *, const uchar *)); static void lex_number PARAMS ((cpp_reader *, cpp_string *)); static bool forms_identifier_p PARAMS ((cpp_reader *, int)); -static void lex_string PARAMS ((cpp_reader *, cpp_token *)); +static void lex_string PARAMS ((cpp_reader *, cpp_token *, const uchar *)); static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *, cppchar_t)); +static void create_literal PARAMS ((cpp_reader *, cpp_token *, const uchar *, + unsigned int, enum cpp_ttype)); static int name_p PARAMS ((cpp_reader *, const cpp_string *)); static cppchar_t maybe_read_ucn PARAMS ((cpp_reader *, const uchar **)); static tokenrun *next_tokenrun PARAMS ((tokenrun *)); @@ -468,63 +467,77 @@ lex_number (pfile, number) number->text = dest; } +/* Create a token of type TYPE with a literal spelling. */ +static void +create_literal (pfile, token, base, len, type) + cpp_reader *pfile; + cpp_token *token; + const uchar *base; + unsigned int len; + enum cpp_ttype type; +{ + uchar *dest = _cpp_unaligned_alloc (pfile, len + 1); + + memcpy (dest, base, len); + dest[len] = '\0'; + token->type = type; + token->val.str.len = len; + token->val.str.text = dest; +} + /* Lexes a string, character constant, or angle-bracketed header file - name. The stored string is guaranteed NUL-terminated, but it is - not guaranteed that this is the first NUL since embedded NULs are - preserved. 
*/ + name. The stored string contains the spelling, including opening + quote and leading any leading 'L'. It returns the type of the + literal, or CPP_OTHER if it was not properly terminated. + + The spelling is NUL-terminated, but it is not guaranteed that this + is the first NUL since embedded NULs are preserved. */ static void -lex_string (pfile, token) +lex_string (pfile, token, base) cpp_reader *pfile; cpp_token *token; + const uchar *base; { - cpp_buffer *buffer = pfile->buffer; - bool warned_nulls = false; - const uchar *base; - uchar *dest; + bool saw_NUL = false; + const uchar *cur; cppchar_t terminator; - - base = buffer->cur; - terminator = base[-1]; - if (terminator == '<') - terminator = '>'; + enum cpp_ttype type; + + cur = base; + terminator = *cur++; + if (terminator == 'L') + terminator = *cur++; + if (terminator == '\"') + type = *base == 'L' ? CPP_WSTRING: CPP_STRING; + else if (terminator == '\'') + type = *base == 'L' ? CPP_WCHAR: CPP_CHAR; + else + terminator = '>', type = CPP_HEADER_NAME; for (;;) { - cppchar_t c = *buffer->cur++; + cppchar_t c = *cur++; /* In #include-style directives, terminators are not escapable. 
*/ - if (c == '\\' && !pfile->state.angled_headers && *buffer->cur != '\n') - buffer->cur++; - else if (c == terminator || c == '\n') + if (c == '\\' && !pfile->state.angled_headers && *cur != '\n') + cur++; + else if (c == terminator) break; - else if (c == '\0') + else if (c == '\n') { - if (!warned_nulls) - { - warned_nulls = true; - cpp_error (pfile, DL_WARNING, - "null character(s) preserved in literal"); - } + cur--; + type = CPP_OTHER; + break; } + else if (c == '\0') + saw_NUL = true; } - token->val.str.len = buffer->cur - base - 1; - dest = _cpp_unaligned_alloc (pfile, token->val.str.len + 1); - memcpy (dest, base, token->val.str.len); - dest[token->val.str.len] = '\0'; - token->val.str.text = dest; + if (saw_NUL && !pfile->state.skipping) + cpp_error (pfile, DL_WARNING, "null character(s) preserved in literal"); - if (buffer->cur[-1] == '\n') - { - /* No string literal may extend over multiple lines. In - assembly language, suppress the error except for <> - includes. This is a kludge around not knowing where - comments are. */ - if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>') - cpp_error (pfile, DL_ERROR, "missing terminating %c character", - (int) terminator); - buffer->cur--; - } + pfile->buffer->cur = cur; + create_literal (pfile, token, base, cur - base, type); } /* The stored comment includes the comment start and any terminator. */ @@ -817,9 +830,7 @@ _cpp_lex_direct (pfile) /* 'L' may introduce wide characters or strings. */ if (*buffer->cur == '\'' || *buffer->cur == '"') { - result->type = (*buffer->cur == '"' ? CPP_WSTRING: CPP_WCHAR); - buffer->cur++; - lex_string (pfile, result); + lex_string (pfile, result, buffer->cur - 1); break; } /* Fall through. */ @@ -848,8 +859,7 @@ _cpp_lex_direct (pfile) case '\'': case '"': - result->type = c == '"' ? 
CPP_STRING: CPP_CHAR; - lex_string (pfile, result); + lex_string (pfile, result, buffer->cur - 1); break; case '/': @@ -905,8 +915,7 @@ _cpp_lex_direct (pfile) case '<': if (pfile->state.angled_headers) { - result->type = CPP_HEADER_NAME; - lex_string (pfile, result); + lex_string (pfile, result, buffer->cur - 1); break; } @@ -1078,15 +1087,8 @@ _cpp_lex_direct (pfile) } default: - { - uchar *dest = _cpp_unaligned_alloc (pfile, 1 + 1); - dest[0] = c; - dest[1] = '\0'; - result->type = CPP_OTHER; - result->val.str.len = 1; - result->val.str.text = dest; - break; - } + create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER); + break; } return result; @@ -1103,8 +1105,7 @@ cpp_token_len (token) switch (TOKEN_SPELL (token)) { default: len = 0; break; - case SPELL_NUMBER: - case SPELL_STRING: len = token->val.str.len; break; + case SPELL_LITERAL: len = token->val.str.len; break; case SPELL_IDENT: len = NODE_LEN (token->val.node); break; } /* 1 for whitespace, 4 for comment delimiters. */ @@ -1147,34 +1148,11 @@ cpp_spell_token (pfile, token, buffer) buffer += NODE_LEN (token->val.node); break; - case SPELL_NUMBER: + case SPELL_LITERAL: memcpy (buffer, token->val.str.text, token->val.str.len); buffer += token->val.str.len; break; - case SPELL_STRING: - { - int left, right, tag; - switch (token->type) - { - case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break; - case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break; - case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break; - case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break; - case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break; - default: - cpp_error (pfile, DL_ICE, "unknown string token %s\n", - TOKEN_NAME (token)); - return buffer; - } - if (tag) *buffer++ = tag; - *buffer++ = left; - memcpy (buffer, token->val.str.text, token->val.str.len); - buffer += token->val.str.len; - *buffer++ = right; - } - break; - case SPELL_NONE: cpp_error (pfile, DL_ICE, "unspellable token 
%s", TOKEN_NAME (token)); break; @@ -1243,31 +1221,10 @@ cpp_output_token (token, fp) fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp); break; - case SPELL_NUMBER: + case SPELL_LITERAL: fwrite (token->val.str.text, 1, token->val.str.len, fp); break; - case SPELL_STRING: - { - int left, right, tag; - switch (token->type) - { - case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break; - case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break; - case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break; - case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break; - case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break; - default: - fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token)); - return; - } - if (tag) putc (tag, fp); - putc (left, fp); - fwrite (token->val.str.text, 1, token->val.str.len, fp); - putc (right, fp); - } - break; - case SPELL_NONE: /* An error, most probably. */ break; @@ -1289,8 +1246,7 @@ _cpp_equiv_tokens (a, b) return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no); case SPELL_IDENT: return a->val.node == b->val.node; - case SPELL_NUMBER: - case SPELL_STRING: + case SPELL_LITERAL: return (a->val.str.len == b->val.str.len && !memcmp (a->val.str.text, b->val.str.text, a->val.str.len)); @@ -1588,14 +1544,15 @@ cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp) unsigned int *pchars_seen; int *unsignedp; { - const unsigned char *str = token->val.str.text; - const unsigned char *limit = str + token->val.str.len; + const unsigned char *str, *limit; unsigned int chars_seen = 0; size_t width, max_chars; cppchar_t c, mask, result = 0; bool unsigned_p; - /* Width in bits. */ + str = token->val.str.text + 1 + (token->type == CPP_WCHAR); + limit = token->val.str.text + token->val.str.len - 1; + if (token->type == CPP_CHAR) { width = CPP_OPTION (pfile, char_precision); |