diff options
author | Jakub Jelinek <jakub@redhat.com> | 2009-10-19 23:41:15 +0200 |
---|---|---|
committer | Jakub Jelinek <jakub@gcc.gnu.org> | 2009-10-19 23:41:15 +0200 |
commit | 2c6e3f5540bc82374b7f71333f550cfccca93a74 (patch) | |
tree | 2156a8d43e7b0cb0eb28776c7a735b606e1caea8 /gcc/c-lex.c | |
parent | 4d696ad0116dafa47b891ceb0db246ad8b81c8ff (diff) | |
download | gcc-2c6e3f5540bc82374b7f71333f550cfccca93a74.zip gcc-2c6e3f5540bc82374b7f71333f550cfccca93a74.tar.gz gcc-2c6e3f5540bc82374b7f71333f550cfccca93a74.tar.bz2 |
charset.c (cpp_init_iconv): Initialize utf8_cset_desc.
* charset.c (cpp_init_iconv): Initialize utf8_cset_desc.
(_cpp_destroy_iconv): Destroy utf8_cset_desc, char16_cset_desc
and char32_cset_desc.
(converter_for_type): Handle CPP_UTF8STRING.
(cpp_interpret_string): Handle CPP_UTF8STRING and raw-strings.
* directives.c (get__Pragma_string): Handle CPP_UTF8STRING.
(parse_include): Reject raw strings.
* include/cpplib.h (CPP_UTF8STRING): New token type.
* internal.h (struct cpp_reader): Add utf8_cset_desc field.
* lex.c (lex_raw_string): New function.
(lex_string): Handle u8 string literals, call lex_raw_string
for raw string literals.
(_cpp_lex_direct): Call lex_string even for u8" and {,u,U,L,u8}R"
sequences.
* macro.c (stringify_arg): Handle CPP_UTF8STRING.
* c-common.c (c_parse_error): Handle CPP_UTF8STRING.
* c-lex.c (c_lex_with_flags): Likewise. Test C_LEX_STRING_NO_JOIN
instead of C_LEX_RAW_STRINGS.
(lex_string): Handle CPP_UTF8STRING.
* c-parser.c (c_parser_postfix_expression): Likewise.
* c-pragma.h (C_LEX_RAW_STRINGS): Rename to ...
(C_LEX_STRING_NO_JOIN): ... this.
* parser.c (cp_lexer_print_token, cp_parser_is_string_literal,
cp_parser_string_literal, cp_parser_primary_expression): Likewise.
(cp_lexer_get_preprocessor_token): Use C_LEX_STRING_JOIN instead
of C_LEX_RAW_STRINGS.
* gcc.dg/raw-string-1.c: New test.
* gcc.dg/raw-string-2.c: New test.
* gcc.dg/raw-string-3.c: New test.
* gcc.dg/raw-string-4.c: New test.
* gcc.dg/raw-string-5.c: New test.
* gcc.dg/raw-string-6.c: New test.
* gcc.dg/raw-string-7.c: New test.
* gcc.dg/utf8-1.c: New test.
* gcc.dg/utf8-2.c: New test.
* gcc.dg/utf-badconcat2.c: New test.
* gcc.dg/utf-dflt2.c: New test.
* gcc.dg/cpp/include6.c: New test.
* g++.dg/ext/raw-string-1.C: New test.
* g++.dg/ext/raw-string-2.C: New test.
* g++.dg/ext/raw-string-3.C: New test.
* g++.dg/ext/raw-string-4.C: New test.
* g++.dg/ext/raw-string-5.C: New test.
* g++.dg/ext/raw-string-6.C: New test.
* g++.dg/ext/raw-string-7.C: New test.
* g++.dg/ext/utf8-1.C: New test.
* g++.dg/ext/utf8-2.C: New test.
* g++.dg/ext/utf-badconcat2.C: New test.
* g++.dg/ext/utf-dflt2.C: New test.
From-SVN: r152995
Diffstat (limited to 'gcc/c-lex.c')
-rw-r--r-- | gcc/c-lex.c | 18 |
1 files changed, 12 insertions, 6 deletions
diff --git a/gcc/c-lex.c b/gcc/c-lex.c index 0c6cdab..fd3df8c 100644 --- a/gcc/c-lex.c +++ b/gcc/c-lex.c @@ -365,6 +365,7 @@ c_lex_with_flags (tree *value, location_t *loc, unsigned char *cpp_flags, case CPP_WSTRING: case CPP_STRING16: case CPP_STRING32: + case CPP_UTF8STRING: type = lex_string (tok, value, true, true); break; @@ -423,7 +424,8 @@ c_lex_with_flags (tree *value, location_t *loc, unsigned char *cpp_flags, case CPP_WSTRING: case CPP_STRING16: case CPP_STRING32: - if ((lex_flags & C_LEX_RAW_STRINGS) == 0) + case CPP_UTF8STRING: + if ((lex_flags & C_LEX_STRING_NO_JOIN) == 0) { type = lex_string (tok, value, false, (lex_flags & C_LEX_STRING_NO_TRANSLATE) == 0); @@ -871,12 +873,13 @@ interpret_fixed (const cpp_token *token, unsigned int flags) return value; } -/* Convert a series of STRING, WSTRING, STRING16 and/or STRING32 tokens - into a tree, performing string constant concatenation. TOK is the - first of these. VALP is the location to write the string into. - OBJC_STRING indicates whether an '@' token preceded the incoming token. +/* Convert a series of STRING, WSTRING, STRING16, STRING32 and/or + UTF8STRING tokens into a tree, performing string constant + concatenation. TOK is the first of these. VALP is the location + to write the string into. OBJC_STRING indicates whether an '@' token + preceded the incoming token. Returns the CPP token type of the result (CPP_STRING, CPP_WSTRING, - CPP_STRING32, CPP_STRING16, or CPP_OBJC_STRING). + CPP_STRING32, CPP_STRING16, CPP_UTF8STRING, or CPP_OBJC_STRING). This is unfortunately more work than it should be. If any of the strings in the series has an L prefix, the result is a wide string @@ -921,6 +924,7 @@ lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate) case CPP_WSTRING: case CPP_STRING16: case CPP_STRING32: + case CPP_UTF8STRING: if (type != tok->type) { if (type == CPP_STRING) @@ -966,6 +970,7 @@ lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate) { default: case CPP_STRING: + case CPP_UTF8STRING: value = build_string (1, ""); break; case CPP_STRING16: @@ -991,6 +996,7 @@ lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate) { default: case CPP_STRING: + case CPP_UTF8STRING: TREE_TYPE (value) = char_array_type_node; break; case CPP_STRING16: |