aboutsummaryrefslogtreecommitdiff
path: root/gcc/c-lex.c
diff options
context:
space:
mode:
author Jakub Jelinek <jakub@redhat.com> 2009-10-19 23:41:15 +0200
committer Jakub Jelinek <jakub@gcc.gnu.org> 2009-10-19 23:41:15 +0200
commit 2c6e3f5540bc82374b7f71333f550cfccca93a74 (patch)
tree 2156a8d43e7b0cb0eb28776c7a735b606e1caea8 /gcc/c-lex.c
parent 4d696ad0116dafa47b891ceb0db246ad8b81c8ff (diff)
download gcc-2c6e3f5540bc82374b7f71333f550cfccca93a74.zip
gcc-2c6e3f5540bc82374b7f71333f550cfccca93a74.tar.gz
gcc-2c6e3f5540bc82374b7f71333f550cfccca93a74.tar.bz2
charset.c (cpp_init_iconv): Initialize utf8_cset_desc.
* charset.c (cpp_init_iconv): Initialize utf8_cset_desc. (_cpp_destroy_iconv): Destroy utf8_cset_desc, char16_cset_desc and char32_cset_desc. (converter_for_type): Handle CPP_UTF8STRING. (cpp_interpret_string): Handle CPP_UTF8STRING and raw-strings. * directives.c (get__Pragma_string): Handle CPP_UTF8STRING. (parse_include): Reject raw strings. * include/cpplib.h (CPP_UTF8STRING): New token type. * internal.h (struct cpp_reader): Add utf8_cset_desc field. * lex.c (lex_raw_string): New function. (lex_string): Handle u8 string literals, call lex_raw_string for raw string literals. (_cpp_lex_direct): Call lex_string even for u8" and {,u,U,L,u8}R" sequences. * macro.c (stringify_arg): Handle CPP_UTF8STRING. * c-common.c (c_parse_error): Handle CPP_UTF8STRING. * c-lex.c (c_lex_with_flags): Likewise. Test C_LEX_STRING_NO_JOIN instead of C_LEX_RAW_STRINGS. (lex_string): Handle CPP_UTF8STRING. * c-parser.c (c_parser_postfix_expression): Likewise. * c-pragma.h (C_LEX_RAW_STRINGS): Rename to ... (C_LEX_STRING_NO_JOIN): ... this. * parser.c (cp_lexer_print_token, cp_parser_is_string_literal, cp_parser_string_literal, cp_parser_primary_expression): Handle CPP_UTF8STRING. (cp_lexer_get_preprocessor_token): Use C_LEX_STRING_NO_JOIN instead of C_LEX_RAW_STRINGS. * gcc.dg/raw-string-1.c: New test. * gcc.dg/raw-string-2.c: New test. * gcc.dg/raw-string-3.c: New test. * gcc.dg/raw-string-4.c: New test. * gcc.dg/raw-string-5.c: New test. * gcc.dg/raw-string-6.c: New test. * gcc.dg/raw-string-7.c: New test. * gcc.dg/utf8-1.c: New test. * gcc.dg/utf8-2.c: New test. * gcc.dg/utf-badconcat2.c: New test. * gcc.dg/utf-dflt2.c: New test. * gcc.dg/cpp/include6.c: New test. * g++.dg/ext/raw-string-1.C: New test. * g++.dg/ext/raw-string-2.C: New test. * g++.dg/ext/raw-string-3.C: New test. * g++.dg/ext/raw-string-4.C: New test. * g++.dg/ext/raw-string-5.C: New test. * g++.dg/ext/raw-string-6.C: New test. * g++.dg/ext/raw-string-7.C: New test. * g++.dg/ext/utf8-1.C: New test. * g++.dg/ext/utf8-2.C: New test. 
* g++.dg/ext/utf-badconcat2.C: New test. * g++.dg/ext/utf-dflt2.C: New test. From-SVN: r152995
Diffstat (limited to 'gcc/c-lex.c')
-rw-r--r-- gcc/c-lex.c 18
1 files changed, 12 insertions, 6 deletions
diff --git a/gcc/c-lex.c b/gcc/c-lex.c
index 0c6cdab..fd3df8c 100644
--- a/gcc/c-lex.c
+++ b/gcc/c-lex.c
@@ -365,6 +365,7 @@ c_lex_with_flags (tree *value, location_t *loc, unsigned char *cpp_flags,
case CPP_WSTRING:
case CPP_STRING16:
case CPP_STRING32:
+ case CPP_UTF8STRING:
type = lex_string (tok, value, true, true);
break;
@@ -423,7 +424,8 @@ c_lex_with_flags (tree *value, location_t *loc, unsigned char *cpp_flags,
case CPP_WSTRING:
case CPP_STRING16:
case CPP_STRING32:
- if ((lex_flags & C_LEX_RAW_STRINGS) == 0)
+ case CPP_UTF8STRING:
+ if ((lex_flags & C_LEX_STRING_NO_JOIN) == 0)
{
type = lex_string (tok, value, false,
(lex_flags & C_LEX_STRING_NO_TRANSLATE) == 0);
@@ -871,12 +873,13 @@ interpret_fixed (const cpp_token *token, unsigned int flags)
return value;
}
-/* Convert a series of STRING, WSTRING, STRING16 and/or STRING32 tokens
- into a tree, performing string constant concatenation. TOK is the
- first of these. VALP is the location to write the string into.
- OBJC_STRING indicates whether an '@' token preceded the incoming token.
+/* Convert a series of STRING, WSTRING, STRING16, STRING32 and/or
+ UTF8STRING tokens into a tree, performing string constant
+ concatenation. TOK is the first of these. VALP is the location
+ to write the string into. OBJC_STRING indicates whether an '@' token
+ preceded the incoming token.
Returns the CPP token type of the result (CPP_STRING, CPP_WSTRING,
- CPP_STRING32, CPP_STRING16, or CPP_OBJC_STRING).
+ CPP_STRING32, CPP_STRING16, CPP_UTF8STRING, or CPP_OBJC_STRING).
This is unfortunately more work than it should be. If any of the
strings in the series has an L prefix, the result is a wide string
@@ -921,6 +924,7 @@ lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate)
case CPP_WSTRING:
case CPP_STRING16:
case CPP_STRING32:
+ case CPP_UTF8STRING:
if (type != tok->type)
{
if (type == CPP_STRING)
@@ -966,6 +970,7 @@ lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate)
{
default:
case CPP_STRING:
+ case CPP_UTF8STRING:
value = build_string (1, "");
break;
case CPP_STRING16:
@@ -991,6 +996,7 @@ lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate)
{
default:
case CPP_STRING:
+ case CPP_UTF8STRING:
TREE_TYPE (value) = char_array_type_node;
break;
case CPP_STRING16: