charset.c (cpp_init_iconv): Initialize utf8_cset_desc.

* charset.c (cpp_init_iconv): Initialize utf8_cset_desc. (_cpp_destroy_iconv): Destroy utf8_cset_desc, char16_cset_desc and char32_cset_desc. (converter_for_type): Handle CPP_UTF8STRING. (cpp_interpret_string): Handle CPP_UTF8STRING and raw-strings. * directives.c (get__Pragma_string): Handle CPP_UTF8STRING. (parse_include): Reject raw strings. * include/cpplib.h (CPP_UTF8STRING): New token type. * internal.h (struct cpp_reader): Add utf8_cset_desc field. * lex.c (lex_raw_string): New function. (lex_string): Handle u8 string literals, call lex_raw_string for raw string literals. (_cpp_lex_direct): Call lex_string even for u8" and {,u,U,L,u8}R" sequences. * macro.c (stringify_arg): Handle CPP_UTF8STRING. * c-common.c (c_parse_error): Handle CPP_UTF8STRING. * c-lex.c (c_lex_with_flags): Likewise. Test C_LEX_STRING_NO_JOIN instead of C_LEX_RAW_STRINGS. (lex_string): Handle CPP_UTF8STRING. * c-parser.c (c_parser_postfix_expression): Likewise. * c-pragma.h (C_LEX_RAW_STRINGS): Rename to ... (C_LEX_STRING_NO_JOIN): ... this. * parser.c (cp_lexer_print_token, cp_parser_is_string_literal, cp_parser_string_literal, cp_parser_primary_expression): Likewise. (cp_lexer_get_preprocessor_token): Use C_LEX_STRING_NO_JOIN instead of C_LEX_RAW_STRINGS. * gcc.dg/raw-string-1.c: New test. * gcc.dg/raw-string-2.c: New test. * gcc.dg/raw-string-3.c: New test. * gcc.dg/raw-string-4.c: New test. * gcc.dg/raw-string-5.c: New test. * gcc.dg/raw-string-6.c: New test. * gcc.dg/raw-string-7.c: New test. * gcc.dg/utf8-1.c: New test. * gcc.dg/utf8-2.c: New test. * gcc.dg/utf-badconcat2.c: New test. * gcc.dg/utf-dflt2.c: New test. * gcc.dg/cpp/include6.c: New test. * g++.dg/ext/raw-string-1.C: New test. * g++.dg/ext/raw-string-2.C: New test. * g++.dg/ext/raw-string-3.C: New test. * g++.dg/ext/raw-string-4.C: New test. * g++.dg/ext/raw-string-5.C: New test. * g++.dg/ext/raw-string-6.C: New test. * g++.dg/ext/raw-string-7.C: New test. * g++.dg/ext/utf8-1.C: New test. * g++.dg/ext/utf8-2.C: New test. 
* g++.dg/ext/utf-badconcat2.C: New test. * g++.dg/ext/utf-dflt2.C: New test. From-SVN: r152995
author: Jakub Jelinek <jakub@redhat.com> 2009-10-19 23:41:15 +0200
committer: Jakub Jelinek <jakub@gcc.gnu.org> 2009-10-19 23:41:15 +0200
commit: 2c6e3f5540bc82374b7f71333f550cfccca93a74 (patch)
tree: 2156a8d43e7b0cb0eb28776c7a735b606e1caea8 /gcc/c-lex.c
parent: 4d696ad0116dafa47b891ceb0db246ad8b81c8ff (diff)
download: gcc-2c6e3f5540bc82374b7f71333f550cfccca93a74.zip
gcc-2c6e3f5540bc82374b7f71333f550cfccca93a74.tar.gz
gcc-2c6e3f5540bc82374b7f71333f550cfccca93a74.tar.bz2
1 files changed, 12 insertions, 6 deletions
diff --git a/gcc/c-lex.c b/gcc/c-lex.c
index 0c6cdab..fd3df8c 100644
--- a/gcc/c-lex.c
+++ b/gcc/c-lex.c
@@ -365,6 +365,7 @@ c_lex_with_flags (tree *value, location_t *loc, unsigned char *cpp_flags,
 	    case CPP_WSTRING:
 	    case CPP_STRING16:
 	    case CPP_STRING32:
+	    case CPP_UTF8STRING:
 	      type = lex_string (tok, value, true, true);
 	      break;
 
@@ -423,7 +424,8 @@ c_lex_with_flags (tree *value, location_t *loc, unsigned char *cpp_flags,
     case CPP_WSTRING:
     case CPP_STRING16:
     case CPP_STRING32:
-      if ((lex_flags & C_LEX_RAW_STRINGS) == 0)
+    case CPP_UTF8STRING:
+      if ((lex_flags & C_LEX_STRING_NO_JOIN) == 0)
 	{
 	  type = lex_string (tok, value, false,
 			     (lex_flags & C_LEX_STRING_NO_TRANSLATE) == 0);
@@ -871,12 +873,13 @@ interpret_fixed (const cpp_token *token, unsigned int flags)
   return value;
 }
 
-/* Convert a series of STRING, WSTRING, STRING16 and/or STRING32 tokens
-   into a tree, performing string constant concatenation.  TOK is the
-   first of these.  VALP is the location to write the string into.
-   OBJC_STRING indicates whether an '@' token preceded the incoming token.
+/* Convert a series of STRING, WSTRING, STRING16, STRING32 and/or
+   UTF8STRING tokens into a tree, performing string constant
+   concatenation.  TOK is the first of these.  VALP is the location
+   to write the string into. OBJC_STRING indicates whether an '@' token
+   preceded the incoming token.
    Returns the CPP token type of the result (CPP_STRING, CPP_WSTRING,
-   CPP_STRING32, CPP_STRING16, or CPP_OBJC_STRING).
+   CPP_STRING32, CPP_STRING16, CPP_UTF8STRING, or CPP_OBJC_STRING).
 
    This is unfortunately more work than it should be.  If any of the
    strings in the series has an L prefix, the result is a wide string
@@ -921,6 +924,7 @@ lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate)
     case CPP_WSTRING:
     case CPP_STRING16:
     case CPP_STRING32:
+    case CPP_UTF8STRING:
       if (type != tok->type)
 	{
 	  if (type == CPP_STRING)
@@ -966,6 +970,7 @@ lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate)
 	{
 	default:
 	case CPP_STRING:
+	case CPP_UTF8STRING:
 	  value = build_string (1, "");
 	  break;
 	case CPP_STRING16:
@@ -991,6 +996,7 @@ lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate)
     {
     default:
     case CPP_STRING:
+    case CPP_UTF8STRING:
       TREE_TYPE (value) = char_array_type_node;
       break;
     case CPP_STRING16:
author	Jakub Jelinek <jakub@redhat.com>	2009-10-19 23:41:15 +0200
committer	Jakub Jelinek <jakub@gcc.gnu.org>	2009-10-19 23:41:15 +0200
commit	2c6e3f5540bc82374b7f71333f550cfccca93a74 (patch)
tree	2156a8d43e7b0cb0eb28776c7a735b606e1caea8 /gcc/c-lex.c
parent	4d696ad0116dafa47b891ceb0db246ad8b81c8ff (diff)
download	gcc-2c6e3f5540bc82374b7f71333f550cfccca93a74.zip gcc-2c6e3f5540bc82374b7f71333f550cfccca93a74.tar.gz gcc-2c6e3f5540bc82374b7f71333f550cfccca93a74.tar.bz2