charset.c (cpp_init_iconv): Initialize utf8_cset_desc.

* charset.c (cpp_init_iconv): Initialize utf8_cset_desc. (_cpp_destroy_iconv): Destroy utf8_cset_desc, char16_cset_desc and char32_cset_desc. (converter_for_type): Handle CPP_UTF8STRING. (cpp_interpret_string): Handle CPP_UTF8STRING and raw-strings. * directives.c (get__Pragma_string): Handle CPP_UTF8STRING. (parse_include): Reject raw strings. * include/cpplib.h (CPP_UTF8STRING): New token type. * internal.h (struct cpp_reader): Add utf8_cset_desc field. * lex.c (lex_raw_string): New function. (lex_string): Handle u8 string literals, call lex_raw_string for raw string literals. (_cpp_lex_direct): Call lex_string even for u8" and {,u,U,L,u8}R" sequences. * macro.c (stringify_arg): Handle CPP_UTF8STRING. * c-common.c (c_parse_error): Handle CPP_UTF8STRING. * c-lex.c (c_lex_with_flags): Likewise. Test C_LEX_STRING_NO_JOIN instead of C_LEX_RAW_STRINGS. (lex_string): Handle CPP_UTF8STRING. * c-parser.c (c_parser_postfix_expression): Likewise. * c-pragma.h (C_LEX_RAW_STRINGS): Rename to ... (C_LEX_STRING_NO_JOIN): ... this. * parser.c (cp_lexer_print_token, cp_parser_is_string_literal, cp_parser_string_literal, cp_parser_primary_expression): Likewise. (cp_lexer_get_preprocessor_token): Use C_LEX_STRING_NO_JOIN instead of C_LEX_RAW_STRINGS. * gcc.dg/raw-string-1.c: New test. * gcc.dg/raw-string-2.c: New test. * gcc.dg/raw-string-3.c: New test. * gcc.dg/raw-string-4.c: New test. * gcc.dg/raw-string-5.c: New test. * gcc.dg/raw-string-6.c: New test. * gcc.dg/raw-string-7.c: New test. * gcc.dg/utf8-1.c: New test. * gcc.dg/utf8-2.c: New test. * gcc.dg/utf-badconcat2.c: New test. * gcc.dg/utf-dflt2.c: New test. * gcc.dg/cpp/include6.c: New test. * g++.dg/ext/raw-string-1.C: New test. * g++.dg/ext/raw-string-2.C: New test. * g++.dg/ext/raw-string-3.C: New test. * g++.dg/ext/raw-string-4.C: New test. * g++.dg/ext/raw-string-5.C: New test. * g++.dg/ext/raw-string-6.C: New test. * g++.dg/ext/raw-string-7.C: New test. * g++.dg/ext/utf8-1.C: New test. * g++.dg/ext/utf8-2.C: New test. 
* g++.dg/ext/utf-badconcat2.C: New test. * g++.dg/ext/utf-dflt2.C: New test. From-SVN: r152995
author: Jakub Jelinek <jakub@redhat.com> 2009-10-19 23:41:15 +0200
committer: Jakub Jelinek <jakub@gcc.gnu.org> 2009-10-19 23:41:15 +0200
commit: 2c6e3f5540bc82374b7f71333f550cfccca93a74 (patch)
tree: 2156a8d43e7b0cb0eb28776c7a735b606e1caea8 /gcc/testsuite/gcc.dg/utf8-1.c
parent: 4d696ad0116dafa47b891ceb0db246ad8b81c8ff (diff)
download: gcc-2c6e3f5540bc82374b7f71333f550cfccca93a74.zip
gcc-2c6e3f5540bc82374b7f71333f550cfccca93a74.tar.gz
gcc-2c6e3f5540bc82374b7f71333f550cfccca93a74.tar.bz2
1 files changed, 45 insertions, 0 deletions
diff --git a/gcc/testsuite/gcc.dg/utf8-1.c b/gcc/testsuite/gcc.dg/utf8-1.c
new file mode 100644
index 0000000..e287e06
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/utf8-1.c
@@ -0,0 +1,45 @@
+/* { dg-do run } */
+/* { dg-require-iconv "ISO-8859-2" } */
+/* { dg-options "-std=gnu99 -fexec-charset=ISO-8859-2" } */
+
+const char *str1 = "h\u00e1\U0000010Dky ";
+const char *str2 = "\u010d\u00E1rky\n";
+const char *str3 = u8"h\u00e1\U0000010Dky ";
+const char *str4 = u8"\u010d\u00E1rky\n";
+const char *str5 = "h\u00e1\U0000010Dky " "\u010d\u00E1rky\n";
+const char *str6 = u8"h\u00e1\U0000010Dky " "\u010d\u00E1rky\n";
+const char *str7 = "h\u00e1\U0000010Dky " u8"\u010d\u00E1rky\n";
+#define u8
+const char *str8 = u8"h\u00e1\U0000010Dky " u8"\u010d\u00E1rky\n";
+
+const char latin2_1[] = "\x68\xe1\xe8\x6b\x79\x20";
+const char latin2_2[] = "\xe8\xe1\x72\x6b\x79\n";
+const char utf8_1[] = "\x68\xc3\xa1\xc4\x8d\x6b\x79\x20";
+const char utf8_2[] = "\xc4\x8d\xc3\xa1\x72\x6b\x79\n";
+
+int
+main (void)
+{
+  if (__builtin_strcmp (str1, latin2_1) != 0
+      || __builtin_strcmp (str2, latin2_2) != 0
+      || __builtin_strcmp (str3, utf8_1) != 0
+      || __builtin_strcmp (str4, utf8_2) != 0
+      || __builtin_strncmp (str5, latin2_1, sizeof (latin2_1) - 1) != 0
+      || __builtin_strcmp (str5 + sizeof (latin2_1) - 1, latin2_2) != 0
+      || __builtin_strncmp (str6, utf8_1, sizeof (utf8_1) - 1) != 0
+      || __builtin_strcmp (str6 + sizeof (utf8_1) - 1, utf8_2) != 0
+      || __builtin_strncmp (str7, utf8_1, sizeof (utf8_1) - 1) != 0
+      || __builtin_strcmp (str7 + sizeof (utf8_1) - 1, utf8_2) != 0
+      || __builtin_strncmp (str8, utf8_1, sizeof (utf8_1) - 1) != 0
+      || __builtin_strcmp (str8 + sizeof (utf8_1) - 1, utf8_2) != 0)
+    __builtin_abort ();
+  if (sizeof ("a" u8"b"[0]) != 1
+      || sizeof (u8"a" "b"[0]) != 1
+      || sizeof (u8"a" u8"b"[0]) != 1
+      || sizeof ("a" "\u010d") != 3
+      || sizeof ("a" u8"\u010d") != 4
+      || sizeof (u8"a" "\u010d") != 4
+      || sizeof (u8"a" "\u010d") != 4)
+    __builtin_abort ();
+  return 0;
+}
author	Jakub Jelinek <jakub@redhat.com>	2009-10-19 23:41:15 +0200
committer	Jakub Jelinek <jakub@gcc.gnu.org>	2009-10-19 23:41:15 +0200
commit	2c6e3f5540bc82374b7f71333f550cfccca93a74 (patch)
tree	2156a8d43e7b0cb0eb28776c7a735b606e1caea8 /gcc/testsuite/gcc.dg/utf8-1.c
parent	4d696ad0116dafa47b891ceb0db246ad8b81c8ff (diff)
download	gcc-2c6e3f5540bc82374b7f71333f550cfccca93a74.zip gcc-2c6e3f5540bc82374b7f71333f550cfccca93a74.tar.gz gcc-2c6e3f5540bc82374b7f71333f550cfccca93a74.tar.bz2