about | summary | refs | log | tree | commit | diff
path: root/gcc
diff options
context:
space:
mode:
author: Jakub Jelinek <jakub@redhat.com> 2023-11-14 18:28:34 +0100
committer: Jakub Jelinek <jakub@redhat.com> 2023-11-14 18:28:34 +0100
commit: 194825f20619a1c4b51eaea84f20432fefc0db03 (patch)
tree: 93e0f44cfa40ba14f7585d7aee9464f25b3f15e7 /gcc
parent: 948b8b6e0e50958ecf56d4d9fb7ac16f245d9cc3 (diff)
download: gcc-194825f20619a1c4b51eaea84f20432fefc0db03.zip
gcc-194825f20619a1c4b51eaea84f20432fefc0db03.tar.gz
gcc-194825f20619a1c4b51eaea84f20432fefc0db03.tar.bz2
c++: Implement C++26 P1854R4 - Making non-encodable string literals ill-formed [PR110341]
This paper, voted in as a DR, makes some multi-character literals ill-formed.
'abcd' stays valid, but e.g. 'á' is newly invalid in UTF-8 exec charset
while valid e.g. in ISO-8859-1, because it is a single character which needs
2 bytes to be encoded.

The following patch does that by checking (only pedantically, especially
because it is a DR), whenever we'd emit a -Wmultichar warning because the
character constant has more than one byte in it, whether the number of
source characters is equal to the number of bytes in the multichar string.
If it is, it is a normal multi-character literal constant and is diagnosed
normally with -Wmultichar; otherwise at least one of the c-chars in the
sequence was encoded as 2+ bytes.

2023-11-14  Jakub Jelinek  <jakub@redhat.com>

	PR c++/110341
libcpp/
	* charset.cc: Implement C++26 P1854R4 - Making non-encodable string
	literals ill-formed.
	(one_count_chars, convert_count_chars, count_source_chars): New
	functions.
	(narrow_str_to_charconst): Change last arg type from cpp_ttype to
	const cpp_token *.  For C++ if pedantic and i > 1 in CPP_CHAR
	interpret token also as CPP_STRING32 and if number of characters
	in the CPP_STRING32 is larger than number of bytes in CPP_CHAR,
	pedwarn on it.  Make the diagnostics more detailed.
	(wide_str_to_charconst): Change last arg type from cpp_ttype to
	const cpp_token *.  Make the diagnostics more detailed.
	(cpp_interpret_charconst): Adjust narrow_str_to_charconst and
	wide_str_to_charconst callers.
gcc/testsuite/
	* g++.dg/cpp26/literals1.C: New test.
	* g++.dg/cpp26/literals2.C: New test.
	* g++.dg/cpp23/wchar-multi1.C: Adjust expected diagnostic wordings.
	* g++.dg/cpp23/wchar-multi2.C: Likewise.
	* gcc.dg/c23-utf8char-3.c: Likewise.
	* gcc.dg/cpp/charconst-4.c: Likewise.
	* gcc.dg/cpp/charconst.c: Likewise.
	* gcc.dg/cpp/if-2.c: Likewise.
	* gcc.dg/utf16-4.c: Likewise.
	* gcc.dg/utf32-4.c: Likewise.
	* g++.dg/cpp1z/utf8-neg.C: Likewise.
	* g++.dg/cpp2a/ucn2.C: Likewise.
	* g++.dg/ext/utf16-4.C: Likewise.
	* g++.dg/ext/utf32-4.C: Likewise.
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/testsuite/g++.dg/cpp1z/utf8-neg.C      |  6
-rw-r--r--  gcc/testsuite/g++.dg/cpp23/wchar-multi1.C  | 37
-rw-r--r--  gcc/testsuite/g++.dg/cpp23/wchar-multi2.C  | 30
-rw-r--r--  gcc/testsuite/g++.dg/cpp26/literals1.C     | 66
-rw-r--r--  gcc/testsuite/g++.dg/cpp26/literals2.C     | 68
-rw-r--r--  gcc/testsuite/g++.dg/cpp2a/ucn2.C          | 12
-rw-r--r--  gcc/testsuite/g++.dg/ext/utf16-4.C         |  7
-rw-r--r--  gcc/testsuite/g++.dg/ext/utf32-4.C         |  7
-rw-r--r--  gcc/testsuite/gcc.dg/c23-utf8char-3.c      |  4
-rw-r--r--  gcc/testsuite/gcc.dg/cpp/charconst-4.c     |  4
-rw-r--r--  gcc/testsuite/gcc.dg/cpp/charconst.c       | 10
-rw-r--r--  gcc/testsuite/gcc.dg/cpp/if-2.c            |  2
-rw-r--r--  gcc/testsuite/gcc.dg/utf16-4.c             |  6
-rw-r--r--  gcc/testsuite/gcc.dg/utf32-4.c             |  6
14 files changed, 201 insertions, 64 deletions
diff --git a/gcc/testsuite/g++.dg/cpp1z/utf8-neg.C b/gcc/testsuite/g++.dg/cpp1z/utf8-neg.C
index 7bca124..d9e982f 100644
--- a/gcc/testsuite/g++.dg/cpp1z/utf8-neg.C
+++ b/gcc/testsuite/g++.dg/cpp1z/utf8-neg.C
@@ -1,6 +1,6 @@
/* { dg-do compile { target c++17 } } */
const static char c0 = u8''; // { dg-error "empty character" }
-const static char c1 = u8'ab'; // { dg-error "character constant too long for its type" }
-const static char c2 = u8'\u0124'; // { dg-error "character constant too long for its type" }
-const static char c3 = u8'\U00064321'; // { dg-error "character constant too long for its type" }
+const static char c1 = u8'ab'; // { dg-error "multi-character literal cannot have an encoding prefix" }
+const static char c2 = u8'\u0124'; // { dg-error "character not encodable in a single code unit" }
+const static char c3 = u8'\U00064321'; // { dg-error "character not encodable in a single code unit" }
diff --git a/gcc/testsuite/g++.dg/cpp23/wchar-multi1.C b/gcc/testsuite/g++.dg/cpp23/wchar-multi1.C
index 77fa3a6..d86dfc9 100644
--- a/gcc/testsuite/g++.dg/cpp23/wchar-multi1.C
+++ b/gcc/testsuite/g++.dg/cpp23/wchar-multi1.C
@@ -4,18 +4,19 @@
char a = 'a';
int b = 'ab'; // { dg-warning "multi-character character constant" }
-int c = '\u05D9'; // { dg-warning "multi-character character constant" }
+int c = '\u05D9'; // { dg-error "character not encodable in a single execution character code unit" }
#if __SIZEOF_INT__ > 2
-int d = '\U0001F525'; // { dg-warning "multi-character character constant" "" { target int32 } }
+int d = '\U0001F525'; // { dg-error "character not encodable in a single execution character code unit" "" { target int32 } }
#endif
-int e = 'abcd'; // { dg-warning "multi-character character constant" }
+int e = 'abcd'; // { dg-warning "multi-character character constant" "" { target int32plus } }
+ // { dg-warning "multi-character literal with \[0-9]+ characters exceeds 'int' size of \[0-9]+ bytes" "" { target { ! int32plus } } .-1 }
wchar_t f = L'f';
-wchar_t g = L'gh'; // { dg-error "character constant too long for its type" "" { target c++23 } }
- // { dg-warning "character constant too long for its type" "" { target c++20_down } .-1 }
-wchar_t h = L'ijkl'; // { dg-error "character constant too long for its type" "" { target c++23 } }
- // { dg-warning "character constant too long for its type" "" { target c++20_down } .-1 }
-wchar_t i = L'\U0001F525'; // { dg-error "character constant too long for its type" "" { target { c++23 && { ! 4byte_wchar_t } } } }
- // { dg-warning "character constant too long for its type" "" { target { c++20_down && { ! 4byte_wchar_t } } } .-1 }
+wchar_t g = L'gh'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } }
+ // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 }
+wchar_t h = L'ijkl'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } }
+ // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 }
+wchar_t i = L'\U0001F525'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target { c++23 && { ! 4byte_wchar_t } } } }
+ // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target { c++20_down && { ! 4byte_wchar_t } } } .-1 }
#ifdef __cpp_char8_t
typedef char8_t u8;
#else
@@ -23,20 +24,20 @@ typedef char u8;
#endif
#if __cpp_unicode_characters >= 201411
u8 j = u8'j';
-u8 k = u8'kl'; // { dg-error "character constant too long for its type" "" { target c++17 } }
-u8 l = u8'\U0001F525'; // { dg-error "character constant too long for its type" "" { target c++17 } }
+u8 k = u8'kl'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++17 } }
+u8 l = u8'\U0001F525'; // { dg-error "character not encodable in a single code unit" "" { target c++17 } }
#endif
#if __cpp_unicode_characters >= 200704
char16_t m = u'm';
-char16_t n = u'no'; // { dg-error "character constant too long for its type" "" { target c++11 } }
+char16_t n = u'no'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++11 } }
char16_t o = u'\u05D9';
-char16_t p = u'\U0001F525'; // { dg-error "character constant too long for its type" "" { target c++11 } }
+char16_t p = u'\U0001F525'; // { dg-error "character not encodable in a single code unit" "" { target c++11 } }
char32_t q = U'm';
-char32_t r = U'no'; // { dg-error "character constant too long for its type" "" { target c++11 } }
+char32_t r = U'no'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++11 } }
char32_t s = U'\u05D9';
char32_t t = U'\U0001F525';
#endif
-wchar_t u = L'\u0065\u0301'; // { dg-error "character constant too long for its type" "" { target c++23 } }
- // { dg-warning "character constant too long for its type" "" { target c++20_down } .-1 }
-wchar_t v = L'é'; // { dg-error "character constant too long for its type" "" { target c++23 } }
- // { dg-warning "character constant too long for its type" "" { target c++20_down } .-1 }
+wchar_t u = L'\u0065\u0301'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } }
+ // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 }
+wchar_t v = L'é'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } }
+ // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 }
diff --git a/gcc/testsuite/g++.dg/cpp23/wchar-multi2.C b/gcc/testsuite/g++.dg/cpp23/wchar-multi2.C
index a63e0ff..270de65 100644
--- a/gcc/testsuite/g++.dg/cpp23/wchar-multi2.C
+++ b/gcc/testsuite/g++.dg/cpp23/wchar-multi2.C
@@ -11,12 +11,12 @@ int d = '\U0001F525'; // { dg-warning "multi-character character constant" "" {
#endif
int e = 'abcd'; // { dg-warning "multi-character character constant" }
wchar_t f = L'f';
-wchar_t g = L'gh'; // { dg-error "character constant too long for its type" "" { target c++23 } }
- // { dg-warning "character constant too long for its type" "" { target c++20_down } .-1 }
-wchar_t h = L'ijkl'; // { dg-error "character constant too long for its type" "" { target c++23 } }
- // { dg-warning "character constant too long for its type" "" { target c++20_down } .-1 }
-wchar_t i = L'\U0001F525'; // { dg-error "character constant too long for its type" "" { target { c++23 } } }
- // { dg-warning "character constant too long for its type" "" { target { c++20_down } } .-1 }
+wchar_t g = L'gh'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } }
+ // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 }
+wchar_t h = L'ijkl'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } }
+ // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 }
+wchar_t i = L'\U0001F525'; // { dg-error "character not encodable in a single code unit" "" { target { c++23 } } }
+ // { dg-warning "character not encodable in a single code unit" "" { target { c++20_down } } .-1 }
#ifdef __cpp_char8_t
typedef char8_t u8;
#else
@@ -24,20 +24,20 @@ typedef char u8;
#endif
#if __cpp_unicode_characters >= 201411
u8 j = u8'j';
-u8 k = u8'kl'; // { dg-error "character constant too long for its type" "" { target c++17 } }
-u8 l = u8'\U0001F525'; // { dg-error "character constant too long for its type" "" { target c++17 } }
+u8 k = u8'kl'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++17 } }
+u8 l = u8'\U0001F525'; // { dg-error "character not encodable in a single code unit" "" { target c++17 } }
#endif
#if __cpp_unicode_characters >= 200704
char16_t m = u'm';
-char16_t n = u'no'; // { dg-error "character constant too long for its type" "" { target c++11 } }
+char16_t n = u'no'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++11 } }
char16_t o = u'\u05D9';
-char16_t p = u'\U0001F525'; // { dg-error "character constant too long for its type" "" { target c++11 } }
+char16_t p = u'\U0001F525'; // { dg-error "character not encodable in a single code unit" "" { target c++11 } }
char32_t q = U'm';
-char32_t r = U'no'; // { dg-error "character constant too long for its type" "" { target c++11 } }
+char32_t r = U'no'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++11 } }
char32_t s = U'\u05D9';
char32_t t = U'\U0001F525';
#endif
-wchar_t u = L'\u0065\u0301'; // { dg-error "character constant too long for its type" "" { target c++23 } }
- // { dg-warning "character constant too long for its type" "" { target c++20_down } .-1 }
-wchar_t v = L'é'; // { dg-error "character constant too long for its type" "" { target c++23 } }
- // { dg-warning "character constant too long for its type" "" { target c++20_down } .-1 }
+wchar_t u = L'\u0065\u0301'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } }
+ // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 }
+wchar_t v = L'é'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } }
+ // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 }
diff --git a/gcc/testsuite/g++.dg/cpp26/literals1.C b/gcc/testsuite/g++.dg/cpp26/literals1.C
new file mode 100644
index 0000000..d51f2f8
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp26/literals1.C
@@ -0,0 +1,66 @@
+// C++26 P1854R4 - Making non-encodable string literals ill-formed
+// { dg-do compile { target c++11 } }
+// { dg-require-effective-target int32 }
+// { dg-options "-pedantic-errors -finput-charset=UTF-8 -fexec-charset=UTF-8" }
+
+int a = 'abcd'; // { dg-warning "multi-character character constant" }
+int b = '\x61\x62\x63\x64'; // { dg-warning "multi-character character constant" }
+int c = 'á'; // { dg-error "character not encodable in a single execution character code unit" }
+int d = '😁'; // { dg-error "character not encodable in a single execution character code unit" }
+int e = '\N{FACE WITH TEARS OF JOY}'; // { dg-error "character not encodable in a single execution character code unit" }
+ // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } .-1 }
+int f = '\U0001F602'; // { dg-error "character not encodable in a single execution character code unit" }
+wchar_t g = L'abcd'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } }
+ // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 }
+wchar_t h = L'\x61\x62\x63\x64'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } }
+ // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 }
+wchar_t i = L'á';
+char16_t j = u'abcd'; // { dg-error "multi-character literal cannot have an encoding prefix" }
+char16_t k = u'\x61\x62\x63\x64'; // { dg-error "multi-character literal cannot have an encoding prefix" }
+char16_t l = u'á';
+char16_t m = u'😁'; // { dg-error "character not encodable in a single code unit" }
+char16_t n = u'\N{FACE WITH TEARS OF JOY}'; // { dg-error "character not encodable in a single code unit" { target c++23 } }
+ // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } .-1 }
+char16_t o = u'\U0001F602'; // { dg-error "character not encodable in a single code unit" }
+char32_t p = U'abcd'; // { dg-error "multi-character literal cannot have an encoding prefix" }
+char32_t q = U'\x61\x62\x63\x64'; // { dg-error "multi-character literal cannot have an encoding prefix" }
+char32_t r = U'á';
+char32_t s = U'😁';
+char32_t t = U'\N{FACE WITH TEARS OF JOY}'; // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } }
+char32_t u = U'\U0001F602';
+#if __cpp_unicode_characters >= 201411L
+auto v = u8'abcd'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++17 } }
+auto w = u8'\x61\x62\x63\x64'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++17 } }
+auto x = u8'á'; // { dg-error "character not encodable in a single code unit" "" { target c++17 } }
+auto y = u8'😁'; // { dg-error "character not encodable in a single code unit" "" { target c++17 } }
+auto z = u8'\N{FACE WITH TEARS OF JOY}'; // { dg-error "character not encodable in a single code unit" "" { target c++17 } }
+ // { dg-error "named universal character escapes are only valid in" "" { target { c++17 && c++20_down } } .-1 }
+auto aa = u8'\U0001F602'; // { dg-error "character not encodable in a single code unit" "" { target c++17 } }
+#endif
+const char *ab = "😁";
+const char *ac = "\N{FACE WITH TEARS OF JOY}"; // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } }
+const char *ad = "\U0001F602";
+const char16_t *ae = u"😁";
+const char16_t *af = u"\N{FACE WITH TEARS OF JOY}"; // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } }
+const char16_t *ag = u"\U0001F602";
+const char32_t *ah = U"😁";
+const char32_t *ai = U"\N{FACE WITH TEARS OF JOY}"; // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } }
+const char32_t *aj = U"\U0001F602";
+auto ak = u8"😁";
+auto al = u8"\N{FACE WITH TEARS OF JOY}"; // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } }
+auto am = u8"\U0001F602";
+int an = '\x123456789'; // { dg-error "hex escape sequence out of range" }
+wchar_t ao = L'\x123456789abcdef0'; // { dg-error "hex escape sequence out of range" }
+char16_t ap = u'\x12345678'; // { dg-error "hex escape sequence out of range" }
+char32_t aq = U'\x123456789abcdef0'; // { dg-error "hex escape sequence out of range" }
+#if __cpp_unicode_characters >= 201411L
+auto ar = u8'\x123456789abcdef0'; // { dg-error "hex escape sequence out of range" "" { target c++17 } }
+#endif
+char as = '\xff';
+#if __SIZEOF_WCHAR_T__ * __CHAR_BIT__ == 32
+wchar_t at = L'\xffffffff';
+#elif __SIZEOF_WCHAR_T__ * __CHAR_BIT__ == 16
+wchar_t at = L'\xffff';
+#endif
+int au = '\x1234'; // { dg-error "hex escape sequence out of range" }
+int av = 'abcdefghijklmnop'; // { dg-warning "multi-character literal with \[0-9]+ characters exceeds 'int' size of \[0-9]+ bytes" }
diff --git a/gcc/testsuite/g++.dg/cpp26/literals2.C b/gcc/testsuite/g++.dg/cpp26/literals2.C
new file mode 100644
index 0000000..11e4406
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp26/literals2.C
@@ -0,0 +1,68 @@
+// C++26 P1854R4 - Making non-encodable string literals ill-formed
+// { dg-do compile { target c++11 } }
+// { dg-require-effective-target int32 }
+// { dg-options "-pedantic-errors -finput-charset=UTF-8 -fexec-charset=ISO-8859-1" }
+/* { dg-require-iconv "ISO-8859-1" } */
+
+int a = 'abcd'; // { dg-warning "multi-character character constant" }
+int b = '\x61\x62\x63\x64'; // { dg-warning "multi-character character constant" }
+int c = 'á';
+int d = '😁'; // { dg-error "converting to execution character set" }
+int e = '\N{FACE WITH TEARS OF JOY}'; // { dg-error "converting UCN to execution character set" }
+ // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } .-1 }
+int f = '\U0001F602'; // { dg-error "converting UCN to execution character set" }
+wchar_t g = L'abcd'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } }
+ // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 }
+wchar_t h = L'\x61\x62\x63\x64'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } }
+ // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 }
+wchar_t i = L'á';
+char16_t j = u'abcd'; // { dg-error "multi-character literal cannot have an encoding prefix" }
+char16_t k = u'\x61\x62\x63\x64'; // { dg-error "multi-character literal cannot have an encoding prefix" }
+char16_t l = u'á';
+char16_t m = u'😁'; // { dg-error "character not encodable in a single code unit" }
+char16_t n = u'\N{FACE WITH TEARS OF JOY}'; // { dg-error "character not encodable in a single code unit" { target c++23 } }
+ // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } .-1 }
+char16_t o = u'\U0001F602'; // { dg-error "character not encodable in a single code unit" }
+char32_t p = U'abcd'; // { dg-error "multi-character literal cannot have an encoding prefix" }
+char32_t q = U'\x61\x62\x63\x64'; // { dg-error "multi-character literal cannot have an encoding prefix" }
+char32_t r = U'á';
+char32_t s = U'😁';
+char32_t t = U'\N{FACE WITH TEARS OF JOY}'; // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } }
+char32_t u = U'\U0001F602';
+#if __cpp_unicode_characters >= 201411L
+auto v = u8'abcd'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++17 } }
+auto w = u8'\x61\x62\x63\x64'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++17 } }
+auto x = u8'á'; // { dg-error "character not encodable in a single code unit" "" { target c++17 } }
+auto y = u8'😁'; // { dg-error "character not encodable in a single code unit" "" { target c++17 } }
+auto z = u8'\N{FACE WITH TEARS OF JOY}'; // { dg-error "character not encodable in a single code unit" "" { target c++17 } }
+ // { dg-error "named universal character escapes are only valid in" "" { target { c++17 && c++20_down } } .-1 }
+auto aa = u8'\U0001F602'; // { dg-error "character not encodable in a single code unit" "" { target c++17 } }
+#endif
+const char *ab = "😁"; // { dg-error "converting to execution character set" }
+const char *ac = "\N{FACE WITH TEARS OF JOY}"; // { dg-error "converting UCN to execution character set" }
+ // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } .-1 }
+const char *ad = "\U0001F602"; // { dg-error "converting UCN to execution character set" }
+const char16_t *ae = u"😁";
+const char16_t *af = u"\N{FACE WITH TEARS OF JOY}"; // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } }
+const char16_t *ag = u"\U0001F602";
+const char32_t *ah = U"😁";
+const char32_t *ai = U"\N{FACE WITH TEARS OF JOY}"; // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } }
+const char32_t *aj = U"\U0001F602";
+auto ak = u8"😁";
+auto al = u8"\N{FACE WITH TEARS OF JOY}"; // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } }
+auto am = u8"\U0001F602";
+int an = '\x123456789'; // { dg-error "hex escape sequence out of range" }
+wchar_t ao = L'\x123456789abcdef0'; // { dg-error "hex escape sequence out of range" }
+char16_t ap = u'\x12345678'; // { dg-error "hex escape sequence out of range" }
+char32_t aq = U'\x123456789abcdef0'; // { dg-error "hex escape sequence out of range" }
+#if __cpp_unicode_characters >= 201411L
+auto ar = u8'\x123456789abcdef0'; // { dg-error "hex escape sequence out of range" "" { target c++17 } }
+#endif
+char as = '\xff';
+#if __SIZEOF_WCHAR_T__ * __CHAR_BIT__ == 32
+wchar_t at = L'\xffffffff';
+#elif __SIZEOF_WCHAR_T__ * __CHAR_BIT__ == 16
+wchar_t at = L'\xffff';
+#endif
+int au = '\x1234'; // { dg-error "hex escape sequence out of range" }
+int av = 'abcdefghijklmnop'; // { dg-warning "multi-character literal with \[0-9]+ characters exceeds 'int' size of \[0-9]+ bytes" }
diff --git a/gcc/testsuite/g++.dg/cpp2a/ucn2.C b/gcc/testsuite/g++.dg/cpp2a/ucn2.C
index ee7011b..53ee06e 100644
--- a/gcc/testsuite/g++.dg/cpp2a/ucn2.C
+++ b/gcc/testsuite/g++.dg/cpp2a/ucn2.C
@@ -12,18 +12,18 @@ const char32_t *f = U"\uD802"; // { dg-error "is not a valid universal characte
const char32_t *g = U"\U0000DFF0"; // { dg-error "is not a valid universal character" }
const char32_t *h = U"\U00110001"; // { dg-error "is outside the UCS codespace" "" { target c++20 } }
#if __cpp_unicode_characters >= 201411
-const char8_t i = u8'\u00C0'; // { dg-error "character constant too long for its type" "" { target c++17 } }
+const char8_t i = u8'\u00C0'; // { dg-error "character not encodable in a single code unit" "" { target c++17 } }
#endif
-const char16_t j = u'\U0001F914'; // { dg-error "character constant too long for its type" }
+const char16_t j = u'\U0001F914'; // { dg-error "character not encodable in a single code unit" }
const char32_t k = U'\U0001F914';
#if __cpp_unicode_characters >= 201411
-const char8_t l = u8'ab'; // { dg-error "character constant too long for its type" "" { target c++17 } }
+const char8_t l = u8'ab'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++17 } }
#endif
-const char16_t m = u'ab'; // { dg-error "character constant too long for its type" }
-const char32_t n = U'ab'; // { dg-error "character constant too long for its type" }
+const char16_t m = u'ab'; // { dg-error "multi-character literal cannot have an encoding prefix" }
+const char32_t n = U'ab'; // { dg-error "multi-character literal cannot have an encoding prefix" }
#if __cpp_unicode_characters >= 201411
const char8_t o = u8'\U00110002'; // { dg-error "is outside the UCS codespace" "" { target c++20 } }
- // { dg-error "character constant too long for its type" "" { target c++17 } .-1 }
+ // { dg-error "character not encodable in a single code unit" "" { target c++17 } .-1 }
#endif
const char16_t p = u'\U00110003'; // { dg-error "is outside the UCS codespace" "" { target c++20 } }
// { dg-error "converting UCN to execution character set" "" { target *-*-* } .-1 }
diff --git a/gcc/testsuite/g++.dg/ext/utf16-4.C b/gcc/testsuite/g++.dg/ext/utf16-4.C
index 030e085..feb8166 100644
--- a/gcc/testsuite/g++.dg/ext/utf16-4.C
+++ b/gcc/testsuite/g++.dg/ext/utf16-4.C
@@ -4,8 +4,8 @@
const static char16_t c0 = u''; /* { dg-error "empty character" } */
-const static char16_t c1 = u'ab'; /* { dg-error "constant too long" } */
-const static char16_t c2 = u'\U00064321'; /* { dg-error "constant too long" } */
+const static char16_t c1 = u'ab'; /* { dg-error "multi-character literal cannot have an encoding prefix" } */
+const static char16_t c2 = u'\U00064321'; /* { dg-error "character not encodable in a single code unit" } */
const static char16_t c3 = 'a';
const static char16_t c4 = U'a';
@@ -14,5 +14,6 @@ const static char16_t c6 = U'\U00064321'; /* { dg-warning "conversion from .char
const static char16_t c7 = L'a';
const static char16_t c8 = L'\u2029';
const static char16_t c9 = L'\U00064321'; /* { dg-warning "conversion from .wchar_t. to .char16_t. changes value from .410401. to .17185." "" { target { 4byte_wchar_t } } } */
- /* { dg-warning "constant too long" "" { target { ! 4byte_wchar_t } } .-1 } */
+ /* { dg-warning "character not encodable in a single code unit" "" { target { { ! 4byte_wchar_t } && c++20_down } } .-1 } */
+ /* { dg-error "character not encodable in a single code unit" "" { target { { ! 4byte_wchar_t } && c++23 } } .-2 } */
int main () {}
diff --git a/gcc/testsuite/g++.dg/ext/utf32-4.C b/gcc/testsuite/g++.dg/ext/utf32-4.C
index 96bf0bb..8310bf4 100644
--- a/gcc/testsuite/g++.dg/ext/utf32-4.C
+++ b/gcc/testsuite/g++.dg/ext/utf32-4.C
@@ -3,15 +3,16 @@
/* { dg-do compile { target c++11 } } */
const static char32_t c0 = U''; /* { dg-error "empty character" } */
-const static char32_t c1 = U'ab'; /* { dg-error "constant too long" } */
+const static char32_t c1 = U'ab'; /* { dg-error "multi-character literal cannot have an encoding prefix" } */
const static char32_t c2 = U'\U00064321';
const static char32_t c3 = 'a';
const static char32_t c4 = u'a';
const static char32_t c5 = u'\u2029';
-const static char32_t c6 = u'\U00064321'; /* { dg-error "constant too long" } */
+const static char32_t c6 = u'\U00064321'; /* { dg-error "character not encodable in a single code unit" } */
const static char32_t c7 = L'a';
const static char32_t c8 = L'\u2029';
-const static char32_t c9 = L'\U00064321'; /* { dg-warning "constant too long" "" { target { ! 4byte_wchar_t } } } */
+const static char32_t c9 = L'\U00064321'; /* { dg-warning "character not encodable in a single code unit" "" { target { { ! 4byte_wchar_t } && c++20_down } } } */
+ /* { dg-error "character not encodable in a single code unit" "" { target { { ! 4byte_wchar_t } && c++23 } } .-1 } */
int main () {}
diff --git a/gcc/testsuite/gcc.dg/c23-utf8char-3.c b/gcc/testsuite/gcc.dg/c23-utf8char-3.c
index e152edb..0d82af5 100644
--- a/gcc/testsuite/gcc.dg/c23-utf8char-3.c
+++ b/gcc/testsuite/gcc.dg/c23-utf8char-3.c
@@ -3,6 +3,6 @@
/* { dg-options "-std=c23 -pedantic-errors" } */
unsigned char a = u8''; /* { dg-error "empty character constant" } */
-unsigned char b = u8'ab'; /* { dg-error "character constant too long for its type" } */
-unsigned char c = u8'\u00ff'; /* { dg-error "character constant too long for its type" } */
+unsigned char b = u8'ab'; /* { dg-error "multi-character literal cannot have an encoding prefix" } */
+unsigned char c = u8'\u00ff'; /* { dg-error "character not encodable in a single code unit" } */
unsigned char d = u8'\x100'; /* { dg-error "hex escape sequence out of range" } */
diff --git a/gcc/testsuite/gcc.dg/cpp/charconst-4.c b/gcc/testsuite/gcc.dg/cpp/charconst-4.c
index 9ea5e8a..03706c5 100644
--- a/gcc/testsuite/gcc.dg/cpp/charconst-4.c
+++ b/gcc/testsuite/gcc.dg/cpp/charconst-4.c
@@ -38,7 +38,7 @@ extern void abort (void);
# error Charconst incorrectly sign-extended
#endif
-#if LONG_CHARCONST != SHORT_CHARCONST /* { dg-warning "too long" } */
+#if LONG_CHARCONST != SHORT_CHARCONST /* { dg-warning "multi-character literal with \[0-9]+ characters exceeds 'int' size of \[0-9]+ bytes" } */
# error Overly long charconst truncates wrongly for preprocessor
#endif
@@ -46,7 +46,7 @@ int main ()
{
if (POS_CHARCONST < 0)
abort ();
- if (LONG_CHARCONST != SHORT_CHARCONST) /* { dg-warning "too long" } */
+ if (LONG_CHARCONST != SHORT_CHARCONST) /* { dg-warning "multi-character literal with \[0-9]+ characters exceeds 'int' size of \[0-9]+ bytes" } */
abort ();
return 0;
}
diff --git a/gcc/testsuite/gcc.dg/cpp/charconst.c b/gcc/testsuite/gcc.dg/cpp/charconst.c
index 8934d6a..a2a5717 100644
--- a/gcc/testsuite/gcc.dg/cpp/charconst.c
+++ b/gcc/testsuite/gcc.dg/cpp/charconst.c
@@ -11,9 +11,9 @@
#endif
#if L'' /* { dg-error "empty" "empty wide charconst" } */
#endif
-#if 'very long' /* { dg-warning "too long" "long charconst" } */
+#if 'very long' /* { dg-warning "multi-character literal with \[0-9]+ characters exceeds 'int' size of \[0-9]+ bytes" "long charconst" } */
#endif
-#if L'very long' /* { dg-warning "too long" "long wide charconst" } */
+#if L'very long' /* { dg-warning "multi-character literal cannot have an encoding prefix" "long wide charconst" } */
#endif
/* Don't do this test for L'ab'; it depends upon sizeof (wchar_t). */
#if 'ab' /* { dg-warning "multi-char" "multi-character" } */
@@ -27,10 +27,10 @@ void foo ()
c = ''; /* { dg-error "empty" "empty charconst" } */
w = L''; /* { dg-error "empty" "empty wide charconst" } */
- c = 'very long'; /* { dg-warning "too long" "long charconst" } */
- w = L'very long'; /* { dg-warning "too long" "long wide charconst" } */
+ c = 'very long'; /* { dg-warning "multi-character literal with \[0-9]+ characters exceeds 'int' size of \[0-9]+ bytes" "long charconst" } */
+ w = L'very long'; /* { dg-warning "multi-character literal cannot have an encoding prefix" "long wide charconst" } */
c = 'ab'; /* { dg-warning "multi-char" "multi-char" } */
/* Wide charconsts cannot contain more than one wide character. */
- w = L'ab'; /* { dg-warning "too long" "multi-char wide" } */
+ w = L'ab'; /* { dg-warning "multi-character literal cannot have an encoding prefix" "multi-char wide" } */
}
diff --git a/gcc/testsuite/gcc.dg/cpp/if-2.c b/gcc/testsuite/gcc.dg/cpp/if-2.c
index dc136b4..e969aa0 100644
--- a/gcc/testsuite/gcc.dg/cpp/if-2.c
+++ b/gcc/testsuite/gcc.dg/cpp/if-2.c
@@ -21,7 +21,7 @@
#if 'abcd' /* { dg-warning "(multi-character character constant)|(character constant (is )?too long)" "multi-character charconst" } */
#endif
-#if 'abcdefghi' /* { dg-warning "character constant (is )?too long" "charconst too long" } */
+#if 'abcdefghi' /* { dg-warning "multi-character literal with \[0-9]+ characters exceeds 'int' size of \[0-9]+ bytes" } */
#endif
#if '' /* { dg-error "empty character constant" "empty charconst" } */
diff --git a/gcc/testsuite/gcc.dg/utf16-4.c b/gcc/testsuite/gcc.dg/utf16-4.c
index 60e00f6..e108d00 100644
--- a/gcc/testsuite/gcc.dg/utf16-4.c
+++ b/gcc/testsuite/gcc.dg/utf16-4.c
@@ -6,8 +6,8 @@
typedef __CHAR16_TYPE__ char16_t;
char16_t c0 = u''; /* { dg-error "empty character" } */
-char16_t c1 = u'ab'; /* { dg-warning "constant too long" } */
-char16_t c2 = u'\U00064321'; /* { dg-warning "constant too long" } */
+char16_t c1 = u'ab'; /* { dg-warning "multi-character literal cannot have an encoding prefix" } */
+char16_t c2 = u'\U00064321'; /* { dg-warning "character not encodable in a single code unit" } */
char16_t c3 = 'a';
char16_t c4 = U'a';
@@ -16,6 +16,6 @@ char16_t c6 = U'\U00064321'; /* { dg-warning "conversion from .(long )?unsigned
char16_t c7 = L'a';
char16_t c8 = L'\u2029';
char16_t c9 = L'\U00064321'; /* { dg-warning "conversion" "" { target { 4byte_wchar_t } } } */
- /* { dg-warning "constant too long" "" { target { ! 4byte_wchar_t } } .-1 } */
+ /* { dg-warning "character not encodable in a single code unit" "" { target { ! 4byte_wchar_t } } .-1 } */
int main () {}
diff --git a/gcc/testsuite/gcc.dg/utf32-4.c b/gcc/testsuite/gcc.dg/utf32-4.c
index aa7f66a..72086bc 100644
--- a/gcc/testsuite/gcc.dg/utf32-4.c
+++ b/gcc/testsuite/gcc.dg/utf32-4.c
@@ -6,15 +6,15 @@
typedef __CHAR32_TYPE__ char32_t;
char32_t c0 = U''; /* { dg-error "empty character" } */
-char32_t c1 = U'ab'; /* { dg-warning "constant too long" } */
+char32_t c1 = U'ab'; /* { dg-warning "multi-character literal cannot have an encoding prefix" } */
char32_t c2 = U'\U00064321';
char32_t c3 = 'a';
char32_t c4 = u'a';
char32_t c5 = u'\u2029';
-char32_t c6 = u'\U00064321'; /* { dg-warning "constant too long" } */
+char32_t c6 = u'\U00064321'; /* { dg-warning "character not encodable in a single code unit" } */
char32_t c7 = L'a';
char32_t c8 = L'\u2029';
-char32_t c9 = L'\U00064321'; /* { dg-warning "constant too long" "" { target { ! 4byte_wchar_t } } } */
+char32_t c9 = L'\U00064321'; /* { dg-warning "character not encodable in a single code unit" "" { target { ! 4byte_wchar_t } } } */
int main () {}