aboutsummaryrefslogtreecommitdiff
path: root/gcc/testsuite/g++.dg/cpp23
diff options
context:
space:
mode:
author: Jakub Jelinek <jakub@redhat.com> 2023-11-14 18:28:34 +0100
committer: Jakub Jelinek <jakub@redhat.com> 2023-11-14 18:28:34 +0100
commit: 194825f20619a1c4b51eaea84f20432fefc0db03 (patch)
tree: 93e0f44cfa40ba14f7585d7aee9464f25b3f15e7 /gcc/testsuite/g++.dg/cpp23
parent: 948b8b6e0e50958ecf56d4d9fb7ac16f245d9cc3 (diff)
download: gcc-194825f20619a1c4b51eaea84f20432fefc0db03.zip
gcc-194825f20619a1c4b51eaea84f20432fefc0db03.tar.gz
gcc-194825f20619a1c4b51eaea84f20432fefc0db03.tar.bz2
c++: Implement C++26 P1854R4 - Making non-encodable string literals ill-formed [PR110341]
This paper, voted in as a DR, makes some multi-character literals ill-formed.
'abcd' stays valid, but e.g. 'á' is newly invalid in a UTF-8 exec charset
while still valid e.g. in ISO-8859-1, because it is a single character which
needs 2 bytes to be encoded.

The following patch implements that by checking (only pedantically,
especially because it is a DR), whenever we'd emit a -Wmultichar warning
because the character constant has more than one byte in it, whether the
number of source characters is equal to the number of bytes in the multichar
string. If it is, it is a normal multi-character literal constant and is
diagnosed normally with -Wmultichar; otherwise at least one of the c-chars in
the sequence was encoded as 2+ bytes.

2023-11-14  Jakub Jelinek  <jakub@redhat.com>

	PR c++/110341
libcpp/
	* charset.cc: Implement C++26 P1854R4 - Making non-encodable string
	literals ill-formed.
	(one_count_chars, convert_count_chars, count_source_chars): New
	functions.
	(narrow_str_to_charconst): Change last arg type from cpp_ttype to
	const cpp_token *. For C++ if pedantic and i > 1 in CPP_CHAR
	interpret token also as CPP_STRING32 and if number of characters
	in the CPP_STRING32 is larger than number of bytes in CPP_CHAR,
	pedwarn on it. Make the diagnostics more detailed.
	(wide_str_to_charconst): Change last arg type from cpp_ttype to
	const cpp_token *. Make the diagnostics more detailed.
	(cpp_interpret_charconst): Adjust narrow_str_to_charconst and
	wide_str_to_charconst callers.
gcc/testsuite/
	* g++.dg/cpp26/literals1.C: New test.
	* g++.dg/cpp26/literals2.C: New test.
	* g++.dg/cpp23/wchar-multi1.C: Adjust expected diagnostic wordings.
	* g++.dg/cpp23/wchar-multi2.C: Likewise.
	* gcc.dg/c23-utf8char-3.c: Likewise.
	* gcc.dg/cpp/charconst-4.c: Likewise.
	* gcc.dg/cpp/charconst.c: Likewise.
	* gcc.dg/cpp/if-2.c: Likewise.
	* gcc.dg/utf16-4.c: Likewise.
	* gcc.dg/utf32-4.c: Likewise.
	* g++.dg/cpp1z/utf8-neg.C: Likewise.
	* g++.dg/cpp2a/ucn2.C: Likewise.
	* g++.dg/ext/utf16-4.C: Likewise.
	* g++.dg/ext/utf32-4.C: Likewise.
Diffstat (limited to 'gcc/testsuite/g++.dg/cpp23')
-rw-r--r-- gcc/testsuite/g++.dg/cpp23/wchar-multi1.C | 37
-rw-r--r-- gcc/testsuite/g++.dg/cpp23/wchar-multi2.C | 30
2 files changed, 34 insertions, 33 deletions
diff --git a/gcc/testsuite/g++.dg/cpp23/wchar-multi1.C b/gcc/testsuite/g++.dg/cpp23/wchar-multi1.C
index 77fa3a6..d86dfc9 100644
--- a/gcc/testsuite/g++.dg/cpp23/wchar-multi1.C
+++ b/gcc/testsuite/g++.dg/cpp23/wchar-multi1.C
@@ -4,18 +4,19 @@
char a = 'a';
int b = 'ab'; // { dg-warning "multi-character character constant" }
-int c = '\u05D9'; // { dg-warning "multi-character character constant" }
+int c = '\u05D9'; // { dg-error "character not encodable in a single execution character code unit" }
#if __SIZEOF_INT__ > 2
-int d = '\U0001F525'; // { dg-warning "multi-character character constant" "" { target int32 } }
+int d = '\U0001F525'; // { dg-error "character not encodable in a single execution character code unit" "" { target int32 } }
#endif
-int e = 'abcd'; // { dg-warning "multi-character character constant" }
+int e = 'abcd'; // { dg-warning "multi-character character constant" "" { target int32plus } }
+ // { dg-warning "multi-character literal with \[0-9]+ characters exceeds 'int' size of \[0-9]+ bytes" "" { target { ! int32plus } } .-1 }
wchar_t f = L'f';
-wchar_t g = L'gh'; // { dg-error "character constant too long for its type" "" { target c++23 } }
- // { dg-warning "character constant too long for its type" "" { target c++20_down } .-1 }
-wchar_t h = L'ijkl'; // { dg-error "character constant too long for its type" "" { target c++23 } }
- // { dg-warning "character constant too long for its type" "" { target c++20_down } .-1 }
-wchar_t i = L'\U0001F525'; // { dg-error "character constant too long for its type" "" { target { c++23 && { ! 4byte_wchar_t } } } }
- // { dg-warning "character constant too long for its type" "" { target { c++20_down && { ! 4byte_wchar_t } } } .-1 }
+wchar_t g = L'gh'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } }
+ // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 }
+wchar_t h = L'ijkl'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } }
+ // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 }
+wchar_t i = L'\U0001F525'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target { c++23 && { ! 4byte_wchar_t } } } }
+ // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target { c++20_down && { ! 4byte_wchar_t } } } .-1 }
#ifdef __cpp_char8_t
typedef char8_t u8;
#else
@@ -23,20 +24,20 @@ typedef char u8;
#endif
#if __cpp_unicode_characters >= 201411
u8 j = u8'j';
-u8 k = u8'kl'; // { dg-error "character constant too long for its type" "" { target c++17 } }
-u8 l = u8'\U0001F525'; // { dg-error "character constant too long for its type" "" { target c++17 } }
+u8 k = u8'kl'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++17 } }
+u8 l = u8'\U0001F525'; // { dg-error "character not encodable in a single code unit" "" { target c++17 } }
#endif
#if __cpp_unicode_characters >= 200704
char16_t m = u'm';
-char16_t n = u'no'; // { dg-error "character constant too long for its type" "" { target c++11 } }
+char16_t n = u'no'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++11 } }
char16_t o = u'\u05D9';
-char16_t p = u'\U0001F525'; // { dg-error "character constant too long for its type" "" { target c++11 } }
+char16_t p = u'\U0001F525'; // { dg-error "character not encodable in a single code unit" "" { target c++11 } }
char32_t q = U'm';
-char32_t r = U'no'; // { dg-error "character constant too long for its type" "" { target c++11 } }
+char32_t r = U'no'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++11 } }
char32_t s = U'\u05D9';
char32_t t = U'\U0001F525';
#endif
-wchar_t u = L'\u0065\u0301'; // { dg-error "character constant too long for its type" "" { target c++23 } }
- // { dg-warning "character constant too long for its type" "" { target c++20_down } .-1 }
-wchar_t v = L'é'; // { dg-error "character constant too long for its type" "" { target c++23 } }
- // { dg-warning "character constant too long for its type" "" { target c++20_down } .-1 }
+wchar_t u = L'\u0065\u0301'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } }
+ // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 }
+wchar_t v = L'é'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } }
+ // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 }
diff --git a/gcc/testsuite/g++.dg/cpp23/wchar-multi2.C b/gcc/testsuite/g++.dg/cpp23/wchar-multi2.C
index a63e0ff..270de65 100644
--- a/gcc/testsuite/g++.dg/cpp23/wchar-multi2.C
+++ b/gcc/testsuite/g++.dg/cpp23/wchar-multi2.C
@@ -11,12 +11,12 @@ int d = '\U0001F525'; // { dg-warning "multi-character character constant" "" {
#endif
int e = 'abcd'; // { dg-warning "multi-character character constant" }
wchar_t f = L'f';
-wchar_t g = L'gh'; // { dg-error "character constant too long for its type" "" { target c++23 } }
- // { dg-warning "character constant too long for its type" "" { target c++20_down } .-1 }
-wchar_t h = L'ijkl'; // { dg-error "character constant too long for its type" "" { target c++23 } }
- // { dg-warning "character constant too long for its type" "" { target c++20_down } .-1 }
-wchar_t i = L'\U0001F525'; // { dg-error "character constant too long for its type" "" { target { c++23 } } }
- // { dg-warning "character constant too long for its type" "" { target { c++20_down } } .-1 }
+wchar_t g = L'gh'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } }
+ // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 }
+wchar_t h = L'ijkl'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } }
+ // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 }
+wchar_t i = L'\U0001F525'; // { dg-error "character not encodable in a single code unit" "" { target { c++23 } } }
+ // { dg-warning "character not encodable in a single code unit" "" { target { c++20_down } } .-1 }
#ifdef __cpp_char8_t
typedef char8_t u8;
#else
@@ -24,20 +24,20 @@ typedef char u8;
#endif
#if __cpp_unicode_characters >= 201411
u8 j = u8'j';
-u8 k = u8'kl'; // { dg-error "character constant too long for its type" "" { target c++17 } }
-u8 l = u8'\U0001F525'; // { dg-error "character constant too long for its type" "" { target c++17 } }
+u8 k = u8'kl'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++17 } }
+u8 l = u8'\U0001F525'; // { dg-error "character not encodable in a single code unit" "" { target c++17 } }
#endif
#if __cpp_unicode_characters >= 200704
char16_t m = u'm';
-char16_t n = u'no'; // { dg-error "character constant too long for its type" "" { target c++11 } }
+char16_t n = u'no'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++11 } }
char16_t o = u'\u05D9';
-char16_t p = u'\U0001F525'; // { dg-error "character constant too long for its type" "" { target c++11 } }
+char16_t p = u'\U0001F525'; // { dg-error "character not encodable in a single code unit" "" { target c++11 } }
char32_t q = U'm';
-char32_t r = U'no'; // { dg-error "character constant too long for its type" "" { target c++11 } }
+char32_t r = U'no'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++11 } }
char32_t s = U'\u05D9';
char32_t t = U'\U0001F525';
#endif
-wchar_t u = L'\u0065\u0301'; // { dg-error "character constant too long for its type" "" { target c++23 } }
- // { dg-warning "character constant too long for its type" "" { target c++20_down } .-1 }
-wchar_t v = L'é'; // { dg-error "character constant too long for its type" "" { target c++23 } }
- // { dg-warning "character constant too long for its type" "" { target c++20_down } .-1 }
+wchar_t u = L'\u0065\u0301'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } }
+ // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 }
+wchar_t v = L'é'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } }
+ // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 }