aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author Jakub Jelinek <jakub@redhat.com> 2022-09-01 09:48:01 +0200
committer Jakub Jelinek <jakub@redhat.com> 2022-09-01 09:56:44 +0200
commit 0b8c57ed40f19086e30ce54faec3222ac21cc0df (patch)
tree 1ce3aa0f19ef45a7d2c03e272d1d8f835bb7f0b6 /gcc
parent bdfe0d1ce0aebdb68b77e2c04a0f45956c56b449 (diff)
download gcc-0b8c57ed40f19086e30ce54faec3222ac21cc0df.zip
gcc-0b8c57ed40f19086e30ce54faec3222ac21cc0df.tar.gz
gcc-0b8c57ed40f19086e30ce54faec3222ac21cc0df.tar.bz2
libcpp: Add -Winvalid-utf8 warning [PR106655]
The following patch introduces a new warning, -Winvalid-utf8 (similar to what clang now has), which diagnoses invalid UTF-8 byte sequences not only in comments, but also in string/character literals and elsewhere in the source outside of them. The warning is on by default when -finput-charset=UTF-8 is given explicitly and C++23 compilation is requested; with -{,W}pedantic or -pedantic-errors it is actually a pedwarn.

The reason it is on by default only for an explicit -finput-charset=UTF-8 is that the sources often are UTF-8, but sometimes could be in some ASCII-compatible single-byte encoding where non-ASCII characters only appear in comments. So having the warning off by default otherwise is IMO desirable. The C++23 pedantic mode for when the source code is UTF-8 is -std=c++23 -pedantic-errors -finput-charset=UTF-8.

2022-09-01  Jakub Jelinek  <jakub@redhat.com>

	PR c++/106655
libcpp/
	* include/cpplib.h (struct cpp_options): Implement C++23 P2295R6 - Support for UTF-8 as a portable source file encoding. Add cpp_warn_invalid_utf8 and cpp_input_charset_explicit fields.
	(enum cpp_warning_reason): Add CPP_W_INVALID_UTF8 enumerator.
	* init.cc (cpp_create_reader): Initialize cpp_warn_invalid_utf8 and cpp_input_charset_explicit.
	* charset.cc (_cpp_valid_utf8): Adjust function comment.
	* lex.cc (UCS_LIMIT): Define.
	(utf8_continuation): New const variable.
	(utf8_signifier): Move earlier in the file.
	(_cpp_warn_invalid_utf8, _cpp_handle_multibyte_utf8): New functions.
	(_cpp_skip_block_comment): Handle -Winvalid-utf8 warning.
	(skip_line_comment): Likewise.
	(lex_raw_string, lex_string): Likewise.
	(_cpp_lex_direct): Likewise.
gcc/
	* doc/invoke.texi (-Winvalid-utf8): Document it.
gcc/c-family/
	* c.opt (-Winvalid-utf8): New warning.
	* c-opts.cc (c_common_handle_option) <case OPT_finput_charset_>: Set cpp_opts->cpp_input_charset_explicit.
	(c_common_post_options): If -finput-charset=UTF-8 is explicit in C++23, enable -Winvalid-utf8 by default and if -pedantic or -pedantic-errors, make it a pedwarn.
gcc/testsuite/
	* c-c++-common/cpp/Winvalid-utf8-1.c: New test.
	* c-c++-common/cpp/Winvalid-utf8-2.c: New test.
	* c-c++-common/cpp/Winvalid-utf8-3.c: New test.
	* g++.dg/cpp23/Winvalid-utf8-1.C: New test.
	* g++.dg/cpp23/Winvalid-utf8-2.C: New test.
	* g++.dg/cpp23/Winvalid-utf8-3.C: New test.
	* g++.dg/cpp23/Winvalid-utf8-4.C: New test.
	* g++.dg/cpp23/Winvalid-utf8-5.C: New test.
	* g++.dg/cpp23/Winvalid-utf8-6.C: New test.
	* g++.dg/cpp23/Winvalid-utf8-7.C: New test.
	* g++.dg/cpp23/Winvalid-utf8-8.C: New test.
	* g++.dg/cpp23/Winvalid-utf8-9.C: New test.
	* g++.dg/cpp23/Winvalid-utf8-10.C: New test.
	* g++.dg/cpp23/Winvalid-utf8-11.C: New test.
	* g++.dg/cpp23/Winvalid-utf8-12.C: New test.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/c-family/c-opts.cc12
-rw-r--r--gcc/c-family/c.opt4
-rw-r--r--gcc/doc/invoke.texi13
-rw-r--r--gcc/testsuite/c-c++-common/cpp/Winvalid-utf8-1.c43
-rw-r--r--gcc/testsuite/c-c++-common/cpp/Winvalid-utf8-2.c88
-rw-r--r--gcc/testsuite/c-c++-common/cpp/Winvalid-utf8-3.c27
-rw-r--r--gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-1.C43
-rw-r--r--gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-10.C25
-rw-r--r--gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-11.C25
-rw-r--r--gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-12.C25
-rw-r--r--gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-2.C43
-rw-r--r--gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-3.C43
-rw-r--r--gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-4.C43
-rw-r--r--gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-5.C80
-rw-r--r--gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-6.C80
-rw-r--r--gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-7.C80
-rw-r--r--gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-8.C80
-rw-r--r--gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-9.C25
18 files changed, 776 insertions, 3 deletions
diff --git a/gcc/c-family/c-opts.cc b/gcc/c-family/c-opts.cc
index 337a524..babaa2f 100644
--- a/gcc/c-family/c-opts.cc
+++ b/gcc/c-family/c-opts.cc
@@ -534,6 +534,7 @@ c_common_handle_option (size_t scode, const char *arg, HOST_WIDE_INT value,
case OPT_finput_charset_:
cpp_opts->input_charset = arg;
+ cpp_opts->cpp_input_charset_explicit = 1;
break;
case OPT_ftemplate_depth_:
@@ -1152,6 +1153,17 @@ c_common_post_options (const char **pfilename)
lang_hooks.preprocess_options (parse_in);
cpp_post_options (parse_in);
init_global_opts_from_cpp (&global_options, cpp_get_options (parse_in));
+ /* For C++23 and explicit -finput-charset=UTF-8, turn on -Winvalid-utf8
+ by default, making it a pedwarn if pedantic, unless the warning was
+ explicitly disabled with -Wno-invalid-utf8. */
+ if (cxx_dialect >= cxx23
+ && cpp_opts->cpp_input_charset_explicit
+ && strcmp (cpp_opts->input_charset, "UTF-8") == 0
+ && (cpp_opts->cpp_warn_invalid_utf8
+ || !global_options_set.x_warn_invalid_utf8))
+ {
+ global_options.x_warn_invalid_utf8 = 1;
+ cpp_opts->cpp_warn_invalid_utf8 = cpp_opts->cpp_pedantic ? 2 : 1;
+ }
/* Let diagnostics infrastructure know how to convert input files the same
way libcpp will do it, namely using the configured input charset and
diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index f776efd..ff6fe86 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -821,6 +821,10 @@ Winvalid-pch
C ObjC C++ ObjC++ CPP(warn_invalid_pch) CppReason(CPP_W_INVALID_PCH) Var(cpp_warn_invalid_pch) Init(0) Warning
Warn about PCH files that are found but not used.
+Winvalid-utf8
+C ObjC C++ ObjC++ CPP(cpp_warn_invalid_utf8) CppReason(CPP_W_INVALID_UTF8) Var(warn_invalid_utf8) Init(0) Warning
+Warn about invalid UTF-8 characters.
+
Wjump-misses-init
C ObjC Var(warn_jump_misses_init) Warning LangEnabledby(C ObjC,Wc++-compat)
Warn when a jump misses a variable initialization.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index e5eb525..8def6ba 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -365,9 +365,9 @@ Objective-C and Objective-C++ Dialects}.
-Winfinite-recursion @gol
-Winit-self -Winline -Wno-int-conversion -Wint-in-bool-context @gol
-Wno-int-to-pointer-cast -Wno-invalid-memory-model @gol
--Winvalid-pch -Wjump-misses-init -Wlarger-than=@var{byte-size} @gol
--Wlogical-not-parentheses -Wlogical-op -Wlong-long @gol
--Wno-lto-type-mismatch -Wmain -Wmaybe-uninitialized @gol
+-Winvalid-pch -Winvalid-utf8 -Wjump-misses-init @gol
+-Wlarger-than=@var{byte-size} -Wlogical-not-parentheses -Wlogical-op @gol
+-Wlong-long -Wno-lto-type-mismatch -Wmain -Wmaybe-uninitialized @gol
-Wmemset-elt-size -Wmemset-transposed-args @gol
-Wmisleading-indentation -Wmissing-attributes -Wmissing-braces @gol
-Wmissing-field-initializers -Wmissing-format-attribute @gol
@@ -9569,6 +9569,13 @@ different size.
Warn if a precompiled header (@pxref{Precompiled Headers}) is found in
the search path but cannot be used.
+@item -Winvalid-utf8
+@opindex Winvalid-utf8
+@opindex Wno-invalid-utf8
+Warn if an invalid UTF-8 character is found.
+This warning is on by default for C++23 if @option{-finput-charset=UTF-8}
+is used, and it is turned into an error with @option{-pedantic-errors}.
+
@item -Wlong-long
@opindex Wlong-long
@opindex Wno-long-long
diff --git a/gcc/testsuite/c-c++-common/cpp/Winvalid-utf8-1.c b/gcc/testsuite/c-c++-common/cpp/Winvalid-utf8-1.c
new file mode 100644
index 0000000..0d5a6a7
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/cpp/Winvalid-utf8-1.c
@@ -0,0 +1,43 @@
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid UTF-8
+// { dg-do preprocess }
+// { dg-options "-finput-charset=UTF-8 -Winvalid-utf8" }
+
+// a€߿ࠀ퟿𐀀􏿿a { dg-bogus "invalid UTF-8 character" }
+// aa { dg-warning "invalid UTF-8 character <80>" }
+// aa { dg-warning "invalid UTF-8 character <bf>" }
+// aa { dg-warning "invalid UTF-8 character <c0>" }
+// aa { dg-warning "invalid UTF-8 character <c1>" }
+// aa { dg-warning "invalid UTF-8 character <f5>" }
+// aa { dg-warning "invalid UTF-8 character <ff>" }
+// aa { dg-warning "invalid UTF-8 character <c2>" }
+// aa { dg-warning "invalid UTF-8 character <e0>" }
+// aa { dg-warning "invalid UTF-8 character <e0><80><bf>" }
+// aa { dg-warning "invalid UTF-8 character <e0><9f><80>" }
+// aa { dg-warning "invalid UTF-8 character <e0><bf>" }
+// aa { dg-warning "invalid UTF-8 character <ec><80>" }
+// aa { dg-warning "invalid UTF-8 character <ed><a0><80>" }
+// aa { dg-warning "invalid UTF-8 character <f0><80><80><80>" }
+// aa { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" }
+// aa { dg-warning "invalid UTF-8 character <f4><90><80><80>" }
+// aa { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" }
+// { dg-warning "invalid UTF-8 character <bf>" "" { target *-*-* } .-1 }
+/* a€߿ࠀ퟿𐀀􏿿a { dg-bogus "invalid UTF-8 character" } */
+/* aa { dg-warning "invalid UTF-8 character <80>" } */
+/* aa { dg-warning "invalid UTF-8 character <bf>" } */
+/* aa { dg-warning "invalid UTF-8 character <c0>" } */
+/* aa { dg-warning "invalid UTF-8 character <c1>" } */
+/* aa { dg-warning "invalid UTF-8 character <f5>" } */
+/* aa { dg-warning "invalid UTF-8 character <ff>" } */
+/* aa { dg-warning "invalid UTF-8 character <c2>" } */
+/* aa { dg-warning "invalid UTF-8 character <e0>" } */
+/* aa { dg-warning "invalid UTF-8 character <e0><80><bf>" } */
+/* aa { dg-warning "invalid UTF-8 character <e0><9f><80>" } */
+/* aa { dg-warning "invalid UTF-8 character <e0><bf>" } */
+/* aa { dg-warning "invalid UTF-8 character <ec><80>" } */
+/* aa { dg-warning "invalid UTF-8 character <ed><a0><80>" } */
+/* aa { dg-warning "invalid UTF-8 character <f0><80><80><80>" } */
+/* aa { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" } */
+/* aa { dg-warning "invalid UTF-8 character <f4><90><80><80>" } */
+/* aa { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" } */
+/* { dg-warning "invalid UTF-8 character <bf>" "" { target *-*-* } .-1 } */
diff --git a/gcc/testsuite/c-c++-common/cpp/Winvalid-utf8-2.c b/gcc/testsuite/c-c++-common/cpp/Winvalid-utf8-2.c
new file mode 100644
index 0000000..9ab69e1
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/cpp/Winvalid-utf8-2.c
@@ -0,0 +1,88 @@
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid UTF-8
+// { dg-do preprocess { target { c || c++11 } } }
+// { dg-require-effective-target wchar }
+// { dg-options "-finput-charset=UTF-8 -Winvalid-utf8" }
+// { dg-additional-options "-std=gnu99" { target c } }
+
+#ifndef __cplusplus
+#include <wchar.h>
+typedef __CHAR16_TYPE__ char16_t;
+typedef __CHAR32_TYPE__ char32_t;
+#endif
+
+char32_t a = U''; // { dg-warning "invalid UTF-8 character <80>" }
+char32_t b = U''; // { dg-warning "invalid UTF-8 character <bf>" }
+char32_t c = U''; // { dg-warning "invalid UTF-8 character <c0>" }
+char32_t d = U''; // { dg-warning "invalid UTF-8 character <c1>" }
+char32_t e = U''; // { dg-warning "invalid UTF-8 character <f5>" }
+char32_t f = U''; // { dg-warning "invalid UTF-8 character <ff>" }
+char32_t g = U''; // { dg-warning "invalid UTF-8 character <c2>" }
+char32_t h = U''; // { dg-warning "invalid UTF-8 character <e0>" }
+char32_t i = U''; // { dg-warning "invalid UTF-8 character <e0><80><bf>" }
+char32_t j = U''; // { dg-warning "invalid UTF-8 character <e0><9f><80>" }
+char32_t k = U''; // { dg-warning "invalid UTF-8 character <e0><bf>" }
+char32_t l = U''; // { dg-warning "invalid UTF-8 character <ec><80>" }
+char32_t m = U''; // { dg-warning "invalid UTF-8 character <ed><a0><80>" }
+char32_t n = U''; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" }
+char32_t o = U''; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" }
+char32_t p = U''; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" }
+char32_t q = U''; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" }
+ // { dg-warning "invalid UTF-8 character <bf>" "" { target *-*-* } .-1 }
+const char32_t *A = U"€߿ࠀ퟿𐀀􏿿"; // { dg-bogus "invalid UTF-8 character" }
+const char32_t *B = U""; // { dg-warning "invalid UTF-8 character <80>" }
+const char32_t *C = U""; // { dg-warning "invalid UTF-8 character <bf>" }
+const char32_t *D = U""; // { dg-warning "invalid UTF-8 character <c0>" }
+const char32_t *E = U""; // { dg-warning "invalid UTF-8 character <c1>" }
+const char32_t *F = U""; // { dg-warning "invalid UTF-8 character <f5>" }
+const char32_t *G = U""; // { dg-warning "invalid UTF-8 character <ff>" }
+const char32_t *H = U""; // { dg-warning "invalid UTF-8 character <c2>" }
+const char32_t *I = U""; // { dg-warning "invalid UTF-8 character <e0>" }
+const char32_t *J = U""; // { dg-warning "invalid UTF-8 character <e0><80><bf>" }
+const char32_t *K = U""; // { dg-warning "invalid UTF-8 character <e0><9f><80>" }
+const char32_t *L = U""; // { dg-warning "invalid UTF-8 character <e0><bf>" }
+const char32_t *M = U""; // { dg-warning "invalid UTF-8 character <ec><80>" }
+const char32_t *N = U""; // { dg-warning "invalid UTF-8 character <ed><a0><80>" }
+const char32_t *O = U""; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" }
+const char32_t *P = U""; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" }
+const char32_t *Q = U""; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" }
+const char32_t *R = U""; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" }
+ // { dg-warning "invalid UTF-8 character <bf>" "" { target *-*-* } .-1 }
+const char32_t *A1 = UR"(€߿ࠀ퟿𐀀􏿿)"; // { dg-bogus "invalid UTF-8 character" }
+const char32_t *B1 = UR"()"; // { dg-warning "invalid UTF-8 character <80>" }
+const char32_t *C1 = UR"()"; // { dg-warning "invalid UTF-8 character <bf>" }
+const char32_t *D1 = UR"()"; // { dg-warning "invalid UTF-8 character <c0>" }
+const char32_t *E1 = UR"()"; // { dg-warning "invalid UTF-8 character <c1>" }
+const char32_t *F1 = UR"()"; // { dg-warning "invalid UTF-8 character <f5>" }
+const char32_t *G1 = UR"()"; // { dg-warning "invalid UTF-8 character <ff>" }
+const char32_t *H1 = UR"()"; // { dg-warning "invalid UTF-8 character <c2>" }
+const char32_t *I1 = UR"()"; // { dg-warning "invalid UTF-8 character <e0>" }
+const char32_t *J1 = UR"()"; // { dg-warning "invalid UTF-8 character <e0><80><bf>" }
+const char32_t *K1 = UR"()"; // { dg-warning "invalid UTF-8 character <e0><9f><80>" }
+const char32_t *L1 = UR"()"; // { dg-warning "invalid UTF-8 character <e0><bf>" }
+const char32_t *M1 = UR"()"; // { dg-warning "invalid UTF-8 character <ec><80>" }
+const char32_t *N1 = UR"()"; // { dg-warning "invalid UTF-8 character <ed><a0><80>" }
+const char32_t *O1 = UR"()"; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" }
+const char32_t *P1 = UR"()"; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" }
+const char32_t *Q1 = UR"()"; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" }
+const char32_t *R1 = UR"()"; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" }
+ // { dg-warning "invalid UTF-8 character <bf>" "" { target *-*-* } .-1 }
+const char *A2 = u8"€߿ࠀ퟿𐀀􏿿"; // { dg-bogus "invalid UTF-8 character" }
+const char *B2 = u8""; // { dg-warning "invalid UTF-8 character <80>" }
+const char *C2 = u8""; // { dg-warning "invalid UTF-8 character <bf>" }
+const char *D2 = u8""; // { dg-warning "invalid UTF-8 character <c0>" }
+const char *E2 = u8""; // { dg-warning "invalid UTF-8 character <c1>" }
+const char *F2 = u8""; // { dg-warning "invalid UTF-8 character <f5>" }
+const char *G2 = u8""; // { dg-warning "invalid UTF-8 character <ff>" }
+const char *H2 = u8""; // { dg-warning "invalid UTF-8 character <c2>" }
+const char *I2 = u8""; // { dg-warning "invalid UTF-8 character <e0>" }
+const char *J2 = u8""; // { dg-warning "invalid UTF-8 character <e0><80><bf>" }
+const char *K2 = u8""; // { dg-warning "invalid UTF-8 character <e0><9f><80>" }
+const char *L2 = u8""; // { dg-warning "invalid UTF-8 character <e0><bf>" }
+const char *M2 = u8""; // { dg-warning "invalid UTF-8 character <ec><80>" }
+const char *N2 = u8""; // { dg-warning "invalid UTF-8 character <ed><a0><80>" }
+const char *O2 = u8""; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" }
+const char *P2 = u8""; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" }
+const char *Q2 = u8""; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" }
+const char *R2 = u8""; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" }
+ // { dg-warning "invalid UTF-8 character <bf>" "" { target *-*-* } .-1 }
diff --git a/gcc/testsuite/c-c++-common/cpp/Winvalid-utf8-3.c b/gcc/testsuite/c-c++-common/cpp/Winvalid-utf8-3.c
new file mode 100644
index 0000000..4cb230f
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/cpp/Winvalid-utf8-3.c
@@ -0,0 +1,27 @@
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid UTF-8
+// { dg-do preprocess }
+// { dg-options "-finput-charset=UTF-8 -Winvalid-utf8" }
+
+#define I(x)
+I(€߿ࠀ퟿𐀀􏿿) // { dg-bogus "invalid UTF-8 character" }
+ // { dg-error "is not valid in an identifier" "" { target c++ } .-1 }
+I() // { dg-warning "invalid UTF-8 character <80>" }
+I() // { dg-warning "invalid UTF-8 character <bf>" }
+I() // { dg-warning "invalid UTF-8 character <c0>" }
+I() // { dg-warning "invalid UTF-8 character <c1>" }
+I() // { dg-warning "invalid UTF-8 character <f5>" }
+I() // { dg-warning "invalid UTF-8 character <ff>" }
+I() // { dg-warning "invalid UTF-8 character <c2>" }
+I() // { dg-warning "invalid UTF-8 character <e0>" }
+I() // { dg-warning "invalid UTF-8 character <e0><80><bf>" }
+I() // { dg-warning "invalid UTF-8 character <e0><9f><80>" }
+I() // { dg-warning "invalid UTF-8 character <e0><bf>" }
+I() // { dg-warning "invalid UTF-8 character <ec><80>" }
+I() // { dg-warning "invalid UTF-8 character <ed><a0><80>" }
+I() // { dg-warning "invalid UTF-8 character <f0><80><80><80>" }
+I() // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" }
+I() // { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c } }
+ // { dg-error "is not valid in an identifier" "" { target c++ } .-1 }
+I() // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c } }
+ // { dg-error "is not valid in an identifier" "" { target c++ } .-1 }
diff --git a/gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-1.C b/gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-1.C
new file mode 100644
index 0000000..95e3827
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-1.C
@@ -0,0 +1,43 @@
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid UTF-8
+// { dg-do preprocess }
+// { dg-options "-finput-charset=UTF-8" }
+
+// a€߿ࠀ퟿𐀀􏿿a { dg-bogus "invalid UTF-8 character" }
+// aa { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+// { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
+/* a€߿ࠀ퟿𐀀􏿿a { dg-bogus "invalid UTF-8 character" } */
+/* aa { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } } */
+/* { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } .-1 } */
diff --git a/gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-10.C b/gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-10.C
new file mode 100644
index 0000000..4684b9d
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-10.C
@@ -0,0 +1,25 @@
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid UTF-8
+// { dg-do preprocess }
+// { dg-options "-finput-charset=UTF-8 -pedantic" }
+
+#define I(x)
+I(€߿ࠀ퟿𐀀􏿿) // { dg-bogus "invalid UTF-8 character" }
+ // { dg-error "is not valid in an identifier" "" { target *-*-* } .-1 }
+I() // { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } }
+I() // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } }
+I() // { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } }
+I() // { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } }
+I() // { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } }
+I() // { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } }
+I() // { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } }
+I() // { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } }
+I() // { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+I() // { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+I() // { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+I() // { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+I() // { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+I() // { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+I() // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+I() // { dg-error "is not valid in an identifier" }
+I() // { dg-error "is not valid in an identifier" }
diff --git a/gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-11.C b/gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-11.C
new file mode 100644
index 0000000..85f04bf
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-11.C
@@ -0,0 +1,25 @@
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid UTF-8
+// { dg-do preprocess }
+// { dg-options "-finput-charset=UTF-8 -pedantic-errors" }
+
+#define I(x)
+I(€߿ࠀ퟿𐀀􏿿) // { dg-bogus "invalid UTF-8 character" }
+ // { dg-error "is not valid in an identifier" "" { target *-*-* } .-1 }
+I() // { dg-error "invalid UTF-8 character <80>" "" { target c++23 } }
+I() // { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } }
+I() // { dg-error "invalid UTF-8 character <c0>" "" { target c++23 } }
+I() // { dg-error "invalid UTF-8 character <c1>" "" { target c++23 } }
+I() // { dg-error "invalid UTF-8 character <f5>" "" { target c++23 } }
+I() // { dg-error "invalid UTF-8 character <ff>" "" { target c++23 } }
+I() // { dg-error "invalid UTF-8 character <c2>" "" { target c++23 } }
+I() // { dg-error "invalid UTF-8 character <e0>" "" { target c++23 } }
+I() // { dg-error "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+I() // { dg-error "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+I() // { dg-error "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+I() // { dg-error "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+I() // { dg-error "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+I() // { dg-error "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+I() // { dg-error "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+I() // { dg-error "is not valid in an identifier" }
+I() // { dg-error "is not valid in an identifier" }
diff --git a/gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-12.C b/gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-12.C
new file mode 100644
index 0000000..6a4091f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-12.C
@@ -0,0 +1,25 @@
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid UTF-8
+// { dg-do preprocess }
+// { dg-options "-finput-charset=UTF-8 -pedantic-errors -Wno-invalid-utf8" }
+
+#define I(x)
+I(€߿ࠀ퟿𐀀􏿿) // { dg-bogus "invalid UTF-8 character" }
+ // { dg-error "is not valid in an identifier" "" { target *-*-* } .-1 }
+I() // { dg-bogus "invalid UTF-8 character <80>" }
+I() // { dg-bogus "invalid UTF-8 character <bf>" }
+I() // { dg-bogus "invalid UTF-8 character <c0>" }
+I() // { dg-bogus "invalid UTF-8 character <c1>" }
+I() // { dg-bogus "invalid UTF-8 character <f5>" }
+I() // { dg-bogus "invalid UTF-8 character <ff>" }
+I() // { dg-bogus "invalid UTF-8 character <c2>" }
+I() // { dg-bogus "invalid UTF-8 character <e0>" }
+I() // { dg-bogus "invalid UTF-8 character <e0><80><bf>" }
+I() // { dg-bogus "invalid UTF-8 character <e0><9f><80>" }
+I() // { dg-bogus "invalid UTF-8 character <e0><bf>" }
+I() // { dg-bogus "invalid UTF-8 character <ec><80>" }
+I() // { dg-bogus "invalid UTF-8 character <ed><a0><80>" }
+I() // { dg-bogus "invalid UTF-8 character <f0><80><80><80>" }
+I() // { dg-bogus "invalid UTF-8 character <f0><8f><bf><bf>" }
+I() // { dg-error "is not valid in an identifier" }
+I() // { dg-error "is not valid in an identifier" }
diff --git a/gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-2.C b/gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-2.C
new file mode 100644
index 0000000..70ab8e5
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-2.C
@@ -0,0 +1,43 @@
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid UTF-8
+// { dg-do preprocess }
+// { dg-options "-finput-charset=UTF-8 -pedantic" }
+
+// a€߿ࠀ퟿𐀀􏿿a { dg-bogus "invalid UTF-8 character" }
+// aa { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+// aa { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+// { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
+/* a€߿ࠀ퟿𐀀􏿿a { dg-bogus "invalid UTF-8 character" } */
+/* aa { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } } */
+/* aa { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } } */
+/* { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } .-1 } */
diff --git a/gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-3.C b/gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-3.C
new file mode 100644
index 0000000..c0f748b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-3.C
@@ -0,0 +1,43 @@
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid UTF-8
+// { dg-do preprocess }
+// { dg-options "-finput-charset=UTF-8 -pedantic-errors" }
+
+// a€߿ࠀ퟿𐀀􏿿a { dg-bogus "invalid UTF-8 character" }
+// aa { dg-error "invalid UTF-8 character <80>" "" { target c++23 } }
+// aa { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } }
+// aa { dg-error "invalid UTF-8 character <c0>" "" { target c++23 } }
+// aa { dg-error "invalid UTF-8 character <c1>" "" { target c++23 } }
+// aa { dg-error "invalid UTF-8 character <f5>" "" { target c++23 } }
+// aa { dg-error "invalid UTF-8 character <ff>" "" { target c++23 } }
+// aa { dg-error "invalid UTF-8 character <c2>" "" { target c++23 } }
+// aa { dg-error "invalid UTF-8 character <e0>" "" { target c++23 } }
+// aa { dg-error "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+// aa { dg-error "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+// aa { dg-error "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+// aa { dg-error "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+// aa { dg-error "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+// aa { dg-error "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+// aa { dg-error "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+// aa { dg-error "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+// aa { dg-error "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+// { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
+/* a€߿ࠀ퟿𐀀􏿿a { dg-bogus "invalid UTF-8 character" } */
+/* aa { dg-error "invalid UTF-8 character <80>" "" { target c++23 } } */
+/* aa { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } } */
+/* aa { dg-error "invalid UTF-8 character <c0>" "" { target c++23 } } */
+/* aa { dg-error "invalid UTF-8 character <c1>" "" { target c++23 } } */
+/* aa { dg-error "invalid UTF-8 character <f5>" "" { target c++23 } } */
+/* aa { dg-error "invalid UTF-8 character <ff>" "" { target c++23 } } */
+/* aa { dg-error "invalid UTF-8 character <c2>" "" { target c++23 } } */
+/* aa { dg-error "invalid UTF-8 character <e0>" "" { target c++23 } } */
+/* aa { dg-error "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } } */
+/* aa { dg-error "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } } */
+/* aa { dg-error "invalid UTF-8 character <e0><bf>" "" { target c++23 } } */
+/* aa { dg-error "invalid UTF-8 character <ec><80>" "" { target c++23 } } */
+/* aa { dg-error "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } } */
+/* aa { dg-error "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } } */
+/* aa { dg-error "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } } */
+/* aa { dg-error "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } } */
+/* aa { dg-error "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } } */
+/* { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } .-1 } */
diff --git a/gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-4.C b/gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-4.C
new file mode 100644
index 0000000..1dc65e3
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-4.C
@@ -0,0 +1,43 @@
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid UTF-8
+// { dg-do preprocess }
+// { dg-options "-finput-charset=UTF-8 -pedantic-errors -Wno-invalid-utf8" }
+
+// a€߿ࠀ퟿𐀀􏿿a { dg-bogus "invalid UTF-8 character" }
+// aa { dg-bogus "invalid UTF-8 character <80>" }
+// aa { dg-bogus "invalid UTF-8 character <bf>" }
+// aa { dg-bogus "invalid UTF-8 character <c0>" }
+// aa { dg-bogus "invalid UTF-8 character <c1>" }
+// aa { dg-bogus "invalid UTF-8 character <f5>" }
+// aa { dg-bogus "invalid UTF-8 character <ff>" }
+// aa { dg-bogus "invalid UTF-8 character <c2>" }
+// aa { dg-bogus "invalid UTF-8 character <e0>" }
+// aa { dg-bogus "invalid UTF-8 character <e0><80><bf>" }
+// aa { dg-bogus "invalid UTF-8 character <e0><9f><80>" }
+// aa { dg-bogus "invalid UTF-8 character <e0><bf>" }
+// aa { dg-bogus "invalid UTF-8 character <ec><80>" }
+// aa { dg-bogus "invalid UTF-8 character <ed><a0><80>" }
+// aa { dg-bogus "invalid UTF-8 character <f0><80><80><80>" }
+// aa { dg-bogus "invalid UTF-8 character <f0><8f><bf><bf>" }
+// aa { dg-bogus "invalid UTF-8 character <f4><90><80><80>" }
+// aa { dg-bogus "invalid UTF-8 character <fd><bf><bf><bf>" }
+// { dg-bogus "invalid UTF-8 character <bf>" "" { target *-*-* } .-1 }
+/* a€߿ࠀ퟿𐀀􏿿a { dg-bogus "invalid UTF-8 character" } */
+/* aa { dg-bogus "invalid UTF-8 character <80>" } */
+/* aa { dg-bogus "invalid UTF-8 character <bf>" } */
+/* aa { dg-bogus "invalid UTF-8 character <c0>" } */
+/* aa { dg-bogus "invalid UTF-8 character <c1>" } */
+/* aa { dg-bogus "invalid UTF-8 character <f5>" } */
+/* aa { dg-bogus "invalid UTF-8 character <ff>" } */
+/* aa { dg-bogus "invalid UTF-8 character <c2>" } */
+/* aa { dg-bogus "invalid UTF-8 character <e0>" } */
+/* aa { dg-bogus "invalid UTF-8 character <e0><80><bf>" } */
+/* aa { dg-bogus "invalid UTF-8 character <e0><9f><80>" } */
+/* aa { dg-bogus "invalid UTF-8 character <e0><bf>" } */
+/* aa { dg-bogus "invalid UTF-8 character <ec><80>" } */
+/* aa { dg-bogus "invalid UTF-8 character <ed><a0><80>" } */
+/* aa { dg-bogus "invalid UTF-8 character <f0><80><80><80>" } */
+/* aa { dg-bogus "invalid UTF-8 character <f0><8f><bf><bf>" } */
+/* aa { dg-bogus "invalid UTF-8 character <f4><90><80><80>" } */
+/* aa { dg-bogus "invalid UTF-8 character <fd><bf><bf><bf>" } */
+/* { dg-bogus "invalid UTF-8 character <bf>" "" { target *-*-* } .-1 } */
diff --git a/gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-5.C b/gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-5.C
new file mode 100644
index 0000000..f0140ba
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-5.C
@@ -0,0 +1,80 @@
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid UTF-8
+// { dg-do preprocess { target c++11 } }
+// { dg-options "-finput-charset=UTF-8" }
+
+char32_t a = U''; // { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } }
+char32_t b = U''; // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } }
+char32_t c = U''; // { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } }
+char32_t d = U''; // { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } }
+char32_t e = U''; // { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } }
+char32_t f = U''; // { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } }
+char32_t g = U''; // { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } }
+char32_t h = U''; // { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } }
+char32_t i = U''; // { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+char32_t j = U''; // { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+char32_t k = U''; // { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+char32_t l = U''; // { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+char32_t m = U''; // { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+char32_t n = U''; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+char32_t o = U''; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+char32_t p = U''; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+char32_t q = U''; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+ // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
+auto A = U"€߿ࠀ퟿𐀀􏿿"; // { dg-bogus "invalid UTF-8 character" }
+auto B = U""; // { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } }
+auto C = U""; // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } }
+auto D = U""; // { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } }
+auto E = U""; // { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } }
+auto F = U""; // { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } }
+auto G = U""; // { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } }
+auto H = U""; // { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } }
+auto I = U""; // { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } }
+auto J = U""; // { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+auto K = U""; // { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+auto L = U""; // { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+auto M = U""; // { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+auto N = U""; // { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+auto O = U""; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+auto P = U""; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+auto Q = U""; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+auto R = U""; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+ // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
+auto A1 = UR"(€߿ࠀ퟿𐀀􏿿)"; // { dg-bogus "invalid UTF-8 character" }
+auto B1 = UR"()"; // { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } }
+auto C1 = UR"()"; // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } }
+auto D1 = UR"()"; // { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } }
+auto E1 = UR"()"; // { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } }
+auto F1 = UR"()"; // { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } }
+auto G1 = UR"()"; // { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } }
+auto H1 = UR"()"; // { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } }
+auto I1 = UR"()"; // { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } }
+auto J1 = UR"()"; // { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+auto K1 = UR"()"; // { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+auto L1 = UR"()"; // { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+auto M1 = UR"()"; // { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+auto N1 = UR"()"; // { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+auto O1 = UR"()"; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+auto P1 = UR"()"; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+auto Q1 = UR"()"; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+auto R1 = UR"()"; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+ // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
+auto A2 = u8"€߿ࠀ퟿𐀀􏿿"; // { dg-bogus "invalid UTF-8 character" }
+auto B2 = u8""; // { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } }
+auto C2 = u8""; // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } }
+auto D2 = u8""; // { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } }
+auto E2 = u8""; // { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } }
+auto F2 = u8""; // { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } }
+auto G2 = u8""; // { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } }
+auto H2 = u8""; // { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } }
+auto I2 = u8""; // { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } }
+auto J2 = u8""; // { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+auto K2 = u8""; // { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+auto L2 = u8""; // { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+auto M2 = u8""; // { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+auto N2 = u8""; // { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+auto O2 = u8""; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+auto P2 = u8""; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+auto Q2 = u8""; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+auto R2 = u8""; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+ // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
diff --git a/gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-6.C b/gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-6.C
new file mode 100644
index 0000000..01023d3
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-6.C
@@ -0,0 +1,80 @@
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid UTF-8
+// { dg-do preprocess { target c++11 } }
+// { dg-options "-finput-charset=UTF-8 -pedantic" }
+
+char32_t a = U''; // { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } }
+char32_t b = U''; // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } }
+char32_t c = U''; // { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } }
+char32_t d = U''; // { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } }
+char32_t e = U''; // { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } }
+char32_t f = U''; // { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } }
+char32_t g = U''; // { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } }
+char32_t h = U''; // { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } }
+char32_t i = U''; // { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+char32_t j = U''; // { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+char32_t k = U''; // { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+char32_t l = U''; // { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+char32_t m = U''; // { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+char32_t n = U''; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+char32_t o = U''; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+char32_t p = U''; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+char32_t q = U''; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+ // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
+auto A = U"€߿ࠀ퟿𐀀􏿿"; // { dg-bogus "invalid UTF-8 character" }
+auto B = U""; // { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } }
+auto C = U""; // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } }
+auto D = U""; // { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } }
+auto E = U""; // { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } }
+auto F = U""; // { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } }
+auto G = U""; // { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } }
+auto H = U""; // { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } }
+auto I = U""; // { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } }
+auto J = U""; // { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+auto K = U""; // { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+auto L = U""; // { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+auto M = U""; // { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+auto N = U""; // { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+auto O = U""; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+auto P = U""; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+auto Q = U""; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+auto R = U""; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+ // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
+auto A1 = UR"(€߿ࠀ퟿𐀀􏿿)"; // { dg-bogus "invalid UTF-8 character" }
+auto B1 = UR"()"; // { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } }
+auto C1 = UR"()"; // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } }
+auto D1 = UR"()"; // { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } }
+auto E1 = UR"()"; // { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } }
+auto F1 = UR"()"; // { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } }
+auto G1 = UR"()"; // { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } }
+auto H1 = UR"()"; // { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } }
+auto I1 = UR"()"; // { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } }
+auto J1 = UR"()"; // { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+auto K1 = UR"()"; // { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+auto L1 = UR"()"; // { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+auto M1 = UR"()"; // { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+auto N1 = UR"()"; // { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+auto O1 = UR"()"; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+auto P1 = UR"()"; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+auto Q1 = UR"()"; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+auto R1 = UR"()"; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+ // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
+auto A2 = u8"€߿ࠀ퟿𐀀􏿿"; // { dg-bogus "invalid UTF-8 character" }
+auto B2 = u8""; // { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } }
+auto C2 = u8""; // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } }
+auto D2 = u8""; // { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } }
+auto E2 = u8""; // { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } }
+auto F2 = u8""; // { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } }
+auto G2 = u8""; // { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } }
+auto H2 = u8""; // { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } }
+auto I2 = u8""; // { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } }
+auto J2 = u8""; // { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+auto K2 = u8""; // { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+auto L2 = u8""; // { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+auto M2 = u8""; // { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+auto N2 = u8""; // { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+auto O2 = u8""; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+auto P2 = u8""; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+auto Q2 = u8""; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+auto R2 = u8""; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+ // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
diff --git a/gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-7.C b/gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-7.C
new file mode 100644
index 0000000..7991a64
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-7.C
@@ -0,0 +1,80 @@
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid UTF-8
+// { dg-do preprocess { target c++11 } }
+// { dg-options "-finput-charset=UTF-8 -pedantic-errors" }
+
+char32_t a = U''; // { dg-error "invalid UTF-8 character <80>" "" { target c++23 } }
+char32_t b = U''; // { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } }
+char32_t c = U''; // { dg-error "invalid UTF-8 character <c0>" "" { target c++23 } }
+char32_t d = U''; // { dg-error "invalid UTF-8 character <c1>" "" { target c++23 } }
+char32_t e = U''; // { dg-error "invalid UTF-8 character <f5>" "" { target c++23 } }
+char32_t f = U''; // { dg-error "invalid UTF-8 character <ff>" "" { target c++23 } }
+char32_t g = U''; // { dg-error "invalid UTF-8 character <c2>" "" { target c++23 } }
+char32_t h = U''; // { dg-error "invalid UTF-8 character <e0>" "" { target c++23 } }
+char32_t i = U''; // { dg-error "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+char32_t j = U''; // { dg-error "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+char32_t k = U''; // { dg-error "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+char32_t l = U''; // { dg-error "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+char32_t m = U''; // { dg-error "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+char32_t n = U''; // { dg-error "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+char32_t o = U''; // { dg-error "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+char32_t p = U''; // { dg-error "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+char32_t q = U''; // { dg-error "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+ // { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
+auto A = U"€߿ࠀ퟿𐀀􏿿"; // { dg-bogus "invalid UTF-8 character" }
+auto B = U""; // { dg-error "invalid UTF-8 character <80>" "" { target c++23 } }
+auto C = U""; // { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } }
+auto D = U""; // { dg-error "invalid UTF-8 character <c0>" "" { target c++23 } }
+auto E = U""; // { dg-error "invalid UTF-8 character <c1>" "" { target c++23 } }
+auto F = U""; // { dg-error "invalid UTF-8 character <f5>" "" { target c++23 } }
+auto G = U""; // { dg-error "invalid UTF-8 character <ff>" "" { target c++23 } }
+auto H = U""; // { dg-error "invalid UTF-8 character <c2>" "" { target c++23 } }
+auto I = U""; // { dg-error "invalid UTF-8 character <e0>" "" { target c++23 } }
+auto J = U""; // { dg-error "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+auto K = U""; // { dg-error "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+auto L = U""; // { dg-error "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+auto M = U""; // { dg-error "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+auto N = U""; // { dg-error "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+auto O = U""; // { dg-error "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+auto P = U""; // { dg-error "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+auto Q = U""; // { dg-error "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+auto R = U""; // { dg-error "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+ // { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
+auto A1 = UR"(€߿ࠀ퟿𐀀􏿿)"; // { dg-bogus "invalid UTF-8 character" }
+auto B1 = UR"()"; // { dg-error "invalid UTF-8 character <80>" "" { target c++23 } }
+auto C1 = UR"()"; // { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } }
+auto D1 = UR"()"; // { dg-error "invalid UTF-8 character <c0>" "" { target c++23 } }
+auto E1 = UR"()"; // { dg-error "invalid UTF-8 character <c1>" "" { target c++23 } }
+auto F1 = UR"()"; // { dg-error "invalid UTF-8 character <f5>" "" { target c++23 } }
+auto G1 = UR"()"; // { dg-error "invalid UTF-8 character <ff>" "" { target c++23 } }
+auto H1 = UR"()"; // { dg-error "invalid UTF-8 character <c2>" "" { target c++23 } }
+auto I1 = UR"()"; // { dg-error "invalid UTF-8 character <e0>" "" { target c++23 } }
+auto J1 = UR"()"; // { dg-error "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+auto K1 = UR"()"; // { dg-error "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+auto L1 = UR"()"; // { dg-error "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+auto M1 = UR"()"; // { dg-error "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+auto N1 = UR"()"; // { dg-error "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+auto O1 = UR"()"; // { dg-error "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+auto P1 = UR"()"; // { dg-error "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+auto Q1 = UR"()"; // { dg-error "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+auto R1 = UR"()"; // { dg-error "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+ // { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
+auto A2 = u8"€߿ࠀ퟿𐀀􏿿"; // { dg-bogus "invalid UTF-8 character" }
+auto B2 = u8""; // { dg-error "invalid UTF-8 character <80>" "" { target c++23 } }
+auto C2 = u8""; // { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } }
+auto D2 = u8""; // { dg-error "invalid UTF-8 character <c0>" "" { target c++23 } }
+auto E2 = u8""; // { dg-error "invalid UTF-8 character <c1>" "" { target c++23 } }
+auto F2 = u8""; // { dg-error "invalid UTF-8 character <f5>" "" { target c++23 } }
+auto G2 = u8""; // { dg-error "invalid UTF-8 character <ff>" "" { target c++23 } }
+auto H2 = u8""; // { dg-error "invalid UTF-8 character <c2>" "" { target c++23 } }
+auto I2 = u8""; // { dg-error "invalid UTF-8 character <e0>" "" { target c++23 } }
+auto J2 = u8""; // { dg-error "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+auto K2 = u8""; // { dg-error "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+auto L2 = u8""; // { dg-error "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+auto M2 = u8""; // { dg-error "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+auto N2 = u8""; // { dg-error "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+auto O2 = u8""; // { dg-error "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+auto P2 = u8""; // { dg-error "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+auto Q2 = u8""; // { dg-error "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+auto R2 = u8""; // { dg-error "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+ // { dg-error "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
diff --git a/gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-8.C b/gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-8.C
new file mode 100644
index 0000000..95c8a91
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-8.C
@@ -0,0 +1,80 @@
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid UTF-8
+// { dg-do preprocess { target c++11 } }
+// { dg-options "-finput-charset=UTF-8 -pedantic-errors -Wno-invalid-utf8" }
+
+char32_t a = U''; // { dg-bogus "invalid UTF-8 character <80>" "" { target c++23 } }
+char32_t b = U''; // { dg-bogus "invalid UTF-8 character <bf>" "" { target c++23 } }
+char32_t c = U''; // { dg-bogus "invalid UTF-8 character <c0>" "" { target c++23 } }
+char32_t d = U''; // { dg-bogus "invalid UTF-8 character <c1>" "" { target c++23 } }
+char32_t e = U''; // { dg-bogus "invalid UTF-8 character <f5>" "" { target c++23 } }
+char32_t f = U''; // { dg-bogus "invalid UTF-8 character <ff>" "" { target c++23 } }
+char32_t g = U''; // { dg-bogus "invalid UTF-8 character <c2>" "" { target c++23 } }
+char32_t h = U''; // { dg-bogus "invalid UTF-8 character <e0>" "" { target c++23 } }
+char32_t i = U''; // { dg-bogus "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+char32_t j = U''; // { dg-bogus "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+char32_t k = U''; // { dg-bogus "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+char32_t l = U''; // { dg-bogus "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+char32_t m = U''; // { dg-bogus "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+char32_t n = U''; // { dg-bogus "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+char32_t o = U''; // { dg-bogus "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+char32_t p = U''; // { dg-bogus "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+char32_t q = U''; // { dg-bogus "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+ // { dg-bogus "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
+auto A = U"€߿ࠀ퟿𐀀􏿿"; // { dg-bogus "invalid UTF-8 character" }
+auto B = U""; // { dg-bogus "invalid UTF-8 character <80>" "" { target c++23 } }
+auto C = U""; // { dg-bogus "invalid UTF-8 character <bf>" "" { target c++23 } }
+auto D = U""; // { dg-bogus "invalid UTF-8 character <c0>" "" { target c++23 } }
+auto E = U""; // { dg-bogus "invalid UTF-8 character <c1>" "" { target c++23 } }
+auto F = U""; // { dg-bogus "invalid UTF-8 character <f5>" "" { target c++23 } }
+auto G = U""; // { dg-bogus "invalid UTF-8 character <ff>" "" { target c++23 } }
+auto H = U""; // { dg-bogus "invalid UTF-8 character <c2>" "" { target c++23 } }
+auto I = U""; // { dg-bogus "invalid UTF-8 character <e0>" "" { target c++23 } }
+auto J = U""; // { dg-bogus "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+auto K = U""; // { dg-bogus "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+auto L = U""; // { dg-bogus "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+auto M = U""; // { dg-bogus "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+auto N = U""; // { dg-bogus "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+auto O = U""; // { dg-bogus "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+auto P = U""; // { dg-bogus "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+auto Q = U""; // { dg-bogus "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+auto R = U""; // { dg-bogus "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+ // { dg-bogus "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
+auto A1 = UR"(€߿ࠀ퟿𐀀􏿿)"; // { dg-bogus "invalid UTF-8 character" }
+auto B1 = UR"()"; // { dg-bogus "invalid UTF-8 character <80>" "" { target c++23 } }
+auto C1 = UR"()"; // { dg-bogus "invalid UTF-8 character <bf>" "" { target c++23 } }
+auto D1 = UR"()"; // { dg-bogus "invalid UTF-8 character <c0>" "" { target c++23 } }
+auto E1 = UR"()"; // { dg-bogus "invalid UTF-8 character <c1>" "" { target c++23 } }
+auto F1 = UR"()"; // { dg-bogus "invalid UTF-8 character <f5>" "" { target c++23 } }
+auto G1 = UR"()"; // { dg-bogus "invalid UTF-8 character <ff>" "" { target c++23 } }
+auto H1 = UR"()"; // { dg-bogus "invalid UTF-8 character <c2>" "" { target c++23 } }
+auto I1 = UR"()"; // { dg-bogus "invalid UTF-8 character <e0>" "" { target c++23 } }
+auto J1 = UR"()"; // { dg-bogus "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+auto K1 = UR"()"; // { dg-bogus "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+auto L1 = UR"()"; // { dg-bogus "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+auto M1 = UR"()"; // { dg-bogus "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+auto N1 = UR"()"; // { dg-bogus "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+auto O1 = UR"()"; // { dg-bogus "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+auto P1 = UR"()"; // { dg-bogus "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+auto Q1 = UR"()"; // { dg-bogus "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+auto R1 = UR"()"; // { dg-bogus "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+ // { dg-bogus "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
+auto A2 = u8"€߿ࠀ퟿𐀀􏿿"; // { dg-bogus "invalid UTF-8 character" }
+auto B2 = u8""; // { dg-bogus "invalid UTF-8 character <80>" "" { target c++23 } }
+auto C2 = u8""; // { dg-bogus "invalid UTF-8 character <bf>" "" { target c++23 } }
+auto D2 = u8""; // { dg-bogus "invalid UTF-8 character <c0>" "" { target c++23 } }
+auto E2 = u8""; // { dg-bogus "invalid UTF-8 character <c1>" "" { target c++23 } }
+auto F2 = u8""; // { dg-bogus "invalid UTF-8 character <f5>" "" { target c++23 } }
+auto G2 = u8""; // { dg-bogus "invalid UTF-8 character <ff>" "" { target c++23 } }
+auto H2 = u8""; // { dg-bogus "invalid UTF-8 character <c2>" "" { target c++23 } }
+auto I2 = u8""; // { dg-bogus "invalid UTF-8 character <e0>" "" { target c++23 } }
+auto J2 = u8""; // { dg-bogus "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+auto K2 = u8""; // { dg-bogus "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+auto L2 = u8""; // { dg-bogus "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+auto M2 = u8""; // { dg-bogus "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+auto N2 = u8""; // { dg-bogus "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+auto O2 = u8""; // { dg-bogus "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+auto P2 = u8""; // { dg-bogus "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+auto Q2 = u8""; // { dg-bogus "invalid UTF-8 character <f4><90><80><80>" "" { target c++23 } }
+auto R2 = u8""; // { dg-bogus "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c++23 } }
+ // { dg-bogus "invalid UTF-8 character <bf>" "" { target c++23 } .-1 }
diff --git a/gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-9.C b/gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-9.C
new file mode 100644
index 0000000..0afc945
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp23/Winvalid-utf8-9.C
@@ -0,0 +1,25 @@
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid UTF-8
+// { dg-do preprocess }
+// { dg-options "-finput-charset=UTF-8" }
+
+#define I(x)
+I(€߿ࠀ퟿𐀀􏿿) // { dg-bogus "invalid UTF-8 character" }
+ // { dg-error "is not valid in an identifier" "" { target *-*-* } .-1 }
+I() // { dg-warning "invalid UTF-8 character <80>" "" { target c++23 } }
+I() // { dg-warning "invalid UTF-8 character <bf>" "" { target c++23 } }
+I() // { dg-warning "invalid UTF-8 character <c0>" "" { target c++23 } }
+I() // { dg-warning "invalid UTF-8 character <c1>" "" { target c++23 } }
+I() // { dg-warning "invalid UTF-8 character <f5>" "" { target c++23 } }
+I() // { dg-warning "invalid UTF-8 character <ff>" "" { target c++23 } }
+I() // { dg-warning "invalid UTF-8 character <c2>" "" { target c++23 } }
+I() // { dg-warning "invalid UTF-8 character <e0>" "" { target c++23 } }
+I() // { dg-warning "invalid UTF-8 character <e0><80><bf>" "" { target c++23 } }
+I() // { dg-warning "invalid UTF-8 character <e0><9f><80>" "" { target c++23 } }
+I() // { dg-warning "invalid UTF-8 character <e0><bf>" "" { target c++23 } }
+I() // { dg-warning "invalid UTF-8 character <ec><80>" "" { target c++23 } }
+I() // { dg-warning "invalid UTF-8 character <ed><a0><80>" "" { target c++23 } }
+I() // { dg-warning "invalid UTF-8 character <f0><80><80><80>" "" { target c++23 } }
+I() // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" "" { target c++23 } }
+I() // { dg-error "is not valid in an identifier" }
+I() // { dg-error "is not valid in an identifier" }