about | summary | refs | log | tree | commit | diff
path: root/gcc/testsuite/c-c++-common/cpp
diff options
context:
space:
mode:
author: Jakub Jelinek <jakub@redhat.com> 2022-09-01 09:48:01 +0200
committer: Jakub Jelinek <jakub@redhat.com> 2022-09-01 09:56:44 +0200
commit: 0b8c57ed40f19086e30ce54faec3222ac21cc0df (patch)
tree: 1ce3aa0f19ef45a7d2c03e272d1d8f835bb7f0b6 /gcc/testsuite/c-c++-common/cpp
parent: bdfe0d1ce0aebdb68b77e2c04a0f45956c56b449 (diff)
download: gcc-0b8c57ed40f19086e30ce54faec3222ac21cc0df.zip
          gcc-0b8c57ed40f19086e30ce54faec3222ac21cc0df.tar.gz
          gcc-0b8c57ed40f19086e30ce54faec3222ac21cc0df.tar.bz2
libcpp: Add -Winvalid-utf8 warning [PR106655]
The following patch introduces a new warning - -Winvalid-utf8 similarly
to what clang now has - to diagnose invalid UTF-8 byte sequences in
comments, but not just in those, but also in string/character literals
and outside of them.  The warning is on by default when explicit
-finput-charset=UTF-8 is used and C++23 compilation is requested and
if -{,W}pedantic or -pedantic-errors it is actually a pedwarn.

The reason it is on by default only for -finput-charset=UTF-8 is
that the sources often are UTF-8, but sometimes could be some ASCII
compatible single byte encoding where non-ASCII characters only
appear in comments.  So having the warning off by default is IMO
desirable.  The C++23 pedantic mode for when the source code is UTF-8
is -std=c++23 -pedantic-errors -finput-charset=UTF-8.

2022-09-01  Jakub Jelinek  <jakub@redhat.com>

	PR c++/106655
libcpp/
	* include/cpplib.h (struct cpp_options): Implement C++23
	P2295R6 - Support for UTF-8 as a portable source file encoding.
	Add cpp_warn_invalid_utf8 and cpp_input_charset_explicit fields.
	(enum cpp_warning_reason): Add CPP_W_INVALID_UTF8 enumerator.
	* init.cc (cpp_create_reader): Initialize cpp_warn_invalid_utf8
	and cpp_input_charset_explicit.
	* charset.cc (_cpp_valid_utf8): Adjust function comment.
	* lex.cc (UCS_LIMIT): Define.
	(utf8_continuation): New const variable.
	(utf8_signifier): Move earlier in the file.
	(_cpp_warn_invalid_utf8, _cpp_handle_multibyte_utf8): New functions.
	(_cpp_skip_block_comment): Handle -Winvalid-utf8 warning.
	(skip_line_comment): Likewise.
	(lex_raw_string, lex_string): Likewise.
	(_cpp_lex_direct): Likewise.
gcc/
	* doc/invoke.texi (-Winvalid-utf8): Document it.
gcc/c-family/
	* c.opt (-Winvalid-utf8): New warning.
	* c-opts.cc (c_common_handle_option) <case OPT_finput_charset_>:
	Set cpp_opts->cpp_input_charset_explicit.
	(c_common_post_options): If -finput-charset=UTF-8 is explicit
	in C++23, enable -Winvalid-utf8 by default and if -pedantic
	or -pedantic-errors, make it a pedwarn.
gcc/testsuite/
	* c-c++-common/cpp/Winvalid-utf8-1.c: New test.
	* c-c++-common/cpp/Winvalid-utf8-2.c: New test.
	* c-c++-common/cpp/Winvalid-utf8-3.c: New test.
	* g++.dg/cpp23/Winvalid-utf8-1.C: New test.
	* g++.dg/cpp23/Winvalid-utf8-2.C: New test.
	* g++.dg/cpp23/Winvalid-utf8-3.C: New test.
	* g++.dg/cpp23/Winvalid-utf8-4.C: New test.
	* g++.dg/cpp23/Winvalid-utf8-5.C: New test.
	* g++.dg/cpp23/Winvalid-utf8-6.C: New test.
	* g++.dg/cpp23/Winvalid-utf8-7.C: New test.
	* g++.dg/cpp23/Winvalid-utf8-8.C: New test.
	* g++.dg/cpp23/Winvalid-utf8-9.C: New test.
	* g++.dg/cpp23/Winvalid-utf8-10.C: New test.
	* g++.dg/cpp23/Winvalid-utf8-11.C: New test.
	* g++.dg/cpp23/Winvalid-utf8-12.C: New test.
Diffstat (limited to 'gcc/testsuite/c-c++-common/cpp')
-rw-r--r-- gcc/testsuite/c-c++-common/cpp/Winvalid-utf8-1.c | 43
-rw-r--r-- gcc/testsuite/c-c++-common/cpp/Winvalid-utf8-2.c | 88
-rw-r--r-- gcc/testsuite/c-c++-common/cpp/Winvalid-utf8-3.c | 27
3 files changed, 158 insertions, 0 deletions
diff --git a/gcc/testsuite/c-c++-common/cpp/Winvalid-utf8-1.c b/gcc/testsuite/c-c++-common/cpp/Winvalid-utf8-1.c
new file mode 100644
index 0000000..0d5a6a7
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/cpp/Winvalid-utf8-1.c
@@ -0,0 +1,43 @@
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid UTF-8
+// { dg-do preprocess }
+// { dg-options "-finput-charset=UTF-8 -Winvalid-utf8" }
+
+// a€߿ࠀ퟿𐀀􏿿a { dg-bogus "invalid UTF-8 character" }
+// aa { dg-warning "invalid UTF-8 character <80>" }
+// aa { dg-warning "invalid UTF-8 character <bf>" }
+// aa { dg-warning "invalid UTF-8 character <c0>" }
+// aa { dg-warning "invalid UTF-8 character <c1>" }
+// aa { dg-warning "invalid UTF-8 character <f5>" }
+// aa { dg-warning "invalid UTF-8 character <ff>" }
+// aa { dg-warning "invalid UTF-8 character <c2>" }
+// aa { dg-warning "invalid UTF-8 character <e0>" }
+// aa { dg-warning "invalid UTF-8 character <e0><80><bf>" }
+// aa { dg-warning "invalid UTF-8 character <e0><9f><80>" }
+// aa { dg-warning "invalid UTF-8 character <e0><bf>" }
+// aa { dg-warning "invalid UTF-8 character <ec><80>" }
+// aa { dg-warning "invalid UTF-8 character <ed><a0><80>" }
+// aa { dg-warning "invalid UTF-8 character <f0><80><80><80>" }
+// aa { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" }
+// aa { dg-warning "invalid UTF-8 character <f4><90><80><80>" }
+// aa { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" }
+// { dg-warning "invalid UTF-8 character <bf>" "" { target *-*-* } .-1 }
+/* a€߿ࠀ퟿𐀀􏿿a { dg-bogus "invalid UTF-8 character" } */
+/* aa { dg-warning "invalid UTF-8 character <80>" } */
+/* aa { dg-warning "invalid UTF-8 character <bf>" } */
+/* aa { dg-warning "invalid UTF-8 character <c0>" } */
+/* aa { dg-warning "invalid UTF-8 character <c1>" } */
+/* aa { dg-warning "invalid UTF-8 character <f5>" } */
+/* aa { dg-warning "invalid UTF-8 character <ff>" } */
+/* aa { dg-warning "invalid UTF-8 character <c2>" } */
+/* aa { dg-warning "invalid UTF-8 character <e0>" } */
+/* aa { dg-warning "invalid UTF-8 character <e0><80><bf>" } */
+/* aa { dg-warning "invalid UTF-8 character <e0><9f><80>" } */
+/* aa { dg-warning "invalid UTF-8 character <e0><bf>" } */
+/* aa { dg-warning "invalid UTF-8 character <ec><80>" } */
+/* aa { dg-warning "invalid UTF-8 character <ed><a0><80>" } */
+/* aa { dg-warning "invalid UTF-8 character <f0><80><80><80>" } */
+/* aa { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" } */
+/* aa { dg-warning "invalid UTF-8 character <f4><90><80><80>" } */
+/* aa { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" } */
+/* { dg-warning "invalid UTF-8 character <bf>" "" { target *-*-* } .-1 } */
diff --git a/gcc/testsuite/c-c++-common/cpp/Winvalid-utf8-2.c b/gcc/testsuite/c-c++-common/cpp/Winvalid-utf8-2.c
new file mode 100644
index 0000000..9ab69e1
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/cpp/Winvalid-utf8-2.c
@@ -0,0 +1,88 @@
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid UTF-8
+// { dg-do preprocess { target { c || c++11 } } }
+// { dg-require-effective-target wchar }
+// { dg-options "-finput-charset=UTF-8 -Winvalid-utf8" }
+// { dg-additional-options "-std=gnu99" { target c } }
+
+#ifndef __cplusplus
+#include <wchar.h>
+typedef __CHAR16_TYPE__ char16_t;
+typedef __CHAR32_TYPE__ char32_t;
+#endif
+
+char32_t a = U''; // { dg-warning "invalid UTF-8 character <80>" }
+char32_t b = U''; // { dg-warning "invalid UTF-8 character <bf>" }
+char32_t c = U''; // { dg-warning "invalid UTF-8 character <c0>" }
+char32_t d = U''; // { dg-warning "invalid UTF-8 character <c1>" }
+char32_t e = U''; // { dg-warning "invalid UTF-8 character <f5>" }
+char32_t f = U''; // { dg-warning "invalid UTF-8 character <ff>" }
+char32_t g = U''; // { dg-warning "invalid UTF-8 character <c2>" }
+char32_t h = U''; // { dg-warning "invalid UTF-8 character <e0>" }
+char32_t i = U''; // { dg-warning "invalid UTF-8 character <e0><80><bf>" }
+char32_t j = U''; // { dg-warning "invalid UTF-8 character <e0><9f><80>" }
+char32_t k = U''; // { dg-warning "invalid UTF-8 character <e0><bf>" }
+char32_t l = U''; // { dg-warning "invalid UTF-8 character <ec><80>" }
+char32_t m = U''; // { dg-warning "invalid UTF-8 character <ed><a0><80>" }
+char32_t n = U''; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" }
+char32_t o = U''; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" }
+char32_t p = U''; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" }
+char32_t q = U''; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" }
+ // { dg-warning "invalid UTF-8 character <bf>" "" { target *-*-* } .-1 }
+const char32_t *A = U"€߿ࠀ퟿𐀀􏿿"; // { dg-bogus "invalid UTF-8 character" }
+const char32_t *B = U""; // { dg-warning "invalid UTF-8 character <80>" }
+const char32_t *C = U""; // { dg-warning "invalid UTF-8 character <bf>" }
+const char32_t *D = U""; // { dg-warning "invalid UTF-8 character <c0>" }
+const char32_t *E = U""; // { dg-warning "invalid UTF-8 character <c1>" }
+const char32_t *F = U""; // { dg-warning "invalid UTF-8 character <f5>" }
+const char32_t *G = U""; // { dg-warning "invalid UTF-8 character <ff>" }
+const char32_t *H = U""; // { dg-warning "invalid UTF-8 character <c2>" }
+const char32_t *I = U""; // { dg-warning "invalid UTF-8 character <e0>" }
+const char32_t *J = U""; // { dg-warning "invalid UTF-8 character <e0><80><bf>" }
+const char32_t *K = U""; // { dg-warning "invalid UTF-8 character <e0><9f><80>" }
+const char32_t *L = U""; // { dg-warning "invalid UTF-8 character <e0><bf>" }
+const char32_t *M = U""; // { dg-warning "invalid UTF-8 character <ec><80>" }
+const char32_t *N = U""; // { dg-warning "invalid UTF-8 character <ed><a0><80>" }
+const char32_t *O = U""; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" }
+const char32_t *P = U""; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" }
+const char32_t *Q = U""; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" }
+const char32_t *R = U""; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" }
+ // { dg-warning "invalid UTF-8 character <bf>" "" { target *-*-* } .-1 }
+const char32_t *A1 = UR"(€߿ࠀ퟿𐀀􏿿)"; // { dg-bogus "invalid UTF-8 character" }
+const char32_t *B1 = UR"()"; // { dg-warning "invalid UTF-8 character <80>" }
+const char32_t *C1 = UR"()"; // { dg-warning "invalid UTF-8 character <bf>" }
+const char32_t *D1 = UR"()"; // { dg-warning "invalid UTF-8 character <c0>" }
+const char32_t *E1 = UR"()"; // { dg-warning "invalid UTF-8 character <c1>" }
+const char32_t *F1 = UR"()"; // { dg-warning "invalid UTF-8 character <f5>" }
+const char32_t *G1 = UR"()"; // { dg-warning "invalid UTF-8 character <ff>" }
+const char32_t *H1 = UR"()"; // { dg-warning "invalid UTF-8 character <c2>" }
+const char32_t *I1 = UR"()"; // { dg-warning "invalid UTF-8 character <e0>" }
+const char32_t *J1 = UR"()"; // { dg-warning "invalid UTF-8 character <e0><80><bf>" }
+const char32_t *K1 = UR"()"; // { dg-warning "invalid UTF-8 character <e0><9f><80>" }
+const char32_t *L1 = UR"()"; // { dg-warning "invalid UTF-8 character <e0><bf>" }
+const char32_t *M1 = UR"()"; // { dg-warning "invalid UTF-8 character <ec><80>" }
+const char32_t *N1 = UR"()"; // { dg-warning "invalid UTF-8 character <ed><a0><80>" }
+const char32_t *O1 = UR"()"; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" }
+const char32_t *P1 = UR"()"; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" }
+const char32_t *Q1 = UR"()"; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" }
+const char32_t *R1 = UR"()"; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" }
+ // { dg-warning "invalid UTF-8 character <bf>" "" { target *-*-* } .-1 }
+const char *A2 = u8"€߿ࠀ퟿𐀀􏿿"; // { dg-bogus "invalid UTF-8 character" }
+const char *B2 = u8""; // { dg-warning "invalid UTF-8 character <80>" }
+const char *C2 = u8""; // { dg-warning "invalid UTF-8 character <bf>" }
+const char *D2 = u8""; // { dg-warning "invalid UTF-8 character <c0>" }
+const char *E2 = u8""; // { dg-warning "invalid UTF-8 character <c1>" }
+const char *F2 = u8""; // { dg-warning "invalid UTF-8 character <f5>" }
+const char *G2 = u8""; // { dg-warning "invalid UTF-8 character <ff>" }
+const char *H2 = u8""; // { dg-warning "invalid UTF-8 character <c2>" }
+const char *I2 = u8""; // { dg-warning "invalid UTF-8 character <e0>" }
+const char *J2 = u8""; // { dg-warning "invalid UTF-8 character <e0><80><bf>" }
+const char *K2 = u8""; // { dg-warning "invalid UTF-8 character <e0><9f><80>" }
+const char *L2 = u8""; // { dg-warning "invalid UTF-8 character <e0><bf>" }
+const char *M2 = u8""; // { dg-warning "invalid UTF-8 character <ec><80>" }
+const char *N2 = u8""; // { dg-warning "invalid UTF-8 character <ed><a0><80>" }
+const char *O2 = u8""; // { dg-warning "invalid UTF-8 character <f0><80><80><80>" }
+const char *P2 = u8""; // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" }
+const char *Q2 = u8""; // { dg-warning "invalid UTF-8 character <f4><90><80><80>" }
+const char *R2 = u8""; // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" }
+ // { dg-warning "invalid UTF-8 character <bf>" "" { target *-*-* } .-1 }
diff --git a/gcc/testsuite/c-c++-common/cpp/Winvalid-utf8-3.c b/gcc/testsuite/c-c++-common/cpp/Winvalid-utf8-3.c
new file mode 100644
index 0000000..4cb230f
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/cpp/Winvalid-utf8-3.c
@@ -0,0 +1,27 @@
+// P2295R6 - Support for UTF-8 as a portable source file encoding
+// This test intentionally contains various byte sequences which are not valid UTF-8
+// { dg-do preprocess }
+// { dg-options "-finput-charset=UTF-8 -Winvalid-utf8" }
+
+#define I(x)
+I(€߿ࠀ퟿𐀀􏿿) // { dg-bogus "invalid UTF-8 character" }
+ // { dg-error "is not valid in an identifier" "" { target c++ } .-1 }
+I() // { dg-warning "invalid UTF-8 character <80>" }
+I() // { dg-warning "invalid UTF-8 character <bf>" }
+I() // { dg-warning "invalid UTF-8 character <c0>" }
+I() // { dg-warning "invalid UTF-8 character <c1>" }
+I() // { dg-warning "invalid UTF-8 character <f5>" }
+I() // { dg-warning "invalid UTF-8 character <ff>" }
+I() // { dg-warning "invalid UTF-8 character <c2>" }
+I() // { dg-warning "invalid UTF-8 character <e0>" }
+I() // { dg-warning "invalid UTF-8 character <e0><80><bf>" }
+I() // { dg-warning "invalid UTF-8 character <e0><9f><80>" }
+I() // { dg-warning "invalid UTF-8 character <e0><bf>" }
+I() // { dg-warning "invalid UTF-8 character <ec><80>" }
+I() // { dg-warning "invalid UTF-8 character <ed><a0><80>" }
+I() // { dg-warning "invalid UTF-8 character <f0><80><80><80>" }
+I() // { dg-warning "invalid UTF-8 character <f0><8f><bf><bf>" }
+I() // { dg-warning "invalid UTF-8 character <f4><90><80><80>" "" { target c } }
+ // { dg-error "is not valid in an identifier" "" { target c++ } .-1 }
+I() // { dg-warning "invalid UTF-8 character <fd><bf><bf><bf>" "" { target c } }
+ // { dg-error "is not valid in an identifier" "" { target c++ } .-1 }