aboutsummaryrefslogtreecommitdiff
path: root/gcc/c-family/c-opts.cc
diff options
context:
space:
mode:
authorJakub Jelinek <jakub@redhat.com>2022-09-01 09:48:01 +0200
committerJakub Jelinek <jakub@redhat.com>2022-09-01 09:56:44 +0200
commit0b8c57ed40f19086e30ce54faec3222ac21cc0df (patch)
tree1ce3aa0f19ef45a7d2c03e272d1d8f835bb7f0b6 /gcc/c-family/c-opts.cc
parentbdfe0d1ce0aebdb68b77e2c04a0f45956c56b449 (diff)
downloadgcc-0b8c57ed40f19086e30ce54faec3222ac21cc0df.zip
gcc-0b8c57ed40f19086e30ce54faec3222ac21cc0df.tar.gz
gcc-0b8c57ed40f19086e30ce54faec3222ac21cc0df.tar.bz2
libcpp: Add -Winvalid-utf8 warning [PR106655]
The following patch introduces a new warning, -Winvalid-utf8 (similar to the one clang now has), which diagnoses invalid UTF-8 byte sequences not only in comments, but also in string/character literals and elsewhere in the source. The warning is enabled by default only when -finput-charset=UTF-8 is specified explicitly and C++23 compilation is requested; in that case, if -pedantic, -Wpedantic or -pedantic-errors is also in effect, it is actually a pedwarn. The reason it is enabled by default only with an explicit -finput-charset=UTF-8 is that sources are often UTF-8, but can sometimes be in some ASCII-compatible single-byte encoding where non-ASCII characters appear only in comments. So keeping the warning off by default in all other cases is IMO desirable. The C++23 pedantic mode for when the source code is UTF-8 is -std=c++23 -pedantic-errors -finput-charset=UTF-8. 2022-09-01 Jakub Jelinek <jakub@redhat.com> PR c++/106655 libcpp/ * include/cpplib.h (struct cpp_options): Implement C++23 P2295R6 - Support for UTF-8 as a portable source file encoding. Add cpp_warn_invalid_utf8 and cpp_input_charset_explicit fields. (enum cpp_warning_reason): Add CPP_W_INVALID_UTF8 enumerator. * init.cc (cpp_create_reader): Initialize cpp_warn_invalid_utf8 and cpp_input_charset_explicit. * charset.cc (_cpp_valid_utf8): Adjust function comment. * lex.cc (UCS_LIMIT): Define. (utf8_continuation): New const variable. (utf8_signifier): Move earlier in the file. (_cpp_warn_invalid_utf8, _cpp_handle_multibyte_utf8): New functions. (_cpp_skip_block_comment): Handle -Winvalid-utf8 warning. (skip_line_comment): Likewise. (lex_raw_string, lex_string): Likewise. (_cpp_lex_direct): Likewise. gcc/ * doc/invoke.texi (-Winvalid-utf8): Document it. gcc/c-family/ * c.opt (-Winvalid-utf8): New warning. * c-opts.cc (c_common_handle_option) <case OPT_finput_charset_>: Set cpp_opts->cpp_input_charset_explicit. (c_common_post_options): If -finput-charset=UTF-8 is explicit in C++23, enable -Winvalid-utf8 by default and if -pedantic or -pedantic-errors, make it a pedwarn. 
gcc/testsuite/ * c-c++-common/cpp/Winvalid-utf8-1.c: New test. * c-c++-common/cpp/Winvalid-utf8-2.c: New test. * c-c++-common/cpp/Winvalid-utf8-3.c: New test. * g++.dg/cpp23/Winvalid-utf8-1.C: New test. * g++.dg/cpp23/Winvalid-utf8-2.C: New test. * g++.dg/cpp23/Winvalid-utf8-3.C: New test. * g++.dg/cpp23/Winvalid-utf8-4.C: New test. * g++.dg/cpp23/Winvalid-utf8-5.C: New test. * g++.dg/cpp23/Winvalid-utf8-6.C: New test. * g++.dg/cpp23/Winvalid-utf8-7.C: New test. * g++.dg/cpp23/Winvalid-utf8-8.C: New test. * g++.dg/cpp23/Winvalid-utf8-9.C: New test. * g++.dg/cpp23/Winvalid-utf8-10.C: New test. * g++.dg/cpp23/Winvalid-utf8-11.C: New test. * g++.dg/cpp23/Winvalid-utf8-12.C: New test.
Diffstat (limited to 'gcc/c-family/c-opts.cc')
-rw-r--r--gcc/c-family/c-opts.cc12
1 files changed, 12 insertions, 0 deletions
diff --git a/gcc/c-family/c-opts.cc b/gcc/c-family/c-opts.cc
index 337a524..babaa2f 100644
--- a/gcc/c-family/c-opts.cc
+++ b/gcc/c-family/c-opts.cc
@@ -534,6 +534,7 @@ c_common_handle_option (size_t scode, const char *arg, HOST_WIDE_INT value,
case OPT_finput_charset_:
cpp_opts->input_charset = arg;
+ cpp_opts->cpp_input_charset_explicit = 1;
break;
case OPT_ftemplate_depth_:
@@ -1152,6 +1153,17 @@ c_common_post_options (const char **pfilename)
lang_hooks.preprocess_options (parse_in);
cpp_post_options (parse_in);
init_global_opts_from_cpp (&global_options, cpp_get_options (parse_in));
+ /* For C++23 and explicit -finput-charset=UTF-8, turn on -Winvalid-utf8
+ by default and make it a pedwarn unless -Wno-invalid-utf8. */
+ if (cxx_dialect >= cxx23
+ && cpp_opts->cpp_input_charset_explicit
+ && strcmp (cpp_opts->input_charset, "UTF-8") == 0
+ && (cpp_opts->cpp_warn_invalid_utf8
+ || !global_options_set.x_warn_invalid_utf8))
+ {
+ global_options.x_warn_invalid_utf8 = 1;
+ cpp_opts->cpp_warn_invalid_utf8 = cpp_opts->cpp_pedantic ? 2 : 1;
+ }
/* Let diagnostics infrastructure know how to convert input files the same
way libcpp will do it, namely using the configured input charset and