diff options
author | Marek Polacek <polacek@redhat.com> | 2022-01-19 19:05:22 -0500 |
---|---|---|
committer | Marek Polacek <polacek@redhat.com> | 2022-01-24 17:48:23 -0500 |
commit | ae36f839632ddb67a53c26e9c7e73b0f56c4c11b (patch) | |
tree | 79e2972dff52b095ab4272a276dd1042aa6eb23e /gcc | |
parent | e89d0befe3ec3238fca6de2cb078eb403b8c7e99 (diff) | |
download | gcc-ae36f839632ddb67a53c26e9c7e73b0f56c4c11b.zip gcc-ae36f839632ddb67a53c26e9c7e73b0f56c4c11b.tar.gz gcc-ae36f839632ddb67a53c26e9c7e73b0f56c4c11b.tar.bz2 |
preprocessor: -Wbidi-chars and UCNs [PR104030]
Stephan Bergmann reported that our -Wbidi-chars breaks the build
of LibreOffice because we warn about UCNs even when their usage
is correct: LibreOffice constructs strings piecewise, as in:
aText = u"\u202D" + aText;
and warning about that is overzealous. Since no editor (AFAIK)
interprets UCNs to show them as Unicode characters, there's less
risk in misinterpreting them, and so perhaps we shouldn't warn
about them by default. However, identifiers containing UCNs or
programs generating other programs could still cause confusion,
so I'm keeping the UCN checking. To turn it on, you just need
to use -Wbidi-chars=unpaired,ucn or -Wbidi-chars=any,ucn.
The implementation is done by using the new EnumSet feature.
PR preprocessor/104030
gcc/c-family/ChangeLog:
* c.opt (Wbidi-chars): Mark as EnumSet. Also accept =ucn.
gcc/ChangeLog:
* doc/invoke.texi: Update documentation for -Wbidi-chars.
libcpp/ChangeLog:
* include/cpplib.h (enum cpp_bidirectional_level): Add
bidirectional_ucn. Set values explicitly.
* internal.h (cpp_reader): Adjust warn_bidi_p.
* lex.cc (maybe_warn_bidi_on_close): Don't warn about UCNs
unless UCN checking is on.
(maybe_warn_bidi_on_char): Likewise.
gcc/testsuite/ChangeLog:
* c-c++-common/Wbidi-chars-10.c: Turn on UCN checking.
* c-c++-common/Wbidi-chars-11.c: Likewise.
* c-c++-common/Wbidi-chars-14.c: Likewise.
* c-c++-common/Wbidi-chars-16.c: Likewise.
* c-c++-common/Wbidi-chars-17.c: Likewise.
* c-c++-common/Wbidi-chars-4.c: Likewise.
* c-c++-common/Wbidi-chars-5.c: Likewise.
* c-c++-common/Wbidi-chars-6.c: Likewise.
* c-c++-common/Wbidi-chars-7.c: Likewise.
* c-c++-common/Wbidi-chars-8.c: Likewise.
* c-c++-common/Wbidi-chars-9.c: Likewise.
* c-c++-common/Wbidi-chars-ranges.c: Likewise.
* c-c++-common/Wbidi-chars-18.c: New test.
* c-c++-common/Wbidi-chars-19.c: New test.
* c-c++-common/Wbidi-chars-20.c: New test.
* c-c++-common/Wbidi-chars-21.c: New test.
* c-c++-common/Wbidi-chars-22.c: New test.
* c-c++-common/Wbidi-chars-23.c: New test.
Diffstat (limited to 'gcc')
20 files changed, 92 insertions, 19 deletions
diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt index db65c14..9cfd2a6 100644 --- a/gcc/c-family/c.opt +++ b/gcc/c-family/c.opt @@ -379,8 +379,8 @@ C ObjC C++ ObjC++ Warning Alias(Wbidi-chars=,any,none) ; Wbidi-chars= -C ObjC C++ ObjC++ RejectNegative Joined Warning CPP(cpp_warn_bidirectional) CppReason(CPP_W_BIDIRECTIONAL) Var(warn_bidirectional) Init(bidirectional_unpaired) Enum(cpp_bidirectional_level) --Wbidi-chars=[none|unpaired|any] Warn about UTF-8 bidirectional control characters. +C ObjC C++ ObjC++ RejectNegative Joined Warning CPP(cpp_warn_bidirectional) CppReason(CPP_W_BIDIRECTIONAL) Var(warn_bidirectional) Init(bidirectional_unpaired) Enum(cpp_bidirectional_level) EnumSet +-Wbidi-chars=[none|unpaired|any|ucn] Warn about UTF-8 bidirectional control characters. ; Required for these enum values. SourceInclude @@ -390,13 +390,16 @@ Enum Name(cpp_bidirectional_level) Type(int) UnknownError(argument %qs to %<-Wbidi-chars%> not recognized) EnumValue -Enum(cpp_bidirectional_level) String(none) Value(bidirectional_none) +Enum(cpp_bidirectional_level) String(none) Value(bidirectional_none) Set(1) EnumValue -Enum(cpp_bidirectional_level) String(unpaired) Value(bidirectional_unpaired) +Enum(cpp_bidirectional_level) String(unpaired) Value(bidirectional_unpaired) Set(1) EnumValue -Enum(cpp_bidirectional_level) String(any) Value(bidirectional_any) +Enum(cpp_bidirectional_level) String(any) Value(bidirectional_any) Set(1) + +EnumValue +Enum(cpp_bidirectional_level) String(ucn) Value(bidirectional_ucn) Set(2) Wbool-compare C ObjC C++ ObjC++ Var(warn_bool_compare) Warning LangEnabledBy(C ObjC C++ ObjC++,Wall) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 309f5e3..9e588db 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -328,7 +328,7 @@ Objective-C and Objective-C++ Dialects}. -Warray-bounds -Warray-bounds=@var{n} -Warray-compare @gol -Wno-attributes -Wattribute-alias=@var{n} -Wno-attribute-alias @gol -Wno-attribute-warning @gol --Wbidi-chars=@r{[}none@r{|}unpaired@r{|}any@r{]} @gol +-Wbidi-chars=@r{[}none@r{|}unpaired@r{|}any@r{|}ucn@r{]} @gol -Wbool-compare -Wbool-operation @gol -Wno-builtin-declaration-mismatch @gol -Wno-builtin-macro-redefined -Wc90-c99-compat -Wc99-c11-compat @gol @@ -7803,7 +7803,7 @@ Attributes considered include @code{alloc_align}, @code{alloc_size}, This is the default. You can disable these warnings with either @option{-Wno-attribute-alias} or @option{-Wattribute-alias=0}. -@item -Wbidi-chars=@r{[}none@r{|}unpaired@r{|}any@r{]} +@item -Wbidi-chars=@r{[}none@r{|}unpaired@r{|}any@r{|}ucn@r{]} @opindex Wbidi-chars= @opindex Wbidi-chars @opindex Wno-bidi-chars @@ -7820,6 +7820,10 @@ bidi contexts. @option{-Wbidi-chars=none} turns the warning off. @option{-Wbidi-chars=any} warns about any use of bidirectional control characters. +By default, this warning does not warn about UCNs. It is, however, possible +to turn on such checking by using @option{-Wbidi-chars=unpaired,ucn} or +@option{-Wbidi-chars=any,ucn}. + @item -Wbool-compare @opindex Wno-bool-compare @opindex Wbool-compare diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-10.c b/gcc/testsuite/c-c++-common/Wbidi-chars-10.c index 3f851b6..cdcdce2 100644 --- a/gcc/testsuite/c-c++-common/Wbidi-chars-10.c +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-10.c @@ -1,6 +1,6 @@ /* PR preprocessor/103026 */ /* { dg-do compile } */ -/* { dg-options "-Wbidi-chars=unpaired" } */ +/* { dg-options "-Wbidi-chars=unpaired,ucn" } */ /* More nesting testing. */ /* RLE LRI PDF PDI*/ diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-11.c b/gcc/testsuite/c-c++-common/Wbidi-chars-11.c index 270ce23..ea83029 100644 --- a/gcc/testsuite/c-c++-common/Wbidi-chars-11.c +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-11.c @@ -1,6 +1,6 @@ /* PR preprocessor/103026 */ /* { dg-do compile } */ -/* { dg-options "-Wbidi-chars=unpaired" } */ +/* { dg-options "-Wbidi-chars=unpaired,ucn" } */ /* Test that we warn when mixing UCN and UTF-8. */ int LRE__PDF_\u202c; diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-14.c b/gcc/testsuite/c-c++-common/Wbidi-chars-14.c index ba5f75d..cb6b05e 100644 --- a/gcc/testsuite/c-c++-common/Wbidi-chars-14.c +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-14.c @@ -1,6 +1,6 @@ /* PR preprocessor/103026 */ /* { dg-do compile } */ -/* { dg-options "-Wbidi-chars=unpaired" } */ +/* { dg-options "-Wbidi-chars=unpaired,ucn" } */ /* Test PDI handling, which also pops any subsequent LREs, RLEs, LROs, or RLOs. */ diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-16.c b/gcc/testsuite/c-c++-common/Wbidi-chars-16.c index baa0159..eaf0ec9 100644 --- a/gcc/testsuite/c-c++-common/Wbidi-chars-16.c +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-16.c @@ -1,6 +1,6 @@ /* PR preprocessor/103026 */ /* { dg-do compile } */ -/* { dg-options "-Wbidi-chars=any" } */ +/* { dg-options "-Wbidi-chars=any,ucn" } */ /* Test LTR/RTL chars. */ /* LTR<> */ diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-17.c b/gcc/testsuite/c-c++-common/Wbidi-chars-17.c index 07cb432..3419221 100644 --- a/gcc/testsuite/c-c++-common/Wbidi-chars-17.c +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-17.c @@ -1,6 +1,6 @@ /* PR preprocessor/103026 */ /* { dg-do compile } */ -/* { dg-options "-Wbidi-chars=unpaired" } */ +/* { dg-options "-Wbidi-chars=unpaired,ucn" } */ /* Test LTR/RTL chars. */ /* LTR<> */ diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-18.c b/gcc/testsuite/c-c++-common/Wbidi-chars-18.c new file mode 100644 index 0000000..ae586d5 --- /dev/null +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-18.c @@ -0,0 +1,11 @@ +/* PR preprocessor/104030 */ +/* { dg-do compile } */ +/* By default, don't warn about UCNs. */ + +const char * +fn () +{ + const char *aText = "\u202D" "abc"; +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ + return aText; +} diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-19.c b/gcc/testsuite/c-c++-common/Wbidi-chars-19.c new file mode 100644 index 0000000..9985c3b --- /dev/null +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-19.c @@ -0,0 +1,11 @@ +/* PR preprocessor/104030 */ +/* { dg-do compile } */ +/* { dg-options "-Wbidi-chars=unpaired,ucn" } */ + +const char * +fn () +{ + const char *aText = "\u202D" "abc"; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + return aText; +} diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-20.c b/gcc/testsuite/c-c++-common/Wbidi-chars-20.c new file mode 100644 index 0000000..859f3d5 --- /dev/null +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-20.c @@ -0,0 +1,11 @@ +/* PR preprocessor/104030 */ +/* { dg-do compile } */ +/* { dg-options "-Wbidi-chars=any" } */ + +const char * +fn () +{ + const char *aText = "\u202D" "abc"; +/* { dg-bogus "U\\+202D" "" { target *-*-* } .-1 } */ + return aText; +} diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-21.c b/gcc/testsuite/c-c++-common/Wbidi-chars-21.c new file mode 100644 index 0000000..2720b8a --- /dev/null +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-21.c @@ -0,0 +1,11 @@ +/* PR preprocessor/104030 */ +/* { dg-do compile } */ +/* { dg-options "-Wbidi-chars=ucn,any" } */ + +const char * +fn () +{ + const char *aText = "\u202D" "abc"; +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ + return aText; +} diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-22.c b/gcc/testsuite/c-c++-common/Wbidi-chars-22.c new file mode 100644 index 0000000..f960e59 --- /dev/null +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-22.c @@ -0,0 +1,11 @@ +/* PR preprocessor/104030 */ +/* { dg-do compile } */ +/* { dg-options "-Wbidi-chars=none,ucn" } */ + +const char * +fn () +{ + const char *aText = "\u202D" "abc"; +/* { dg-bogus "" "" { target *-*-* } .-1 } */ + return aText; +} diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-23.c b/gcc/testsuite/c-c++-common/Wbidi-chars-23.c new file mode 100644 index 0000000..7de0a11 --- /dev/null +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-23.c @@ -0,0 +1,11 @@ +/* PR preprocessor/104030 */ +/* { dg-do compile } */ +/* { dg-options "-Wbidi-chars=ucn" } */ + +const char * +fn () +{ + const char *aText = "\u202D" "abc"; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + return aText; +} diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-4.c b/gcc/testsuite/c-c++-common/Wbidi-chars-4.c index 639e5c6..d2f0739 100644 --- a/gcc/testsuite/c-c++-common/Wbidi-chars-4.c +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-4.c @@ -1,6 +1,6 @@ /* PR preprocessor/103026 */ /* { dg-do compile } */ -/* { dg-options "-Wbidi-chars=any -Wno-multichar -Wno-overflow" } */ +/* { dg-options "-Wbidi-chars=any,ucn -Wno-multichar -Wno-overflow" } */ /* Test all bidi chars in various contexts (identifiers, comments, string literals, character constants), both UCN and UTF-8. The bidi chars here are properly terminated, except for the character constants. */ diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-5.c b/gcc/testsuite/c-c++-common/Wbidi-chars-5.c index 68cb053..ad49498 100644 --- a/gcc/testsuite/c-c++-common/Wbidi-chars-5.c +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-5.c @@ -1,6 +1,6 @@ /* PR preprocessor/103026 */ /* { dg-do compile } */ -/* { dg-options "-Wbidi-chars=unpaired -Wno-multichar -Wno-overflow" } */ +/* { dg-options "-Wbidi-chars=unpaired,ucn -Wno-multichar -Wno-overflow" } */ /* Test all bidi chars in various contexts (identifiers, comments, string literals, character constants), both UCN and UTF-8. The bidi chars here are properly terminated, except for the character constants. */ diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-6.c b/gcc/testsuite/c-c++-common/Wbidi-chars-6.c index 0ce6fff..8c1c1b2 100644 --- a/gcc/testsuite/c-c++-common/Wbidi-chars-6.c +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-6.c @@ -1,6 +1,6 @@ /* PR preprocessor/103026 */ /* { dg-do compile } */ -/* { dg-options "-Wbidi-chars=unpaired" } */ +/* { dg-options "-Wbidi-chars=ucn,unpaired" } */ /* Test nesting of bidi chars in various contexts. */ /* Terminated by the wrong char: */ diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-7.c b/gcc/testsuite/c-c++-common/Wbidi-chars-7.c index d012d42..3270952 100644 --- a/gcc/testsuite/c-c++-common/Wbidi-chars-7.c +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-7.c @@ -1,6 +1,6 @@ /* PR preprocessor/103026 */ /* { dg-do compile } */ -/* { dg-options "-Wbidi-chars=any" } */ +/* { dg-options "-Wbidi-chars=any,ucn" } */ /* Test we ignore UCNs in comments. */ // a b c \u202a 1 2 3 diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-8.c b/gcc/testsuite/c-c++-common/Wbidi-chars-8.c index 4f54c50..3983168 100644 --- a/gcc/testsuite/c-c++-common/Wbidi-chars-8.c +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-8.c @@ -1,6 +1,6 @@ /* PR preprocessor/103026 */ /* { dg-do compile } */ -/* { dg-options "-Wbidi-chars=any" } */ +/* { dg-options "-Wbidi-chars=any,ucn" } */ /* Test \u vs \U. */ int a_\u202A; diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-9.c b/gcc/testsuite/c-c++-common/Wbidi-chars-9.c index e2af1b1..0ddb0d9 100644 --- a/gcc/testsuite/c-c++-common/Wbidi-chars-9.c +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-9.c @@ -1,6 +1,6 @@ /* PR preprocessor/103026 */ /* { dg-do compile } */ -/* { dg-options "-Wbidi-chars=unpaired" } */ +/* { dg-options "-Wbidi-chars=unpaired,ucn" } */ /* Test that we properly separate bidi contexts (comment/identifier/character constant/string literal). */ diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-ranges.c b/gcc/testsuite/c-c++-common/Wbidi-chars-ranges.c index 298750a..0c71f30 100644 --- a/gcc/testsuite/c-c++-common/Wbidi-chars-ranges.c +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-ranges.c @@ -1,6 +1,6 @@ /* PR preprocessor/103026 */ /* { dg-do compile } */ -/* { dg-options "-Wbidi-chars=unpaired -fdiagnostics-show-caret" } */ +/* { dg-options "-Wbidi-chars=unpaired,ucn -fdiagnostics-show-caret" } */ /* Verify that we escape and underline pertinent bidirectional control characters when quoting the source. */ |