aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- gcc/c-family/c.opt 24
-rw-r--r-- gcc/doc/invoke.texi 21
-rw-r--r-- gcc/testsuite/c-c++-common/Wbidi-chars-1.c 12
-rw-r--r-- gcc/testsuite/c-c++-common/Wbidi-chars-10.c 27
-rw-r--r-- gcc/testsuite/c-c++-common/Wbidi-chars-11.c 13
-rw-r--r-- gcc/testsuite/c-c++-common/Wbidi-chars-12.c 19
-rw-r--r-- gcc/testsuite/c-c++-common/Wbidi-chars-13.c 17
-rw-r--r-- gcc/testsuite/c-c++-common/Wbidi-chars-14.c 38
-rw-r--r-- gcc/testsuite/c-c++-common/Wbidi-chars-15.c 59
-rw-r--r-- gcc/testsuite/c-c++-common/Wbidi-chars-16.c 26
-rw-r--r-- gcc/testsuite/c-c++-common/Wbidi-chars-17.c 30
-rw-r--r-- gcc/testsuite/c-c++-common/Wbidi-chars-2.c 9
-rw-r--r-- gcc/testsuite/c-c++-common/Wbidi-chars-3.c 11
-rw-r--r-- gcc/testsuite/c-c++-common/Wbidi-chars-4.c 188
-rw-r--r-- gcc/testsuite/c-c++-common/Wbidi-chars-5.c 188
-rw-r--r-- gcc/testsuite/c-c++-common/Wbidi-chars-6.c 155
-rw-r--r-- gcc/testsuite/c-c++-common/Wbidi-chars-7.c 9
-rw-r--r-- gcc/testsuite/c-c++-common/Wbidi-chars-8.c 13
-rw-r--r-- gcc/testsuite/c-c++-common/Wbidi-chars-9.c 29
-rw-r--r-- libcpp/include/cpplib.h 18
-rw-r--r-- libcpp/init.c 1
-rw-r--r-- libcpp/internal.h 7
-rw-r--r-- libcpp/lex.c 408
23 files changed, 1315 insertions, 7 deletions
diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index 8a4cd63..3976fc3 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -374,6 +374,30 @@ Wbad-function-cast
C ObjC Var(warn_bad_function_cast) Warning
Warn about casting functions to incompatible types.
+Wbidi-chars
+C ObjC C++ ObjC++ Warning Alias(Wbidi-chars=,any,none)
+;
+
+Wbidi-chars=
+C ObjC C++ ObjC++ RejectNegative Joined Warning CPP(cpp_warn_bidirectional) CppReason(CPP_W_BIDIRECTIONAL) Var(warn_bidirectional) Init(bidirectional_unpaired) Enum(cpp_bidirectional_level)
+-Wbidi-chars=[none|unpaired|any] Warn about UTF-8 bidirectional control characters.
+
+; Required for these enum values.
+SourceInclude
+cpplib.h
+
+Enum
+Name(cpp_bidirectional_level) Type(int) UnknownError(argument %qs to %<-Wbidi-chars%> not recognized)
+
+EnumValue
+Enum(cpp_bidirectional_level) String(none) Value(bidirectional_none)
+
+EnumValue
+Enum(cpp_bidirectional_level) String(unpaired) Value(bidirectional_unpaired)
+
+EnumValue
+Enum(cpp_bidirectional_level) String(any) Value(bidirectional_any)
+
Wbool-compare
C ObjC C++ ObjC++ Var(warn_bool_compare) Warning LangEnabledBy(C ObjC C++ ObjC++,Wall)
Warn about boolean expression compared with an integer value different from true/false.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 6070288..a22758d 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -327,7 +327,9 @@ Objective-C and Objective-C++ Dialects}.
-Warith-conversion @gol
-Warray-bounds -Warray-bounds=@var{n} -Warray-compare @gol
-Wno-attributes -Wattribute-alias=@var{n} -Wno-attribute-alias @gol
--Wno-attribute-warning -Wbool-compare -Wbool-operation @gol
+-Wno-attribute-warning @gol
+-Wbidi-chars=@r{[}none@r{|}unpaired@r{|}any@r{]} @gol
+-Wbool-compare -Wbool-operation @gol
-Wno-builtin-declaration-mismatch @gol
-Wno-builtin-macro-redefined -Wc90-c99-compat -Wc99-c11-compat @gol
-Wc11-c2x-compat @gol
@@ -7678,6 +7680,23 @@ Attributes considered include @code{alloc_align}, @code{alloc_size},
This is the default. You can disable these warnings with either
@option{-Wno-attribute-alias} or @option{-Wattribute-alias=0}.
+@item -Wbidi-chars=@r{[}none@r{|}unpaired@r{|}any@r{]}
+@opindex Wbidi-chars=
+@opindex Wbidi-chars
+@opindex Wno-bidi-chars
+Warn about possibly misleading UTF-8 bidirectional control characters in
+comments, string literals, character constants, and identifiers. Such
+characters can change left-to-right writing direction into right-to-left
+(and vice versa), which can cause confusion between the logical order and
+visual order. This may be dangerous; for instance, it may seem that a piece
+of code is not commented out, whereas it in fact is.
+
+There are three levels of warning supported by GCC@. The default is
+@option{-Wbidi-chars=unpaired}, which warns about improperly terminated
+bidi contexts. @option{-Wbidi-chars=none} turns the warning off.
+@option{-Wbidi-chars=any} warns about any use of bidirectional control
+characters.
+
@item -Wbool-compare
@opindex Wno-bool-compare
@opindex Wbool-compare
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-1.c b/gcc/testsuite/c-c++-common/Wbidi-chars-1.c
new file mode 100644
index 0000000..34f5ac1
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-1.c
@@ -0,0 +1,12 @@
+/* PR preprocessor/103026 */
+/* { dg-do compile } */
+
+int main() {
+ int isAdmin = 0;
+ /*‮ } ⁦if (isAdmin)⁩ ⁦ begin admins only */
+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
+ __builtin_printf("You are an admin.\n");
+ /* end admins only ‮ { ⁦*/
+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
+ return 0;
+}
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-10.c b/gcc/testsuite/c-c++-common/Wbidi-chars-10.c
new file mode 100644
index 0000000..3f851b6
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-10.c
@@ -0,0 +1,27 @@
+/* PR preprocessor/103026 */
+/* { dg-do compile } */
+/* { dg-options "-Wbidi-chars=unpaired" } */
+/* More nesting testing. */
+
+/* RLE‫ LRI⁦ PDF‬ PDI⁩*/
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int LRE_\u202a_PDF_\u202c;
+int LRE_\u202a_PDF_\u202c_LRE_\u202a_PDF_\u202c;
+int LRE_\u202a_LRI_\u2066_PDF_\u202c_PDI_\u2069;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int RLE_\u202b_RLI_\u2067_PDF_\u202c_PDI_\u2069;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int RLE_\u202b_RLI_\u2067_PDI_\u2069_PDF_\u202c;
+int FSI_\u2068_LRO_\u202d_PDI_\u2069_PDF_\u202c;
+int FSI_\u2068;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int FSI_\u2068_PDI_\u2069;
+int FSI_\u2068_FSI_\u2068_PDI_\u2069;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069;
+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDF_\u202c;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_FSI_\u2068_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-11.c b/gcc/testsuite/c-c++-common/Wbidi-chars-11.c
new file mode 100644
index 0000000..270ce23
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-11.c
@@ -0,0 +1,13 @@
+/* PR preprocessor/103026 */
+/* { dg-do compile } */
+/* { dg-options "-Wbidi-chars=unpaired" } */
+/* Test that we warn when mixing UCN and UTF-8. */
+
+int LRE_‪_PDF_\u202c;
+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
+int LRE_\u202a_PDF_‬_;
+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
+const char *s1 = "LRE_‪_PDF_\u202c";
+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
+const char *s2 = "LRE_\u202a_PDF_‬";
+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-12.c b/gcc/testsuite/c-c++-common/Wbidi-chars-12.c
new file mode 100644
index 0000000..b07eec1
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-12.c
@@ -0,0 +1,19 @@
+/* PR preprocessor/103026 */
+/* { dg-do compile { target { c || c++11 } } } */
+/* { dg-options "-Wbidi-chars=any" } */
+/* Test raw strings. */
+
+const char *s1 = R"(a b c LRE‪ 1 2 3 PDF‬ x y z)";
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
+const char *s2 = R"(a b c RLE‫ 1 2 3 PDF‬ x y z)";
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
+const char *s3 = R"(a b c LRO‭ 1 2 3 PDF‬ x y z)";
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
+const char *s4 = R"(a b c RLO‮ 1 2 3 PDF‬ x y z)";
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
+const char *s7 = R"(a b c FSI⁨ 1 2 3 PDI⁩ x y) z";
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
+const char *s8 = R"(a b c PDI⁩ x y )z";
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
+const char *s9 = R"(a b c PDF‬ x y z)";
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-13.c b/gcc/testsuite/c-c++-common/Wbidi-chars-13.c
new file mode 100644
index 0000000..b2dd9fd
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-13.c
@@ -0,0 +1,17 @@
+/* PR preprocessor/103026 */
+/* { dg-do compile { target { c || c++11 } } } */
+/* { dg-options "-Wbidi-chars=unpaired" } */
+/* Test raw strings. */
+
+const char *s1 = R"(a b c LRE‪ 1 2 3)";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+const char *s2 = R"(a b c RLE‫ 1 2 3)";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+const char *s3 = R"(a b c LRO‭ 1 2 3)";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+const char *s4 = R"(a b c FSI⁨ 1 2 3)";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+const char *s5 = R"(a b c LRI⁦ 1 2 3)";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+const char *s6 = R"(a b c RLI⁧ 1 2 3)";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-14.c b/gcc/testsuite/c-c++-common/Wbidi-chars-14.c
new file mode 100644
index 0000000..ba5f75d
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-14.c
@@ -0,0 +1,38 @@
+/* PR preprocessor/103026 */
+/* { dg-do compile } */
+/* { dg-options "-Wbidi-chars=unpaired" } */
+/* Test PDI handling, which also pops any subsequent LREs, RLEs, LROs,
+ or RLOs. */
+
+/* LRI_⁦_LRI_⁦_RLE_‫_RLE_‫_RLE_‫_PDI_⁩*/
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+// LRI_⁦_RLE_‫_RLE_‫_RLE_‫_PDI_⁩
+// LRI_⁦_RLO_‮_RLE_‫_RLE_‫_PDI_⁩
+// LRI_⁦_RLO_‮_RLE_‫_PDI_⁩
+// FSI_⁨_RLO_‮_PDI_⁩
+// FSI_⁨_FSI_⁨_RLO_‮_PDI_⁩
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+
+int LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069_PDI_\u2069;
+int LRI_\u2066_LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069_PDI_\u2069;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int PDI_\u2069;
+int LRI_\u2066_PDI_\u2069;
+int RLI_\u2067_PDI_\u2069;
+int LRE_\u202a_LRI_\u2066_PDI_\u2069;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int LRI_\u2066_LRE_\u202a_PDF_\u202c_PDI_\u2069;
+int LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069;
+int RLI_\u2067_LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int FSI_\u2068_LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int RLO_\u202e_PDI_\u2069;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int RLI_\u2067_PDI_\u2069_RLI_\u2067;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int FSI_\u2068_PDF_\u202c_PDI_\u2069;
+int FSI_\u2068_FSI_\u2068_PDF_\u202c_PDI_\u2069;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-15.c b/gcc/testsuite/c-c++-common/Wbidi-chars-15.c
new file mode 100644
index 0000000..a0ce8ff
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-15.c
@@ -0,0 +1,59 @@
+/* PR preprocessor/103026 */
+/* { dg-do compile } */
+/* { dg-options "-Wbidi-chars=unpaired" } */
+/* Test unpaired bidi control chars in multiline comments. */
+
+/*
+ * LRE‪ end
+ */
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
+/*
+ * RLE‫ end
+ */
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
+/*
+ * LRO‭ end
+ */
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
+/*
+ * RLO‮ end
+ */
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
+/*
+ * LRI⁦ end
+ */
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
+/*
+ * RLI⁧ end
+ */
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
+/*
+ * FSI⁨ end
+ */
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
+/* LRE‪
+ PDF‬ */
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
+/* FSI⁨
+ PDI⁩ */
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
+
+/* LRE<‪>
+ *
+ */
+/* { dg-warning "unpaired" "" { target *-*-* } .-3 } */
+
+/*
+ * LRE<‪>
+ */
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
+
+/*
+ *
+ * LRE<‪> */
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+
+/* RLI<⁧> */ /* PDI<⁩> */
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+/* LRE<‪> */ /* PDF<‬> */
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-16.c b/gcc/testsuite/c-c++-common/Wbidi-chars-16.c
new file mode 100644
index 0000000..baa0159
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-16.c
@@ -0,0 +1,26 @@
+/* PR preprocessor/103026 */
+/* { dg-do compile } */
+/* { dg-options "-Wbidi-chars=any" } */
+/* Test LTR/RTL chars. */
+
+/* LTR<‎> */
+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
+// LTR<‎>
+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
+/* RTL<‏> */
+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
+// RTL<‏>
+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
+
+const char *s1 = "LTR<‎>";
+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
+const char *s2 = "LTR\u200e";
+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
+const char *s3 = "LTR\u200E";
+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */
+const char *s4 = "RTL<‏>";
+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
+const char *s5 = "RTL\u200f";
+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
+const char *s6 = "RTL\u200F";
+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-17.c b/gcc/testsuite/c-c++-common/Wbidi-chars-17.c
new file mode 100644
index 0000000..07cb432
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-17.c
@@ -0,0 +1,30 @@
+/* PR preprocessor/103026 */
+/* { dg-do compile } */
+/* { dg-options "-Wbidi-chars=unpaired" } */
+/* Test LTR/RTL chars. */
+
+/* LTR<‎> */
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+// LTR<‎>
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+/* RTL<‏> */
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+// RTL<‏>
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int ltr_\u200e;
+/* { dg-error "universal character " "" { target *-*-* } .-1 } */
+int rtl_\u200f;
+/* { dg-error "universal character " "" { target *-*-* } .-1 } */
+
+const char *s1 = "LTR<‎>";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+const char *s2 = "LTR\u200e";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+const char *s3 = "LTR\u200E";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+const char *s4 = "RTL<‏>";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+const char *s5 = "RTL\u200f";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+const char *s6 = "RTL\u200F";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-2.c b/gcc/testsuite/c-c++-common/Wbidi-chars-2.c
new file mode 100644
index 0000000..2340374
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-2.c
@@ -0,0 +1,9 @@
+/* PR preprocessor/103026 */
+/* { dg-do compile } */
+
+int main() {
+ /* Say hello; newline⁧/*/ return 0 ;
+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
+ __builtin_printf("Hello world.\n");
+ return 0;
+}
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-3.c b/gcc/testsuite/c-c++-common/Wbidi-chars-3.c
new file mode 100644
index 0000000..9dc7edb
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-3.c
@@ -0,0 +1,11 @@
+/* PR preprocessor/103026 */
+/* { dg-do compile } */
+
+int main() {
+ const char* access_level = "user";
+ if (__builtin_strcmp(access_level, "user‮ ⁦// Check if admin⁩ ⁦")) {
+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
+ __builtin_printf("You are an admin.\n");
+ }
+ return 0;
+}
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-4.c b/gcc/testsuite/c-c++-common/Wbidi-chars-4.c
new file mode 100644
index 0000000..639e5c6
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-4.c
@@ -0,0 +1,188 @@
+/* PR preprocessor/103026 */
+/* { dg-do compile } */
+/* { dg-options "-Wbidi-chars=any -Wno-multichar -Wno-overflow" } */
+/* Test all bidi chars in various contexts (identifiers, comments,
+ string literals, character constants), both UCN and UTF-8. The bidi
+ chars here are properly terminated, except for the character constants. */
+
+/* a b c LRE‪ 1 2 3 PDF‬ x y z */
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
+/* a b c RLE‫ 1 2 3 PDF‬ x y z */
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
+/* a b c LRO‭ 1 2 3 PDF‬ x y z */
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
+/* a b c RLO‮ 1 2 3 PDF‬ x y z */
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
+/* a b c LRI⁦ 1 2 3 PDI⁩ x y z */
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
+/* a b c RLI⁧ 1 2 3 PDI⁩ x y */
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
+/* a b c FSI⁨ 1 2 3 PDI⁩ x y z */
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
+
+/* Same but C++ comments instead. */
+// a b c LRE‪ 1 2 3 PDF‬ x y z
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
+// a b c RLE‫ 1 2 3 PDF‬ x y z
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
+// a b c LRO‭ 1 2 3 PDF‬ x y z
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
+// a b c RLO‮ 1 2 3 PDF‬ x y z
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
+// a b c LRI⁦ 1 2 3 PDI⁩ x y z
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
+// a b c RLI⁧ 1 2 3 PDI⁩ x y
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
+// a b c FSI⁨ 1 2 3 PDI⁩ x y z
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
+
+/* Here we're closing an unopened context, warn when =any. */
+/* a b c PDI⁩ x y z */
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
+/* a b c PDF‬ x y z */
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
+// a b c PDI⁩ x y z
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
+// a b c PDF‬ x y z
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
+
+/* Multiline comments. */
+/* a b c PDI⁩ x y z
+ */
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-2 } */
+/* a b c PDF‬ x y z
+ */
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-2 } */
+/* first
+ a b c PDI⁩ x y z
+ */
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-2 } */
+/* first
+ a b c PDF‬ x y z
+ */
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-2 } */
+/* first
+ a b c PDI⁩ x y z */
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
+/* first
+ a b c PDF‬ x y z */
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
+
+void
+g1 ()
+{
+ const char *s1 = "a b c LRE‪ 1 2 3 PDF‬ x y z";
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
+ const char *s2 = "a b c RLE‫ 1 2 3 PDF‬ x y z";
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
+ const char *s3 = "a b c LRO‭ 1 2 3 PDF‬ x y z";
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
+ const char *s4 = "a b c RLO‮ 1 2 3 PDF‬ x y z";
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
+ const char *s5 = "a b c LRI⁦ 1 2 3 PDI⁩ x y z";
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
+ const char *s6 = "a b c RLI⁧ 1 2 3 PDI⁩ x y z";
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
+ const char *s7 = "a b c FSI⁨ 1 2 3 PDI⁩ x y z";
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
+ const char *s8 = "a b c PDI⁩ x y z";
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
+ const char *s9 = "a b c PDF‬ x y z";
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
+
+ const char *s10 = "a b c LRE\u202a 1 2 3 PDF\u202c x y z";
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
+ const char *s11 = "a b c LRE\u202A 1 2 3 PDF\u202c x y z";
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
+ const char *s12 = "a b c RLE\u202b 1 2 3 PDF\u202c x y z";
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
+ const char *s13 = "a b c RLE\u202B 1 2 3 PDF\u202c x y z";
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
+ const char *s14 = "a b c LRO\u202d 1 2 3 PDF\u202c x y z";
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
+ const char *s15 = "a b c LRO\u202D 1 2 3 PDF\u202c x y z";
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
+ const char *s16 = "a b c RLO\u202e 1 2 3 PDF\u202c x y z";
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
+ const char *s17 = "a b c RLO\u202E 1 2 3 PDF\u202c x y z";
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
+ const char *s18 = "a b c LRI\u2066 1 2 3 PDI\u2069 x y z";
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
+ const char *s19 = "a b c RLI\u2067 1 2 3 PDI\u2069 x y z";
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
+ const char *s20 = "a b c FSI\u2068 1 2 3 PDI\u2069 x y z";
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
+}
+
+void
+g2 ()
+{
+ const char c1 = '\u202a';
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
+ const char c2 = '\u202A';
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
+ const char c3 = '\u202b';
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
+ const char c4 = '\u202B';
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
+ const char c5 = '\u202d';
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
+ const char c6 = '\u202D';
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
+ const char c7 = '\u202e';
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
+ const char c8 = '\u202E';
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
+ const char c9 = '\u2066';
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
+ const char c10 = '\u2067';
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
+ const char c11 = '\u2068';
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
+}
+
+int a‪b‬c;
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
+int a‫b‬c;
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
+int a‭b‬c;
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
+int a‮b‬c;
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
+int a⁦b⁩c;
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
+int a⁧b⁩c;
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
+int a⁨b⁩c;
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
+int A‬X;
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
+int A\u202cY;
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
+int A\u202CY2;
+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */
+
+int d\u202ae\u202cf;
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
+int d\u202Ae\u202cf2;
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
+int d\u202be\u202cf;
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
+int d\u202Be\u202cf2;
+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */
+int d\u202de\u202cf;
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
+int d\u202De\u202cf2;
+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */
+int d\u202ee\u202cf;
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
+int d\u202Ee\u202cf2;
+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */
+int d\u2066e\u2069f;
+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */
+int d\u2067e\u2069f;
+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */
+int d\u2068e\u2069f;
+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */
+int X\u2069;
+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-5.c b/gcc/testsuite/c-c++-common/Wbidi-chars-5.c
new file mode 100644
index 0000000..68cb053
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-5.c
@@ -0,0 +1,188 @@
+/* PR preprocessor/103026 */
+/* { dg-do compile } */
+/* { dg-options "-Wbidi-chars=unpaired -Wno-multichar -Wno-overflow" } */
+/* Test all bidi chars in various contexts (identifiers, comments,
+ string literals, character constants), both UCN and UTF-8. The bidi
+ chars here are properly terminated, except for the character constants. */
+
+/* a b c LRE‪ 1 2 3 PDF‬ x y z */
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+/* a b c RLE‫ 1 2 3 PDF‬ x y z */
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+/* a b c LRO‭ 1 2 3 PDF‬ x y z */
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+/* a b c RLO‮ 1 2 3 PDF‬ x y z */
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+/* a b c LRI⁦ 1 2 3 PDI⁩ x y z */
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+/* a b c RLI⁧ 1 2 3 PDI⁩ x y */
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+/* a b c FSI⁨ 1 2 3 PDI⁩ x y z */
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+
+/* Same but C++ comments instead. */
+// a b c LRE‪ 1 2 3 PDF‬ x y z
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+// a b c RLE‫ 1 2 3 PDF‬ x y z
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+// a b c LRO‭ 1 2 3 PDF‬ x y z
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+// a b c RLO‮ 1 2 3 PDF‬ x y z
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+// a b c LRI⁦ 1 2 3 PDI⁩ x y z
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+// a b c RLI⁧ 1 2 3 PDI⁩ x y
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+// a b c FSI⁨ 1 2 3 PDI⁩ x y z
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+
+/* Here we're closing an unopened context, warn when =any. */
+/* a b c PDI⁩ x y z */
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+/* a b c PDF‬ x y z */
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+// a b c PDI⁩ x y z
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+// a b c PDF‬ x y z
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+
+/* Multiline comments. */
+/* a b c PDI⁩ x y z
+ */
+/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */
+/* a b c PDF‬ x y z
+ */
+/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */
+/* first
+ a b c PDI⁩ x y z
+ */
+/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */
+/* first
+ a b c PDF‬ x y z
+ */
+/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */
+/* first
+ a b c PDI⁩ x y z */
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+/* first
+ a b c PDF‬ x y z */
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+
+void
+g1 ()
+{
+ const char *s1 = "a b c LRE‪ 1 2 3 PDF‬ x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+ const char *s2 = "a b c RLE‫ 1 2 3 PDF‬ x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+ const char *s3 = "a b c LRO‭ 1 2 3 PDF‬ x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+ const char *s4 = "a b c RLO‮ 1 2 3 PDF‬ x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+ const char *s5 = "a b c LRI⁦ 1 2 3 PDI⁩ x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+ const char *s6 = "a b c RLI⁧ 1 2 3 PDI⁩ x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+ const char *s7 = "a b c FSI⁨ 1 2 3 PDI⁩ x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+ const char *s8 = "a b c PDI⁩ x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+ const char *s9 = "a b c PDF‬ x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+
+ const char *s10 = "a b c LRE\u202a 1 2 3 PDF\u202c x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+ const char *s11 = "a b c LRE\u202A 1 2 3 PDF\u202c x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+ const char *s12 = "a b c RLE\u202b 1 2 3 PDF\u202c x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+ const char *s13 = "a b c RLE\u202B 1 2 3 PDF\u202c x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+ const char *s14 = "a b c LRO\u202d 1 2 3 PDF\u202c x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+ const char *s15 = "a b c LRO\u202D 1 2 3 PDF\u202c x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+ const char *s16 = "a b c RLO\u202e 1 2 3 PDF\u202c x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+ const char *s17 = "a b c RLO\u202E 1 2 3 PDF\u202c x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+ const char *s18 = "a b c LRI\u2066 1 2 3 PDI\u2069 x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+ const char *s19 = "a b c RLI\u2067 1 2 3 PDI\u2069 x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+ const char *s20 = "a b c FSI\u2068 1 2 3 PDI\u2069 x y z";
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+}
+
+void
+g2 ()
+{
+ const char c1 = '\u202a';
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char c2 = '\u202A';
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char c3 = '\u202b';
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char c4 = '\u202B';
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char c5 = '\u202d';
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char c6 = '\u202D';
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char c7 = '\u202e';
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char c8 = '\u202E';
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char c9 = '\u2066';
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char c10 = '\u2067';
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char c11 = '\u2068';
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+}
+
+int a‪b‬c;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int a‫b‬c;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int a‭b‬c;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int a‮b‬c;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int a⁦b⁩c;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int a⁧b⁩c;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int a⁨b⁩c;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int A‬X;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int A\u202cY;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int A\u202CY2;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+
+int d\u202ae\u202cf;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int d\u202Ae\u202cf2;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int d\u202be\u202cf;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int d\u202Be\u202cf2;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int d\u202de\u202cf;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int d\u202De\u202cf2;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int d\u202ee\u202cf;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int d\u202Ee\u202cf2;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int d\u2066e\u2069f;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int d\u2067e\u2069f;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int d\u2068e\u2069f;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
+int X\u2069;
+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-6.c b/gcc/testsuite/c-c++-common/Wbidi-chars-6.c
new file mode 100644
index 0000000..0ce6fff
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-6.c
@@ -0,0 +1,155 @@
+/* PR preprocessor/103026 */
+/* { dg-do compile } */
+/* { dg-options "-Wbidi-chars=unpaired" } */
+/* Test nesting of bidi chars in various contexts. */
+
+/* Terminated by the wrong char: */
+/* a b c LRE‪ 1 2 3 PDI⁩ x y z */
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+/* a b c RLE‫ 1 2 3 PDI⁩ x y z*/
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+/* a b c LRO‭ 1 2 3 PDI⁩ x y z */
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+/* a b c RLO‮ 1 2 3 PDI⁩ x y z */
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+/* a b c LRI⁦ 1 2 3 PDF‬ x y z */
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+/* a b c RLI⁧ 1 2 3 PDF‬ x y z */
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+/* a b c FSI⁨ 1 2 3 PDF‬ x y z*/
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+
+/* LRE‪ PDF‬ */
+/* LRE‪ LRE‪ PDF‬ PDF‬ */
+/* PDF‬ LRE‪ PDF‬ */
+/* LRE‪ PDF‬ LRE‪ PDF‬ */
+/* LRE‪ LRE‪ PDF‬ */
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+/* PDF‬ LRE‪ */
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+
+// a b c LRE‪ 1 2 3 PDI⁩ x y z
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+// a b c RLE‫ 1 2 3 PDI⁩ x y z*/
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+// a b c LRO‭ 1 2 3 PDI⁩ x y z
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+// a b c RLO‮ 1 2 3 PDI⁩ x y z
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+// a b c LRI⁦ 1 2 3 PDF‬ x y z
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+// a b c RLI⁧ 1 2 3 PDF‬ x y z
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+// a b c FSI⁨ 1 2 3 PDF‬ x y z
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+
+// LRE‪ PDF‬
+// LRE‪ LRE‪ PDF‬ PDF‬
+// PDF‬ LRE‪ PDF‬
+// LRE‪ PDF‬ LRE‪ PDF‬
+// LRE‪ LRE‪ PDF‬
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+// PDF‬ LRE‪
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+
+void
+g1 ()
+{
+ const char *s1 = "a b c LRE‪ 1 2 3 PDI⁩ x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s2 = "a b c LRE\u202a 1 2 3 PDI\u2069 x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s3 = "a b c RLE‫ 1 2 3 PDI⁩ x y ";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s4 = "a b c RLE\u202b 1 2 3 PDI\u2069 x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s5 = "a b c LRO‭ 1 2 3 PDI⁩ x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s6 = "a b c LRO\u202d 1 2 3 PDI\u2069 x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s7 = "a b c RLO‮ 1 2 3 PDI⁩ x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s8 = "a b c RLO\u202e 1 2 3 PDI\u2069 x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s9 = "a b c LRI⁦ 1 2 3 PDF‬ x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s10 = "a b c LRI\u2066 1 2 3 PDF\u202c x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s11 = "a b c RLI⁧ 1 2 3 PDF‬ x y z\
+ ";
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
+ const char *s12 = "a b c RLI\u2067 1 2 3 PDF\u202c x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s13 = "a b c FSI⁨ 1 2 3 PDF‬ x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s14 = "a b c FSI\u2068 1 2 3 PDF\u202c x y z";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s15 = "PDF‬ LRE‪";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s16 = "PDF\u202c LRE\u202a";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s17 = "LRE‪ PDF‬";
+ const char *s18 = "LRE\u202a PDF\u202c";
+ const char *s19 = "LRE‪ LRE‪ PDF‬ PDF‬";
+ const char *s20 = "LRE\u202a LRE\u202a PDF\u202c PDF\u202c";
+ const char *s21 = "PDF‬ LRE‪ PDF‬";
+ const char *s22 = "PDF\u202c LRE\u202a PDF\u202c";
+ const char *s23 = "LRE‪ LRE‪ PDF‬";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s24 = "LRE\u202a LRE\u202a PDF\u202c";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s25 = "PDF‬ LRE‪";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s26 = "PDF\u202c LRE\u202a";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s27 = "PDF‬ LRE\u202a";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+ const char *s28 = "PDF\u202c LRE‪";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+}
+
+int aLRE‪bPDI⁩;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int A\u202aB\u2069C;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int aRLE‫bPDI⁩;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int a\u202bB\u2069c;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int aLRO‭bPDI⁩;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int a\u202db\u2069c2;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int aRLO‮bPDI⁩;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int a\u202eb\u2069;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int aLRI⁦bPDF‬;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int a\u2066b\u202c;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int aRLI⁧bPDF‬c
+;
+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */
+int a\u2067b\u202c;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int aFSI⁨bPDF‬;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int a\u2068b\u202c;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int aFSI⁨bPD\u202C;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int aFSI\u2068bPDF‬_;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int aLRE‪bPDF‬b;
+int A\u202aB\u202c;
+int a_LRE‪_LRE‪_b_PDF‬_PDF‬;
+int A\u202aA\u202aB\u202cB\u202c;
+int aPDF‬bLREadPDF‬;
+int a_\u202C_\u202a_\u202c;
+int a_LRE‪_b_PDF‬_c_LRE‪_PDF‬;
+int a_\u202a_\u202c_\u202a_\u202c_;
+int a_LRE‪_b_PDF‬_c_LRE‪;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int a_\u202a_\u202c_\u202a_;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-7.c b/gcc/testsuite/c-c++-common/Wbidi-chars-7.c
new file mode 100644
index 0000000..d012d42
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-7.c
@@ -0,0 +1,9 @@
+/* PR preprocessor/103026 */
+/* { dg-do compile } */
+/* { dg-options "-Wbidi-chars=any" } */
+/* Test we ignore UCNs in comments. */
+
+// a b c \u202a 1 2 3
+// a b c \u202A 1 2 3
+/* a b c \u202a 1 2 3 */
+/* a b c \u202A 1 2 3 */
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-8.c b/gcc/testsuite/c-c++-common/Wbidi-chars-8.c
new file mode 100644
index 0000000..4f54c50
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-8.c
@@ -0,0 +1,13 @@
+/* PR preprocessor/103026 */
+/* { dg-do compile } */
+/* { dg-options "-Wbidi-chars=any" } */
+/* Test \u vs \U. */
+
+int a_\u202A;
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
+int a_\u202a_2;
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
+int a_\U0000202A_3;
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
+int a_\U0000202a_4;
+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */
diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-9.c b/gcc/testsuite/c-c++-common/Wbidi-chars-9.c
new file mode 100644
index 0000000..e2af1b1
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-9.c
@@ -0,0 +1,29 @@
+/* PR preprocessor/103026 */
+/* { dg-do compile } */
+/* { dg-options "-Wbidi-chars=unpaired" } */
+/* Test that we properly separate bidi contexts (comment/identifier/character
+ constant/string literal). */
+
+/* LRE ->‪<- */ int pdf_\u202c_1;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+/* RLE ->‫<- */ int pdf_\u202c_2;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+/* LRO ->‭<- */ int pdf_\u202c_3;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+/* RLO ->‮<- */ int pdf_\u202c_4;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+/* LRI ->⁦<-*/ int pdi_\u2069_1;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+/* RLI ->⁧<- */ int pdi_\u2069_12;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+/* FSI ->⁨<- */ int pdi_\u2069_3;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+
+const char *s1 = "LRE\u202a"; /* PDF ->‬<- */
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+/* LRE ->‪<- */ const char *s2 = "PDF\u202c";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+const char *s3 = "LRE\u202a"; int pdf_\u202c_5;
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
+int lre_\u202a; const char *s4 = "PDF\u202c";
+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
index 176f8c5..112b9c2 100644
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -319,6 +319,17 @@ enum cpp_main_search
CMS_system, /* Search the system INCLUDE path. */
};
+/* The checking levels for bidirectional control characters, as selected by
+ -Wbidi-chars=[none|unpaired|any], from least restrictive to most. */
+enum cpp_bidirectional_level {
+ /* No checking. */
+ bidirectional_none,
+ /* Only detect unpaired uses of bidirectional control characters (default). */
+ bidirectional_unpaired,
+ /* Detect any use of bidirectional control characters. */
+ bidirectional_any
+};
+
/* This structure is nested inside struct cpp_reader, and
carries all the options visible to the command line. */
struct cpp_options
@@ -539,6 +550,10 @@ struct cpp_options
/* True if warn about differences between C++98 and C++11. */
bool cpp_warn_cxx11_compat;
+ /* Nonzero if bidirectional control characters checking is on. See enum
+ cpp_bidirectional_level. */
+ unsigned char cpp_warn_bidirectional;
+
/* Dependency generation. */
struct
{
@@ -643,7 +658,8 @@ enum cpp_warning_reason {
CPP_W_C90_C99_COMPAT,
CPP_W_C11_C2X_COMPAT,
CPP_W_CXX11_COMPAT,
- CPP_W_EXPANSION_TO_DEFINED
+ CPP_W_EXPANSION_TO_DEFINED,
+ CPP_W_BIDIRECTIONAL
};
/* Callback for header lookup for HEADER, which is the name of a
diff --git a/libcpp/init.c b/libcpp/init.c
index 5a424e2..f9a8f5f 100644
--- a/libcpp/init.c
+++ b/libcpp/init.c
@@ -223,6 +223,7 @@ cpp_create_reader (enum c_lang lang, cpp_hash_table *table,
= ENABLE_CANONICAL_SYSTEM_HEADERS;
CPP_OPTION (pfile, ext_numeric_literals) = 1;
CPP_OPTION (pfile, warn_date_time) = 0;
+ CPP_OPTION (pfile, cpp_warn_bidirectional) = bidirectional_unpaired;
/* Default CPP arithmetic to something sensible for the host for the
benefit of dumb users like fix-header. */
diff --git a/libcpp/internal.h b/libcpp/internal.h
index 8577cab..0ce0246 100644
--- a/libcpp/internal.h
+++ b/libcpp/internal.h
@@ -597,6 +597,13 @@ struct cpp_reader
/* Location identifying the main source file -- intended to be line
zero of said file. */
location_t main_loc;
+
+ /* Returns true iff bidirectional control character checking is enabled,
+ i.e. the cpp_warn_bidirectional option is not bidirectional_none. */
+ bool warn_bidi_p () const
+ {
+ return CPP_OPTION (this, cpp_warn_bidirectional) != bidirectional_none;
+ }
};
/* Character classes. Based on the more primitive macros in safe-ctype.h.
diff --git a/libcpp/lex.c b/libcpp/lex.c
index fa2253d..6a4fbce 100644
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -1164,6 +1164,324 @@ _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
}
}
+namespace bidi {
+ enum class kind {
+ NONE, LRE, RLE, LRO, RLO, LRI, RLI, FSI, PDF, PDI, LTR, RTL
+ };
+
+ /* The first byte shared by the UTF-8 encodings of all bidi characters. */
+ constexpr uchar utf8_start = 0xe2;
+
+ /* A stack of the currently open bidi contexts, one char per context. Bit 0
+ is 1 for a context closed by PDF (opened by LRE, RLE, LRO or RLO) and 0
+ for one closed by PDI (opened by LRI, RLI or FSI). Bit 1 is 1 if the
+ context was opened by a bidi character written as a UCN, 0 if UTF-8. */
+ semi_embedded_vec <unsigned char, 16> vec;
+
+ /* Close the whole comment/identifier/string literal/character constant
+ context by discarding every open bidi context. */
+ void on_close ()
+ {
+ vec.truncate (0);
+ }
+
+ /* Pop the innermost open context; asserts that there is one. */
+ void pop ()
+ {
+ unsigned int len = vec.count ();
+ gcc_checking_assert (len > 0);
+ vec.truncate (len - 1);
+ }
+
+ /* Return the kind of character (PDF or PDI) that closes the Ith context. */
+ kind ctx_at (unsigned int i)
+ {
+ return (vec[i] & 1) ? kind::PDF : kind::PDI;
+ }
+
+ /* Return the closer (PDF or PDI) of the innermost open context, or NONE. */
+ kind current_ctx ()
+ {
+ unsigned int len = vec.count ();
+ if (len == 0)
+ return kind::NONE;
+ return ctx_at (len - 1);
+ }
+
+ /* Return true if the current context comes from a UCN origin, that is, the
+ bidi char which started it was written as a UCN. Asserts one is open. */
+ bool current_ctx_ucn_p ()
+ {
+ unsigned int len = vec.count ();
+ gcc_checking_assert (len > 0);
+ return (vec[len - 1] >> 1) & 1;
+ }
+
+ /* We've read bidi char K (written as a UCN iff UCN_P); update the vector. */
+ void on_char (kind k, bool ucn_p)
+ {
+ switch (k)
+ {
+ case kind::LRE:
+ case kind::RLE:
+ case kind::LRO:
+ case kind::RLO:
+ vec.push (ucn_p ? 3u : 1u);
+ break;
+ case kind::LRI:
+ case kind::RLI:
+ case kind::FSI:
+ vec.push (ucn_p ? 2u : 0u);
+ break;
+ /* PDF terminates the scope of the last LRE, RLE, LRO, or RLO whose scope
+ has not yet been terminated; it is ignored unless that is innermost. */
+ case kind::PDF:
+ if (current_ctx () == kind::PDF)
+ pop ();
+ break;
+ /* PDI terminates the scope of the last LRI, RLI, or FSI whose
+ scope has not yet been terminated, as well as the scopes of
+ any subsequent LREs, RLEs, LROs, or RLOs whose scopes have not
+ yet been terminated. */
+ case kind::PDI:
+ for (int i = vec.count () - 1; i >= 0; --i)
+ if (ctx_at (i) == kind::PDI)
+ {
+ vec.truncate (i);
+ break;
+ }
+ break;
+ case kind::LTR:
+ case kind::RTL:
+ /* These marks open no context, so nothing is pushed for a PDF/PDI to pop. */
+ break;
+ [[likely]] case kind::NONE:
+ break;
+ default:
+ abort ();
+ }
+ }
+
+ /* Return a descriptive string for K; aborts if K is NONE. */
+ const char *to_str (kind k)
+ {
+ switch (k)
+ {
+ case kind::LRE:
+ return "U+202A (LEFT-TO-RIGHT EMBEDDING)";
+ case kind::RLE:
+ return "U+202B (RIGHT-TO-LEFT EMBEDDING)";
+ case kind::LRO:
+ return "U+202D (LEFT-TO-RIGHT OVERRIDE)";
+ case kind::RLO:
+ return "U+202E (RIGHT-TO-LEFT OVERRIDE)";
+ case kind::LRI:
+ return "U+2066 (LEFT-TO-RIGHT ISOLATE)";
+ case kind::RLI:
+ return "U+2067 (RIGHT-TO-LEFT ISOLATE)";
+ case kind::FSI:
+ return "U+2068 (FIRST STRONG ISOLATE)";
+ case kind::PDF:
+ return "U+202C (POP DIRECTIONAL FORMATTING)";
+ case kind::PDI:
+ return "U+2069 (POP DIRECTIONAL ISOLATE)";
+ case kind::LTR:
+ return "U+200E (LEFT-TO-RIGHT MARK)";
+ case kind::RTL:
+ return "U+200F (RIGHT-TO-LEFT MARK)";
+ default:
+ abort ();
+ }
+ }
+}
+
+/* Parse the 3-byte sequence at P (P[0] is asserted to be 0xE2) and return its bidi code, or NONE. */
+
+static bidi::kind
+get_bidi_utf8 (const unsigned char *const p)
+{
+ gcc_checking_assert (p[0] == bidi::utf8_start);
+
+ if (p[1] == 0x80)
+ switch (p[2])
+ {
+ case 0xaa:
+ return bidi::kind::LRE;
+ case 0xab:
+ return bidi::kind::RLE;
+ case 0xac:
+ return bidi::kind::PDF;
+ case 0xad:
+ return bidi::kind::LRO;
+ case 0xae:
+ return bidi::kind::RLO;
+ case 0x8e:
+ return bidi::kind::LTR;
+ case 0x8f:
+ return bidi::kind::RTL;
+ default:
+ break;
+ }
+ else if (p[1] == 0x81)
+ switch (p[2])
+ {
+ case 0xa6:
+ return bidi::kind::LRI;
+ case 0xa7:
+ return bidi::kind::RLI;
+ case 0xa8:
+ return bidi::kind::FSI;
+ case 0xa9:
+ return bidi::kind::PDI;
+ default:
+ break;
+ }
+
+ return bidi::kind::NONE;
+}
+
+/* Parse a UCN where P points just past \u or \U (IS_U is true for \U) and return its bidi code, or NONE. */
+
+static bidi::kind
+get_bidi_ucn (const unsigned char *p, bool is_U)
+{
+ /* 6.4.3 Universal Character Names
+ \u hex-quad
+ \U hex-quad hex-quad
+ where \unnnn means \U0000nnnn. */
+
+ if (is_U)
+ {
+ if (p[0] != '0' || p[1] != '0' || p[2] != '0' || p[3] != '0')
+ return bidi::kind::NONE;
+ /* Skip the four leading zeros so \u and \U can be treated the same below. */
+ p += 4;
+ }
+
+ /* All code points we are looking for have the form 20xx; hex letters match in either case. */
+ if (p[0] != '2' || p[1] != '0')
+ return bidi::kind::NONE;
+ else if (p[2] == '2')
+ switch (p[3])
+ {
+ case 'a':
+ case 'A':
+ return bidi::kind::LRE;
+ case 'b':
+ case 'B':
+ return bidi::kind::RLE;
+ case 'c':
+ case 'C':
+ return bidi::kind::PDF;
+ case 'd':
+ case 'D':
+ return bidi::kind::LRO;
+ case 'e':
+ case 'E':
+ return bidi::kind::RLO;
+ default:
+ break;
+ }
+ else if (p[2] == '6')
+ switch (p[3])
+ {
+ case '6':
+ return bidi::kind::LRI;
+ case '7':
+ return bidi::kind::RLI;
+ case '8':
+ return bidi::kind::FSI;
+ case '9':
+ return bidi::kind::PDI;
+ default:
+ break;
+ }
+ else if (p[2] == '0')
+ switch (p[3])
+ {
+ case 'e':
+ case 'E':
+ return bidi::kind::LTR;
+ case 'f':
+ case 'F':
+ return bidi::kind::RTL;
+ default:
+ break;
+ }
+
+ return bidi::kind::NONE;
+}
+
+/* We're closing a bidi context, that is, we've encountered a newline,
+ are closing a C-style comment, or are at the end of a string literal,
+ character constant, or identifier. At the "unpaired" checking level, warn
+ if some bidi context opened within it is still open. P supplies the
+ column for the warning. All open contexts are then discarded. */
+
+static void
+maybe_warn_bidi_on_close (cpp_reader *pfile, const uchar *p)
+{
+ if (CPP_OPTION (pfile, cpp_warn_bidirectional) == bidirectional_unpaired
+ && bidi::vec.count () > 0)
+ {
+ const location_t loc
+ = linemap_position_for_column (pfile->line_table,
+ CPP_BUF_COLUMN (pfile->buffer, p));
+ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
+ "unpaired UTF-8 bidirectional control character "
+ "detected");
+ }
+ /* We're done with this context, whether or not we warned. */
+ bidi::on_close ();
+}
+
+/* We're at the beginning or in the middle of an identifier/comment/string
+ literal/character constant. Warn if we've encountered a bidi character and
+ update the open contexts. KIND says which bidi character it was (NONE is a
+ no-op); P gives its position. UCN_P is true iff it was written as a UCN. */
+
+static void
+maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar *p, bidi::kind kind,
+ bool ucn_p)
+{
+ if (__builtin_expect (kind == bidi::kind::NONE, 1))
+ return;
+
+ const auto warn_bidi = CPP_OPTION (pfile, cpp_warn_bidirectional);
+
+ if (warn_bidi != bidirectional_none)
+ {
+ const location_t loc
+ = linemap_position_for_column (pfile->line_table,
+ CPP_BUF_COLUMN (pfile->buffer, p));
+ /* It seems excessive to warn about a PDI/PDF that is closing
+ an opened context because we've already warned about the
+ opening character. Except warn when we have a UCN x UTF-8
+ mismatch. */
+ if (kind == bidi::current_ctx ())
+ {
+ if (warn_bidi == bidirectional_unpaired
+ && bidi::current_ctx_ucn_p () != ucn_p)
+ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
+ "UTF-8 vs UCN mismatch when closing "
+ "a context by \"%s\"", bidi::to_str (kind));
+ }
+ else if (warn_bidi == bidirectional_any)
+ {
+ if (kind == bidi::kind::PDF || kind == bidi::kind::PDI)
+ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
+ "\"%s\" is closing an unopened context",
+ bidi::to_str (kind));
+ else
+ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
+ "found problematic Unicode character \"%s\"",
+ bidi::to_str (kind));
+ }
+ }
+ /* Update the stack of open bidi contexts for this character. */
+ bidi::on_char (kind, ucn_p);
+}
+
/* Skip a C-style block comment. We find the end of the comment by
seeing if an asterisk is before every '/' we encounter. Returns
nonzero if comment terminated by EOF, zero otherwise.
@@ -1175,6 +1493,7 @@ _cpp_skip_block_comment (cpp_reader *pfile)
cpp_buffer *buffer = pfile->buffer;
const uchar *cur = buffer->cur;
uchar c;
+ const bool warn_bidi_p = pfile->warn_bidi_p ();
cur++;
if (*cur == '/')
@@ -1189,7 +1508,11 @@ _cpp_skip_block_comment (cpp_reader *pfile)
if (c == '/')
{
if (cur[-2] == '*')
- break;
+ {
+ if (warn_bidi_p)
+ maybe_warn_bidi_on_close (pfile, cur);
+ break;
+ }
/* Warn about potential nested comments, but not if the '/'
comes immediately before the true comment delimiter.
@@ -1208,6 +1531,8 @@ _cpp_skip_block_comment (cpp_reader *pfile)
{
unsigned int cols;
buffer->cur = cur - 1;
+ if (warn_bidi_p)
+ maybe_warn_bidi_on_close (pfile, cur);
_cpp_process_line_notes (pfile, true);
if (buffer->next_line >= buffer->rlimit)
return true;
@@ -1218,6 +1543,13 @@ _cpp_skip_block_comment (cpp_reader *pfile)
cur = buffer->cur;
}
+ /* If this is a beginning of a UTF-8 encoding, it might be
+ a bidirectional control character. */
+ else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
+ {
+ bidi::kind kind = get_bidi_utf8 (cur - 1);
+ maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/false);
+ }
}
buffer->cur = cur;
@@ -1233,9 +1565,31 @@ skip_line_comment (cpp_reader *pfile)
{
cpp_buffer *buffer = pfile->buffer;
location_t orig_line = pfile->line_table->highest_line;
+ const bool warn_bidi_p = pfile->warn_bidi_p ();
- while (*buffer->cur != '\n')
- buffer->cur++;
+ if (!warn_bidi_p)
+ while (*buffer->cur != '\n')
+ buffer->cur++;
+ else
+ {
+ while (*buffer->cur != '\n'
+ && *buffer->cur != bidi::utf8_start)
+ buffer->cur++;
+ if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0))
+ {
+ while (*buffer->cur != '\n')
+ {
+ if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0))
+ {
+ bidi::kind kind = get_bidi_utf8 (buffer->cur);
+ maybe_warn_bidi_on_char (pfile, buffer->cur, kind,
+ /*ucn_p=*/false);
+ }
+ buffer->cur++;
+ }
+ maybe_warn_bidi_on_close (pfile, buffer->cur);
+ }
+ }
_cpp_process_line_notes (pfile, true);
return orig_line != pfile->line_table->highest_line;
@@ -1346,11 +1700,13 @@ static const cppchar_t utf8_signifier = 0xC0;
/* Returns TRUE if the sequence starting at buffer->cur is valid in
an identifier. FIRST is TRUE if this starts an identifier. */
+
static bool
forms_identifier_p (cpp_reader *pfile, int first,
struct normalize_state *state)
{
cpp_buffer *buffer = pfile->buffer;
+ const bool warn_bidi_p = pfile->warn_bidi_p ();
if (*buffer->cur == '$')
{
@@ -1373,6 +1729,13 @@ forms_identifier_p (cpp_reader *pfile, int first,
cppchar_t s;
if (*buffer->cur >= utf8_signifier)
{
+ if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0)
+ && warn_bidi_p)
+ {
+ bidi::kind kind = get_bidi_utf8 (buffer->cur);
+ maybe_warn_bidi_on_char (pfile, buffer->cur, kind,
+ /*ucn_p=*/false);
+ }
if (_cpp_valid_utf8 (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
state, &s))
return true;
@@ -1381,6 +1744,13 @@ forms_identifier_p (cpp_reader *pfile, int first,
&& (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
{
buffer->cur += 2;
+ if (warn_bidi_p)
+ {
+ bidi::kind kind = get_bidi_ucn (buffer->cur,
+ buffer->cur[-1] == 'U');
+ maybe_warn_bidi_on_char (pfile, buffer->cur, kind,
+ /*ucn_p=*/true);
+ }
if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
state, &s, NULL, NULL))
return true;
@@ -1489,6 +1859,7 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
const uchar *cur;
unsigned int len;
unsigned int hash = HT_HASHSTEP (0, *base);
+ const bool warn_bidi_p = pfile->warn_bidi_p ();
cur = pfile->buffer->cur;
if (! starts_ucn)
@@ -1512,6 +1883,8 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
pfile->buffer->cur++;
}
} while (forms_identifier_p (pfile, false, nst));
+ if (warn_bidi_p)
+ maybe_warn_bidi_on_close (pfile, pfile->buffer->cur);
result = _cpp_interpret_identifier (pfile, base,
pfile->buffer->cur - base);
*spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base);
@@ -1758,6 +2131,7 @@ static void
lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
{
const uchar *pos = base;
+ const bool warn_bidi_p = pfile->warn_bidi_p ();
/* 'tis a pity this information isn't passed down from the lexer's
initial categorization of the token. */
@@ -1994,8 +2368,15 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
pos = base = pfile->buffer->cur;
note = &pfile->buffer->notes[pfile->buffer->cur_note];
}
+ else if (__builtin_expect ((unsigned char) c == bidi::utf8_start, 0)
+ && warn_bidi_p)
+ maybe_warn_bidi_on_char (pfile, pos - 1, get_bidi_utf8 (pos - 1),
+ /*ucn_p=*/false);
}
+ if (warn_bidi_p)
+ maybe_warn_bidi_on_close (pfile, pos);
+
if (CPP_OPTION (pfile, user_literals))
{
/* If a string format macro, say from inttypes.h, is placed touching
@@ -2090,15 +2471,27 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
else
terminator = '>', type = CPP_HEADER_NAME;
+ const bool warn_bidi_p = pfile->warn_bidi_p ();
for (;;)
{
cppchar_t c = *cur++;
/* In #include-style directives, terminators are not escapable. */
if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
- cur++;
+ {
+ if ((cur[0] == 'u' || cur[0] == 'U') && warn_bidi_p)
+ {
+ bidi::kind kind = get_bidi_ucn (cur + 1, cur[0] == 'U');
+ maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/true);
+ }
+ cur++;
+ }
else if (c == terminator)
- break;
+ {
+ if (warn_bidi_p)
+ maybe_warn_bidi_on_close (pfile, cur - 1);
+ break;
+ }
else if (c == '\n')
{
cur--;
@@ -2115,6 +2508,11 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
}
else if (c == '\0')
saw_NUL = true;
+ else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
+ {
+ bidi::kind kind = get_bidi_utf8 (cur - 1);
+ maybe_warn_bidi_on_char (pfile, cur - 1, kind, /*ucn_p=*/false);
+ }
}
if (saw_NUL && !pfile->state.skipping)