aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author David Malcolm <dmalcolm@redhat.com> 2021-11-02 09:54:32 -0400
committer David Malcolm <dmalcolm@redhat.com> 2021-11-17 17:32:30 -0500
commit 1a7f2c0774129750fdf73e9f1b78f0ce983c9ab3 (patch)
tree c75c04138fd6ba0a5f4c2c837e8a6b93e6ff5af4
parent ea9e0d6c27405d256b4888e9e860e469037c911d (diff)
download gcc-1a7f2c0774129750fdf73e9f1b78f0ce983c9ab3.zip
gcc-1a7f2c0774129750fdf73e9f1b78f0ce983c9ab3.tar.gz
gcc-1a7f2c0774129750fdf73e9f1b78f0ce983c9ab3.tar.bz2
libcpp: escape non-ASCII source bytes in -Wbidi-chars= [PR103026]
This flags rich_locations associated with -Wbidi-chars= so that
non-ASCII bytes will be escaped when printing the source lines
(using the diagnostics support I added in
r12-4825-gbd5e882cf6e0def3dd1bc106075d59a303fe0d1e).

In particular, this ensures that the printed source lines will be
pure ASCII, and thus the visual ordering of the characters will be
the same as the logical ordering.

Before:

  Wbidi-chars-1.c: In function ‘main’:
  Wbidi-chars-1.c:6:43: warning: unpaired UTF-8 bidirectional control character detected [-Wbidi-chars=]
  6 | /*‮ } ⁦if (isAdmin)⁩ ⁦ begin admins only */
  | ^
  Wbidi-chars-1.c:9:28: warning: unpaired UTF-8 bidirectional control character detected [-Wbidi-chars=]
  9 | /* end admins only ‮ { ⁦*/
  | ^
  Wbidi-chars-11.c:6:15: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
  6 | int LRE_‪_PDF_\u202c;
  | ^
  Wbidi-chars-11.c:8:19: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
  8 | int LRE_\u202a_PDF_‬_;
  | ^
  Wbidi-chars-11.c:10:28: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
  10 | const char *s1 = "LRE_‪_PDF_\u202c";
  | ^
  Wbidi-chars-11.c:12:33: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
  12 | const char *s2 = "LRE_\u202a_PDF_‬";
  | ^

After:

  Wbidi-chars-1.c: In function ‘main’:
  Wbidi-chars-1.c:6:43: warning: unpaired UTF-8 bidirectional control character detected [-Wbidi-chars=]
  6 | /*<U+202E> } <U+2066>if (isAdmin)<U+2069> <U+2066> begin admins only */
  | ^
  Wbidi-chars-1.c:9:28: warning: unpaired UTF-8 bidirectional control character detected [-Wbidi-chars=]
  9 | /* end admins only <U+202E> { <U+2066>*/
  | ^
  Wbidi-chars-11.c:6:15: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
  6 | int LRE_<U+202A>_PDF_\u202c;
  | ^
  Wbidi-chars-11.c:8:19: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
  8 | int LRE_\u202a_PDF_<U+202C>_;
  | ^
  Wbidi-chars-11.c:10:28: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
  10 | const char *s1 = "LRE_<U+202A>_PDF_\u202c";
  | ^
  Wbidi-chars-11.c:12:33: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
  12 | const char *s2 = "LRE_\u202a_PDF_<U+202C>";
  | ^

libcpp/ChangeLog:
        PR preprocessor/103026
        * lex.c (maybe_warn_bidi_on_close): Use a rich_location
        and call set_escape_on_output (true) on it.
        (maybe_warn_bidi_on_char): Likewise.

Signed-off-by: David Malcolm <dmalcolm@redhat.com>
-rw-r--r-- libcpp/lex.c 29
1 files changed, 17 insertions, 12 deletions
diff --git a/libcpp/lex.c b/libcpp/lex.c
index 8188e33..2421d6c 100644
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -1427,9 +1427,11 @@ maybe_warn_bidi_on_close (cpp_reader *pfile, const uchar *p)
const location_t loc
= linemap_position_for_column (pfile->line_table,
CPP_BUF_COLUMN (pfile->buffer, p));
- cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
- "unpaired UTF-8 bidirectional control character "
- "detected");
+ rich_location rich_loc (pfile->line_table, loc);
+ rich_loc.set_escape_on_output (true);
+ cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
+ "unpaired UTF-8 bidirectional control character "
+ "detected");
}
/* We're done with this context. */
bidi::on_close ();
@@ -1454,6 +1456,9 @@ maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar *p, bidi::kind kind,
const location_t loc
= linemap_position_for_column (pfile->line_table,
CPP_BUF_COLUMN (pfile->buffer, p));
+ rich_location rich_loc (pfile->line_table, loc);
+ rich_loc.set_escape_on_output (true);
+
/* It seems excessive to warn about a PDI/PDF that is closing
an opened context because we've already warned about the
opening character. Except warn when we have a UCN x UTF-8
@@ -1462,20 +1467,20 @@ maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar *p, bidi::kind kind,
{
if (warn_bidi == bidirectional_unpaired
&& bidi::current_ctx_ucn_p () != ucn_p)
- cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
- "UTF-8 vs UCN mismatch when closing "
- "a context by \"%s\"", bidi::to_str (kind));
+ cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
+ "UTF-8 vs UCN mismatch when closing "
+ "a context by \"%s\"", bidi::to_str (kind));
}
else if (warn_bidi == bidirectional_any)
{
if (kind == bidi::kind::PDF || kind == bidi::kind::PDI)
- cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
- "\"%s\" is closing an unopened context",
- bidi::to_str (kind));
+ cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
+ "\"%s\" is closing an unopened context",
+ bidi::to_str (kind));
else
- cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
- "found problematic Unicode character \"%s\"",
- bidi::to_str (kind));
+ cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
+ "found problematic Unicode character \"%s\"",
+ bidi::to_str (kind));
}
}
/* We're done with this context. */