From e07df053031e109c50387c92d689950de1d193ab Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Tue, 30 Apr 2024 11:22:32 +0200 Subject: gimple-ssa-sprintf: Use [0, 1] range for %lc with (wint_t) 0 argument [PR114876] Seems when Martin S. implemented this, he coded there strict reading of the standard, which said that %lc with (wint_t) 0 argument is handled as wchar_t[2] temp = { arg, 0 }; %ls with temp arg and so shouldn't print any values. But, most of the libc implementations actually handled that case like %c with '\0' argument, adding a single NUL character, the only known exception is musl. Recently, C23 changed this in response to GB-141 and POSIX in https://austingroupbugs.net/view.php?id=1647 so that it should have the same behavior as %c with '\0'. Because there is implementation divergence, the following patch uses a range rather than hardcoding it to all 1s (i.e. the %c behavior), though the likely case is still 1 (forward looking plus most of implementations). The res.knownrange = true; assignment removed is redundant due to the same assignment done unconditionally before the if statement, rest is formatting fixes. I don't think the min >= 0 && min < 128 case is right either, I'd think it should be min >= 0 && max < 128, otherwise it is just some possible inputs are (maybe) ASCII and there can be others, but this code is a total mess anyway, with the min, max, likely (somewhere in [min, max]?) and then unlikely possibly larger than max, dunno, perhaps for at least some chars in the ASCII range the likely case could be for the ascii case; so perhaps just the one_2_one_ascii shouldn't set max to 1 and mayfail should be true for max >= 128. Anyway, didn't feel I should touch that right now. 2024-04-30 Jakub Jelinek PR tree-optimization/114876 * gimple-ssa-sprintf.cc (format_character): For min == 0 && max == 0, set max, likely and unlikely members to 1 rather than 0. Remove useless res.knownrange = true;. Formatting fixes. * gcc.dg/pr114876.c: New test. * gcc.dg/tree-ssa/builtin-sprintf-warn-1.c: Adjust expected diagnostics. (cherry picked from commit 6c6b70f07208ca14ba783933988c04c6fc2fff42) --- gcc/gimple-ssa-sprintf.cc | 20 +++++++------ gcc/testsuite/gcc.dg/pr114876.c | 34 ++++++++++++++++++++++ .../gcc.dg/tree-ssa/builtin-sprintf-warn-1.c | 12 ++++---- 3 files changed, 51 insertions(+), 15 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/pr114876.c diff --git a/gcc/gimple-ssa-sprintf.cc b/gcc/gimple-ssa-sprintf.cc index 1897570..e02977f 100644 --- a/gcc/gimple-ssa-sprintf.cc +++ b/gcc/gimple-ssa-sprintf.cc @@ -2170,8 +2170,7 @@ format_character (const directive &dir, tree arg, pointer_query &ptr_qry) res.knownrange = true; - if (dir.specifier == 'C' - || dir.modifier == FMT_LEN_l) + if (dir.specifier == 'C' || dir.modifier == FMT_LEN_l) { /* A wide character can result in as few as zero bytes. */ res.range.min = 0; @@ -2182,10 +2181,13 @@ format_character (const directive &dir, tree arg, pointer_query &ptr_qry) { if (min == 0 && max == 0) { - /* The NUL wide character results in no bytes. */ - res.range.max = 0; - res.range.likely = 0; - res.range.unlikely = 0; + /* In strict reading of older ISO C or POSIX, this required + no characters to be emitted. ISO C23 changes that, so + does POSIX, to match what has been implemented in most of the + implementations, namely emitting a single NUL character. + Let's use 0 for minimum and 1 for all the other values. */ + res.range.max = 1; + res.range.likely = res.range.unlikely = 1; } else if (min >= 0 && min < 128) { @@ -2193,11 +2195,12 @@ format_character (const directive &dir, tree arg, pointer_query &ptr_qry) is not a 1-to-1 mapping to the source character set or if the source set is not ASCII. */ bool one_2_one_ascii - = (target_to_host_charmap[0] == 1 && target_to_host ('a') == 97); + = (target_to_host_charmap[0] == 1 + && target_to_host ('a') == 97); /* A wide character in the ASCII range most likely results in a single byte, and only unlikely in up to MB_LEN_MAX. */ - res.range.max = one_2_one_ascii ? 1 : target_mb_len_max ();; + res.range.max = one_2_one_ascii ? 1 : target_mb_len_max (); res.range.likely = 1; res.range.unlikely = target_mb_len_max (); res.mayfail = !one_2_one_ascii; @@ -2228,7 +2231,6 @@ format_character (const directive &dir, tree arg, pointer_query &ptr_qry) /* A plain '%c' directive. Its output is exactly 1. */ res.range.min = res.range.max = 1; res.range.likely = res.range.unlikely = 1; - res.knownrange = true; } /* Bump up the byte counters if WIDTH is greater. */ diff --git a/gcc/testsuite/gcc.dg/pr114876.c b/gcc/testsuite/gcc.dg/pr114876.c new file mode 100644 index 0000000..7bb380a --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr114876.c @@ -0,0 +1,34 @@ +/* PR tree-optimization/114876 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-not "return \[01\];" "optimized" } } */ +/* { dg-final { scan-tree-dump "return 3;" "optimized" } } */ +/* { dg-final { scan-tree-dump "return 4;" "optimized" } } */ + +int +foo (void) +{ + char buf[64]; + return __builtin_sprintf (buf, "%lc%lc%lc", (__WINT_TYPE__) 0, (__WINT_TYPE__) 0, (__WINT_TYPE__) 0); +} + +int +bar (void) +{ + char buf[64]; + return __builtin_sprintf (buf, "%c%c%c", 0, 0, 0); +} + +int +baz (void) +{ + char buf[64]; + return __builtin_sprintf (buf, "%lc%lc%lca", (__WINT_TYPE__) 0, (__WINT_TYPE__) 0, (__WINT_TYPE__) 0); +} + +int +qux (void) +{ + char buf[64]; + return __builtin_sprintf (buf, "%c%c%ca", 0, 0, 0); +} diff --git a/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-1.c b/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-1.c index 0cc03ff..6cb2acb 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-1.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-1.c @@ -200,11 +200,11 @@ void test_sprintf_chk_c_const (void) T (3, "%c%c", '1', '2'); /* Wide characters. */ - T (0, "%lc", (wint_t)0); /* { dg-warning "nul past the end" } */ - T (1, "%lc", (wint_t)0); - T (1, "%lc%lc", (wint_t)0, (wint_t)0); + T (0, "%lc", (wint_t)0); /* { dg-warning ".%lc. directive writing up to 1 bytes into a region of size 0" } */ + T (1, "%lc", (wint_t)0); /* { dg-warning "nul past the end" } */ + T (1, "%lc%lc", (wint_t)0, (wint_t)0); /* { dg-warning ".%lc. directive writing up to 1 bytes into a region of size between 0 and 1" } */ T (2, "%lc", (wint_t)0); - T (2, "%lc%lc", (wint_t)0, (wint_t)0); + T (2, "%lc%lc", (wint_t)0, (wint_t)0); /* { dg-warning "nul past the end" } */ /* The following could result in as few as no bytes and in as many as MB_CUR_MAX, but since the MB_CUR_MAX value is a runtime property @@ -1550,7 +1550,7 @@ void test_snprintf_c_const (char *d) /* Wide characters. */ T (0, "%lc", (wint_t)0); - T (1, "%lc", (wint_t)0); + T (1, "%lc", (wint_t)0); /* { dg-warning "output may be truncated before the last format character" } */ T (2, "%lc", (wint_t)0); /* The following could result in as few as a single byte and in as many @@ -1603,7 +1603,7 @@ void test_snprintf_chk_c_const (void) /* Wide characters. */ T (0, "%lc", (wint_t)0); - T (1, "%lc", (wint_t)0); + T (1, "%lc", (wint_t)0); /* { dg-warning "output may be truncated before the last format character" } */ T (2, "%lc", (wint_t)0); /* The following could result in as few as a single byte and in as many -- cgit v1.1