diff options
author | Richard Biener <rguenther@suse.de> | 2025-06-05 14:23:06 +0200 |
---|---|---|
committer | Richard Biener <rguenther@suse.de> | 2025-06-06 09:16:01 +0200 |
commit | 4a80eaefd93c2f1f7c9b71dbc1b97783214b7b2f (patch) | |
tree | f2f2d3c7af46a8c58a73b4b7734f4652c350d41e | |
parent | c6eb92973ea308e248ce23927a9ac58ef81ee7a2 (diff) | |
download | gcc-4a80eaefd93c2f1f7c9b71dbc1b97783214b7b2f.zip gcc-4a80eaefd93c2f1f7c9b71dbc1b97783214b7b2f.tar.gz gcc-4a80eaefd93c2f1f7c9b71dbc1b97783214b7b2f.tar.bz2 |
tree-optimization/120032 - CLZ matching, fallback for missing range-info
The following allows us to emit a conditional move when the value
of the table-based CTZ/CLZ implementation at zero differs from what
the target implementation guarantees (or the target guarantees nothing)
and we cannot easily fix it up otherwise.  In that case emit
val == 0 ? table-based-zero-result : ...
PR tree-optimization/120032
* tree-ssa-forwprop.cc (simplify_count_zeroes): When we cannot use
the IFN to determine the result at zero, use a conditional move
to reproduce the correct result from the table-based
algorithm.
* gcc.target/i386/pr120032-3.c: New testcase.
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr120032-3.c | 20 | ||||
-rw-r--r-- | gcc/tree-ssa-forwprop.cc | 27 |
2 files changed, 39 insertions, 8 deletions
diff --git a/gcc/testsuite/gcc.target/i386/pr120032-3.c b/gcc/testsuite/gcc.target/i386/pr120032-3.c new file mode 100644 index 0000000..9523bbb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr120032-3.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mlzcnt" } */ + +unsigned int +ZSTD_countLeadingZeros32_fallback(unsigned int val) +{ + static const unsigned int DeBruijnClz[32] + = { 0, 9, 1, 10, 13, 21, 2, 29, + 11, 14, 16, 18, 22, 25, 3, 30, + 8, 12, 20, 28, 15, 17, 24, 7, + 19, 27, 23, 6, 26, 5, 4, 31}; + val |= val >> 1; + val |= val >> 2; + val |= val >> 4; + val |= val >> 8; + val |= val >> 16; + return 31 - DeBruijnClz[(val * 0x07C4ACDDU) >> 27]; +} + +/* { dg-final { scan-assembler "lzcnt" } } */ diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc index 0c2b10e..43b1c9d 100644 --- a/gcc/tree-ssa-forwprop.cc +++ b/gcc/tree-ssa-forwprop.cc @@ -2728,13 +2728,6 @@ simplify_count_zeroes (gimple_stmt_iterator *gsi) nargs = 1; } - /* Skip if there is no value defined at zero, or if we can't easily - return the correct value for zero. */ - if (!zero_ok) - return false; - if (zero_val != ctz_val && !(zero_val == 0 && ctz_val == input_bits)) - return false; - gimple_seq seq = NULL; gimple *g; gcall *call = gimple_build_call_internal (fn, nargs, res_ops[0], @@ -2758,8 +2751,10 @@ simplify_count_zeroes (gimple_stmt_iterator *gsi) prev_lhs = gimple_assign_lhs (g); } + if (zero_ok && zero_val == ctz_val) + ; /* Emit ctz (x) & 31 if ctz (0) is 32 but we need to return 0. */ - if (zero_val == 0 && ctz_val == input_bits) + else if (zero_ok && zero_val == 0 && ctz_val == input_bits) { g = gimple_build_assign (make_ssa_name (integer_type_node), BIT_AND_EXPR, prev_lhs, @@ -2769,6 +2764,22 @@ simplify_count_zeroes (gimple_stmt_iterator *gsi) gimple_seq_add_stmt (&seq, g); prev_lhs = gimple_assign_lhs (g); } + /* As fallback emit a conditional move. 
*/ + else + { + g = gimple_build_assign (make_ssa_name (boolean_type_node), EQ_EXPR, + res_ops[0], build_zero_cst (input_type)); + gimple_set_location (g, gimple_location (stmt)); + gimple_seq_add_stmt (&seq, g); + tree cond = gimple_assign_lhs (g); + g = gimple_build_assign (make_ssa_name (integer_type_node), + COND_EXPR, cond, + build_int_cst (integer_type_node, zero_val), + prev_lhs); + gimple_set_location (g, gimple_location (stmt)); + gimple_seq_add_stmt (&seq, g); + prev_lhs = gimple_assign_lhs (g); + } g = gimple_build_assign (gimple_assign_lhs (stmt), NOP_EXPR, prev_lhs); gimple_seq_add_stmt (&seq, g); |