aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Biener <rguenther@suse.de>2025-06-05 14:23:06 +0200
committerRichard Biener <rguenther@suse.de>2025-06-06 09:16:01 +0200
commit4a80eaefd93c2f1f7c9b71dbc1b97783214b7b2f (patch)
treef2f2d3c7af46a8c58a73b4b7734f4652c350d41e
parentc6eb92973ea308e248ce23927a9ac58ef81ee7a2 (diff)
downloadgcc-4a80eaefd93c2f1f7c9b71dbc1b97783214b7b2f.zip
gcc-4a80eaefd93c2f1f7c9b71dbc1b97783214b7b2f.tar.gz
gcc-4a80eaefd93c2f1f7c9b71dbc1b97783214b7b2f.tar.bz2
tree-optimization/120032 - CLZ matching, fallback for missing range-info
The following allows us to emit a conditional move when the value of the table-based CLZ/CTZ implementation at zero differs from what the target implementation guarantees, or when we cannot easily fix it up otherwise. In that case emit a val == 0 ? table-based-zero-result : ... PR tree-optimization/120032 * tree-ssa-forwprop.cc (simplify_count_zeroes): When we cannot use the IFN to determine the result at zero, use a conditional move to reproduce the correct result from the table-based algorithm. * gcc.target/i386/pr120032-3.c: New testcase.
-rw-r--r--gcc/testsuite/gcc.target/i386/pr120032-3.c20
-rw-r--r--gcc/tree-ssa-forwprop.cc27
2 files changed, 39 insertions, 8 deletions
diff --git a/gcc/testsuite/gcc.target/i386/pr120032-3.c b/gcc/testsuite/gcc.target/i386/pr120032-3.c
new file mode 100644
index 0000000..9523bbb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr120032-3.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mlzcnt" } */
+
+unsigned int
+ZSTD_countLeadingZeros32_fallback(unsigned int val)
+{
+ static const unsigned int DeBruijnClz[32]
+ = { 0, 9, 1, 10, 13, 21, 2, 29,
+ 11, 14, 16, 18, 22, 25, 3, 30,
+ 8, 12, 20, 28, 15, 17, 24, 7,
+ 19, 27, 23, 6, 26, 5, 4, 31};
+ val |= val >> 1;
+ val |= val >> 2;
+ val |= val >> 4;
+ val |= val >> 8;
+ val |= val >> 16;
+ return 31 - DeBruijnClz[(val * 0x07C4ACDDU) >> 27];
+}
+
+/* { dg-final { scan-assembler "lzcnt" } } */
diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
index 0c2b10e..43b1c9d 100644
--- a/gcc/tree-ssa-forwprop.cc
+++ b/gcc/tree-ssa-forwprop.cc
@@ -2728,13 +2728,6 @@ simplify_count_zeroes (gimple_stmt_iterator *gsi)
nargs = 1;
}
- /* Skip if there is no value defined at zero, or if we can't easily
- return the correct value for zero. */
- if (!zero_ok)
- return false;
- if (zero_val != ctz_val && !(zero_val == 0 && ctz_val == input_bits))
- return false;
-
gimple_seq seq = NULL;
gimple *g;
gcall *call = gimple_build_call_internal (fn, nargs, res_ops[0],
@@ -2758,8 +2751,10 @@ simplify_count_zeroes (gimple_stmt_iterator *gsi)
prev_lhs = gimple_assign_lhs (g);
}
+ if (zero_ok && zero_val == ctz_val)
+ ;
/* Emit ctz (x) & 31 if ctz (0) is 32 but we need to return 0. */
- if (zero_val == 0 && ctz_val == input_bits)
+ else if (zero_ok && zero_val == 0 && ctz_val == input_bits)
{
g = gimple_build_assign (make_ssa_name (integer_type_node),
BIT_AND_EXPR, prev_lhs,
@@ -2769,6 +2764,22 @@ simplify_count_zeroes (gimple_stmt_iterator *gsi)
gimple_seq_add_stmt (&seq, g);
prev_lhs = gimple_assign_lhs (g);
}
+ /* As fallback emit a conditional move. */
+ else
+ {
+ g = gimple_build_assign (make_ssa_name (boolean_type_node), EQ_EXPR,
+ res_ops[0], build_zero_cst (input_type));
+ gimple_set_location (g, gimple_location (stmt));
+ gimple_seq_add_stmt (&seq, g);
+ tree cond = gimple_assign_lhs (g);
+ g = gimple_build_assign (make_ssa_name (integer_type_node),
+ COND_EXPR, cond,
+ build_int_cst (integer_type_node, zero_val),
+ prev_lhs);
+ gimple_set_location (g, gimple_location (stmt));
+ gimple_seq_add_stmt (&seq, g);
+ prev_lhs = gimple_assign_lhs (g);
+ }
g = gimple_build_assign (gimple_assign_lhs (stmt), NOP_EXPR, prev_lhs);
gimple_seq_add_stmt (&seq, g);