path: root/gcc/tree-vect-patterns.cc
author    Tamar Christina <tamar.christina@arm.com>  2023-03-12 18:42:04 +0000
committer Tamar Christina <tamar.christina@arm.com>  2023-03-12 18:42:04 +0000
commit    81fd62d1378b7ddc1fa0967cbddcdcdcdd2d8d8c (patch)
tree      3a6881be284bf7e9dbcb377959334cc7f2caeed4 /gcc/tree-vect-patterns.cc
parent    0b3c630fcc44063a61f6131af48a4171b1de2b37 (diff)
download  gcc-81fd62d1378b7ddc1fa0967cbddcdcdcdd2d8d8c.zip
          gcc-81fd62d1378b7ddc1fa0967cbddcdcdcdd2d8d8c.tar.gz
          gcc-81fd62d1378b7ddc1fa0967cbddcdcdcdd2d8d8c.tar.bz2
middle-end: Implement preferred_div_as_shifts_over_mult [PR108583]
This now implements a hook, preferred_div_as_shifts_over_mult, that indicates
whether a target prefers that the vectorizer decompose division into shifts
rather than multiplication when possible.

In order to be able to use this we need to check whether the current precision
has enough bits to do the operation without any of the additions overflowing.
We use range information to determine this and only do the operation if we're
sure an overflow won't occur.  This uses ranger to do the range check.

This seems to work better than vect_get_range_info, which uses range_query,
but I have not switched the interface of vect_get_range_info over in this PR
fix.  As Andy said before, initializing a ranger instance is cheap but not
free, and if the intention is to call it often during a pass it should be
instantiated at pass startup and passed along to the places that need it.
That is a big refactoring and doesn't seem right to do in this PR, but we
should do it in GCC 14.  Currently we only instantiate ranger after a long
series of much cheaper checks.

gcc/ChangeLog:

	PR target/108583
	* target.def (preferred_div_as_shifts_over_mult): New.
	* doc/tm.texi.in: Document it.
	* doc/tm.texi: Regenerate.
	* targhooks.cc (default_preferred_div_as_shifts_over_mult): New.
	* targhooks.h (default_preferred_div_as_shifts_over_mult): New.
	* tree-vect-patterns.cc (vect_recog_divmod_pattern): Use it.

gcc/testsuite/ChangeLog:

	PR target/108583
	* gcc.dg/vect/vect-div-bitmask-4.c: New test.
	* gcc.dg/vect/vect-div-bitmask-5.c: New test.
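As a sanity check on the relationship the patch relies on, the following
standalone program (a minimal sketch, not part of the patch) verifies
x / N == (((x + N + 2) >> 8) + x) >> 8 for the uint16_t case N = 0xff,
where N + 1 = 0x100 makes // (N+1) a shift right by half the precision.
The arithmetic is done in 32 bits so the 16-bit overflow condition that
the patch proves with ranger can be tested explicitly:

#include <cstdint>
#include <cassert>

int
main ()
{
  const uint32_t N = 0xff;  /* divisor; N + 1 == 1 << 8 */
  for (uint32_t x = 0; x <= 0xffff; ++x)
    {
      /* The same condition the patch checks with ranger: x + N + 2
         must not overflow the 16-bit type.  */
      bool overflows = x + N + 2 > 0xffff;
      uint32_t approx = (((x + N + 2) >> 8) + x) >> 8;
      if (!overflows)
        assert (approx == x / N);
    }
  return 0;
}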
Diffstat (limited to 'gcc/tree-vect-patterns.cc')
-rw-r--r--  gcc/tree-vect-patterns.cc | 77
1 file changed, 77 insertions, 0 deletions
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index 298fd29..887f02b 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -3934,6 +3934,83 @@ vect_recog_divmod_pattern (vec_info *vinfo,
return pattern_stmt;
}
+ if ((cst = uniform_integer_cst_p (oprnd1))
+ && TYPE_UNSIGNED (itype)
+ && rhs_code == TRUNC_DIV_EXPR
+ && vectype
+ && targetm.vectorize.preferred_div_as_shifts_over_mult (vectype))
+ {
+ /* We can use the relationship:
+
+ x // N == ((x+N+2) // (N+1) + x) // (N+1) for 0 <= x < N(N+3)
+
+ to optimize cases where N+1 is a power of 2, and where // (N+1)
+ is therefore a shift right. When operating in modes that are
+ multiples of a byte in size, there are two cases:
+
+ (1) N(N+3) is not representable, in which case the question
+ becomes whether the replacement expression overflows.
+ It is enough to test that x+N+2 does not overflow,
+ i.e. that x < MAX-(N+1).
+
+ (2) N(N+3) is representable, in which case it is the (only)
+ bound that we need to check.
+
+ ??? For now we just handle the case where // (N+1) is a shift
+ right by half the precision, since some architectures can
+ optimize the associated addition and shift combinations
+ into single instructions. */
+
+ auto wcst = wi::to_wide (cst);
+ int pow = wi::exact_log2 (wcst + 1);
+ if (pow == prec / 2)
+ {
+ gimple *stmt = SSA_NAME_DEF_STMT (oprnd0);
+
+ gimple_ranger ranger;
+ int_range_max r;
+
+ /* Check that no overflow will occur. If we don't have range
+ information we can't perform the optimization. */
+
+ if (ranger.range_of_expr (r, oprnd0, stmt))
+ {
+ wide_int max = r.upper_bound ();
+ wide_int one = wi::shwi (1, prec);
+ wide_int adder = wi::add (one, wi::lshift (one, pow));
+ wi::overflow_type ovf;
+ wi::add (max, adder, UNSIGNED, &ovf);
+ if (ovf == wi::OVF_NONE)
+ {
+ *type_out = vectype;
+ tree tadder = wide_int_to_tree (itype, adder);
+ tree rshift = wide_int_to_tree (itype, pow);
+
+ tree new_lhs1 = vect_recog_temp_ssa_var (itype, NULL);
+ gassign *patt1
+ = gimple_build_assign (new_lhs1, PLUS_EXPR, oprnd0, tadder);
+ append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
+
+ tree new_lhs2 = vect_recog_temp_ssa_var (itype, NULL);
+ patt1 = gimple_build_assign (new_lhs2, RSHIFT_EXPR, new_lhs1,
+ rshift);
+ append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
+
+ tree new_lhs3 = vect_recog_temp_ssa_var (itype, NULL);
+ patt1 = gimple_build_assign (new_lhs3, PLUS_EXPR, new_lhs2,
+ oprnd0);
+ append_pattern_def_seq (vinfo, stmt_vinfo, patt1, vectype);
+
+ tree new_lhs4 = vect_recog_temp_ssa_var (itype, NULL);
+ pattern_stmt = gimple_build_assign (new_lhs4, RSHIFT_EXPR,
+ new_lhs3, rshift);
+
+ return pattern_stmt;
+ }
+ }
+ }
+ }
+
if (prec > HOST_BITS_PER_WIDE_INT
|| integer_zerop (oprnd1))
return NULL;