author     Jennifer Schmitz <jschmitz@nvidia.com>   2024-08-30 07:03:49 -0700
committer  Jennifer Schmitz <jschmitz@nvidia.com>   2024-09-03 09:23:19 +0200
commit     ee8b7231b03a36dfc09d94f2b663636ca2a36daf (patch)
tree       7ae240c76346e9897821af38f58b8ba2bcac9fff /gcc/config/aarch64
parent     87217bea3aa556779a111cec0ef45dcefd1736f6 (diff)
SVE intrinsics: Fold constant operands for svdiv.
This patch implements constant folding for svdiv: it adds a new function,
aarch64_const_binop, which, in contrast to int_const_binop, does not treat
operations as overflowing.  This function is passed as a callback to
vector_const_binop from the new gimple_folder method fold_const_binary,
which applies it when the predicate is ptrue or predication is _x.  From
svdiv_impl::fold, fold_const_binary is called with TRUNC_DIV_EXPR as the
tree_code.

In aarch64_const_binop, a case was added for TRUNC_DIV_EXPR to return 0
for division by 0, as defined in the semantics for svdiv.

Tests were added to check the produced assembly for different predicates,
signed and unsigned integers, and the svdiv_n_* case.

The patch was bootstrapped and regtested on aarch64-linux-gnu with no
regression.  OK for mainline?

Signed-off-by: Jennifer Schmitz <jschmitz@nvidia.com>

gcc/
	* config/aarch64/aarch64-sve-builtins-base.cc (svdiv_impl::fold):
	Try constant folding.
	* config/aarch64/aarch64-sve-builtins.h: Declare
	gimple_folder::fold_const_binary.
	* config/aarch64/aarch64-sve-builtins.cc (aarch64_const_binop):
	New function to fold binary SVE intrinsics without overflow.
	(gimple_folder::fold_const_binary): New helper function for
	constant folding of SVE intrinsics.

gcc/testsuite/
	* gcc.target/aarch64/sve/const_fold_div_1.c: New test.
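As an illustration of the intended behaviour, here is a minimal sketch only
(not the actual test file gcc.target/aarch64/sve/const_fold_div_1.c); it
assumes an SVE-enabled compiler, and the function names fold_div_x and
fold_div_by_zero are hypothetical:

#include <arm_sve.h>

/* With an all-true predicate (or _x predication) and constant operands,
   the call is expected to fold to the constant vector { 5, ..., 5 }.  */
svint64_t
fold_div_x (void)
{
  return svdiv_x (svptrue_b64 (), svdup_s64 (10), svdup_s64 (2));
}

/* Division by zero folds to 0, matching the SDIV/UDIV semantics
   mentioned above.  */
svint64_t
fold_div_by_zero (void)
{
  return svdiv_x (svptrue_b64 (), svdup_s64 (10), svdup_s64 (0));
}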
Diffstat (limited to 'gcc/config/aarch64')
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins-base.cc | 11
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins.cc      | 43
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins.h       |  1
3 files changed, 52 insertions, 3 deletions
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index d55bee0..6c94d14 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -755,8 +755,13 @@ public:
gimple *
fold (gimple_folder &f) const override
{
- tree divisor = gimple_call_arg (f.call, 2);
- tree divisor_cst = uniform_integer_cst_p (divisor);
+ if (auto *res = f.fold_const_binary (TRUNC_DIV_EXPR))
+ return res;
+
+ /* If the divisor is a uniform power of 2, fold to a shift
+ instruction. */
+ tree op2 = gimple_call_arg (f.call, 2);
+ tree divisor_cst = uniform_integer_cst_p (op2);
if (!divisor_cst || !integer_pow2p (divisor_cst))
return NULL;
@@ -770,7 +775,7 @@ public:
shapes::binary_uint_opt_n, MODE_n,
f.type_suffix_ids, GROUP_none, f.pred);
call = f.redirect_call (instance);
- tree d = INTEGRAL_TYPE_P (TREE_TYPE (divisor)) ? divisor : divisor_cst;
+ tree d = INTEGRAL_TYPE_P (TREE_TYPE (op2)) ? op2 : divisor_cst;
new_divisor = wide_int_to_tree (TREE_TYPE (d), tree_log2 (d));
}
else
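For context, a hedged sketch of the pre-existing path that the hunk above
keeps (the function name div_by_pow2 is hypothetical): a uniform power-of-2
divisor is still redirected to a shift-style intrinsic rather than folded
to a constant, since only the constant-operand case is handled by the new
fold_const_binary call.

#include <arm_sve.h>

svint64_t
div_by_pow2 (svbool_t pg, svint64_t x)
{
  /* Non-constant dividend, power-of-2 divisor: not constant-folded,
     but still handled by the existing shift path.  */
  return svdiv_n_s64_x (pg, x, 4);
}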
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index 5ca9ec3..8f9aa3c 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -1132,6 +1132,30 @@ report_not_enum (location_t location, tree fndecl, unsigned int argno,
" a valid %qT value", actual, argno + 1, fndecl, enumtype);
}
+/* Try to fold constant arguments ARG1 and ARG2 using the given tree_code.
+ Operations are not treated as overflowing. */
+static tree
+aarch64_const_binop (enum tree_code code, tree arg1, tree arg2)
+{
+ if (poly_int_tree_p (arg1) && poly_int_tree_p (arg2))
+ {
+ poly_wide_int poly_res;
+ tree type = TREE_TYPE (arg1);
+ signop sign = TYPE_SIGN (type);
+ wi::overflow_type overflow = wi::OVF_NONE;
+
+ /* Return 0 for division by 0, like SDIV and UDIV do. */
+ if (code == TRUNC_DIV_EXPR && integer_zerop (arg2))
+ return arg2;
+
+ if (!poly_int_binop (poly_res, code, arg1, arg2, sign, &overflow))
+ return NULL_TREE;
+ return force_fit_type (type, poly_res, false,
+ TREE_OVERFLOW (arg1) | TREE_OVERFLOW (arg2));
+ }
+ return NULL_TREE;
+}
+
/* Return a hash code for a function_instance. */
hashval_t
function_instance::hash () const
@@ -3593,6 +3617,25 @@ gimple_folder::fold_to_vl_pred (unsigned int vl)
return gimple_build_assign (lhs, builder.build ());
}
+/* Try to fold the call to a constant, given that, for integers, the call
+ is roughly equivalent to binary operation CODE. aarch64_const_binop
+ handles any differences between CODE and the intrinsic. */
+gimple *
+gimple_folder::fold_const_binary (enum tree_code code)
+{
+ gcc_assert (gimple_call_num_args (call) == 3);
+ tree pg = gimple_call_arg (call, 0);
+ tree op1 = gimple_call_arg (call, 1);
+ tree op2 = gimple_call_arg (call, 2);
+
+ if (type_suffix (0).integer_p
+ && (pred == PRED_x || is_ptrue (pg, type_suffix (0).element_bytes)))
+ if (tree res = vector_const_binop (code, op1, op2, aarch64_const_binop))
+ return gimple_build_assign (lhs, res);
+
+ return NULL;
+}
+
/* Try to fold the call. Return the new statement on success and null
on failure. */
gimple *
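For the predication check above, a hedged sketch (the function name no_fold
is hypothetical) of a case that is deliberately left alone: with _m
predication and a predicate that is not known to be all true, inactive
lanes must keep the first operand, so fold_const_binary returns NULL here
and the call is not constant-folded.

#include <arm_sve.h>

svint64_t
no_fold (svbool_t pg)
{
  return svdiv_m (pg, svdup_s64 (10), svdup_s64 (3));
}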
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
index 9ab6f20..e388050 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins.h
@@ -636,6 +636,7 @@ public:
gimple *fold_to_pfalse ();
gimple *fold_to_ptrue ();
gimple *fold_to_vl_pred (unsigned int);
+ gimple *fold_const_binary (enum tree_code);
gimple *fold ();