author    Jennifer Schmitz <jschmitz@nvidia.com>    2024-08-30 07:03:49 -0700
committer Jennifer Schmitz <jschmitz@nvidia.com>    2024-09-03 09:23:19 +0200
commit    ee8b7231b03a36dfc09d94f2b663636ca2a36daf (patch)
tree      7ae240c76346e9897821af38f58b8ba2bcac9fff /gcc/config/aarch64
parent    87217bea3aa556779a111cec0ef45dcefd1736f6 (diff)
SVE intrinsics: Fold constant operands for svdiv.
This patch implements constant folding for svdiv:
A new function, aarch64_const_binop, was added; in contrast to
int_const_binop, it does not treat operations as overflowing. This
function is passed as a callback to vector_const_binop from the new
gimple_folder method fold_const_binary if the predicate is ptrue or
predication is _x. From svdiv_impl::fold, fold_const_binary is called
with TRUNC_DIV_EXPR as tree_code.
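For illustration, a call of the following shape (a minimal sketch, not
taken from the patch or its tests) can now be folded to a constant
vector at gimple time:

  #include <arm_sve.h>

  svint64_t f (void)
  {
    /* All-true predicate, constant operands: with this patch the call
       folds to a constant vector of 3s and no SDIV is emitted.  */
    return svdiv_x (svptrue_b64 (), svdup_s64 (6), svdup_s64 (2));
  }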
In aarch64_const_binop, a case was added for TRUNC_DIV_EXPR to return 0
for division by 0, as defined in the semantics for svdiv.
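For example (again only an illustrative sketch), a constant division by
zero now folds to a zero vector instead of remaining a runtime division:

  #include <arm_sve.h>

  svint32_t g (void)
  {
    /* SDIV and UDIV return 0 on division by 0, so this folds to a
       vector of 0s.  */
    return svdiv_x (svptrue_b32 (), svdup_s32 (42), svdup_s32 (0));
  }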
Tests were added to check the produced assembly for different
predicates, signed and unsigned integers, and the svdiv_n_* case.
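A sketch of that testing style (illustrative only, not the contents of
const_fold_div_1.c; assumes the usual SVE testsuite setup):

  #include <arm_sve.h>

  svuint32_t h (void)
  {
    /* svdiv_n_u32_x via the overloaded form; folds to a vector of 3s.  */
    return svdiv_x (svptrue_b32 (), svdup_u32 (9), 3);
  }
  /* { dg-final { scan-assembler-not {\tudiv\t} } } */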
The patch was bootstrapped and regtested on aarch64-linux-gnu with no
regressions.
OK for mainline?
Signed-off-by: Jennifer Schmitz <jschmitz@nvidia.com>
gcc/
* config/aarch64/aarch64-sve-builtins-base.cc (svdiv_impl::fold):
Try constant folding.
* config/aarch64/aarch64-sve-builtins.h: Declare
gimple_folder::fold_const_binary.
* config/aarch64/aarch64-sve-builtins.cc (aarch64_const_binop):
New function to fold binary SVE intrinsics without overflow.
(gimple_folder::fold_const_binary): New helper function for
constant folding of SVE intrinsics.
gcc/testsuite/
* gcc.target/aarch64/sve/const_fold_div_1.c: New test.
Diffstat (limited to 'gcc/config/aarch64')
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins-base.cc  11
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins.cc       43
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins.h         1
3 files changed, 52 insertions, 3 deletions
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index d55bee0..6c94d14 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -755,8 +755,13 @@ public:
   gimple *
   fold (gimple_folder &f) const override
   {
-    tree divisor = gimple_call_arg (f.call, 2);
-    tree divisor_cst = uniform_integer_cst_p (divisor);
+    if (auto *res = f.fold_const_binary (TRUNC_DIV_EXPR))
+      return res;
+
+    /* If the divisor is a uniform power of 2, fold to a shift
+       instruction.  */
+    tree op2 = gimple_call_arg (f.call, 2);
+    tree divisor_cst = uniform_integer_cst_p (op2);
 
     if (!divisor_cst || !integer_pow2p (divisor_cst))
       return NULL;
@@ -770,7 +775,7 @@ public:
 					shapes::binary_uint_opt_n, MODE_n,
 					f.type_suffix_ids, GROUP_none, f.pred);
 	call = f.redirect_call (instance);
-	tree d = INTEGRAL_TYPE_P (TREE_TYPE (divisor)) ? divisor : divisor_cst;
+	tree d = INTEGRAL_TYPE_P (TREE_TYPE (op2)) ? op2 : divisor_cst;
 	new_divisor = wide_int_to_tree (TREE_TYPE (d), tree_log2 (d));
       }
     else
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index 5ca9ec3..8f9aa3c 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -1132,6 +1132,30 @@ report_not_enum (location_t location, tree fndecl, unsigned int argno,
 	  " a valid %qT value", actual, argno + 1, fndecl, enumtype);
 }
 
+/* Try to fold constant arguments ARG1 and ARG2 using the given tree_code.
+   Operations are not treated as overflowing.  */
+static tree
+aarch64_const_binop (enum tree_code code, tree arg1, tree arg2)
+{
+  if (poly_int_tree_p (arg1) && poly_int_tree_p (arg2))
+    {
+      poly_wide_int poly_res;
+      tree type = TREE_TYPE (arg1);
+      signop sign = TYPE_SIGN (type);
+      wi::overflow_type overflow = wi::OVF_NONE;
+
+      /* Return 0 for division by 0, like SDIV and UDIV do.  */
+      if (code == TRUNC_DIV_EXPR && integer_zerop (arg2))
+	return arg2;
+
+      if (!poly_int_binop (poly_res, code, arg1, arg2, sign, &overflow))
+	return NULL_TREE;
+      return force_fit_type (type, poly_res, false,
+			     TREE_OVERFLOW (arg1) | TREE_OVERFLOW (arg2));
+    }
+  return NULL_TREE;
+}
+
 /* Return a hash code for a function_instance.  */
 hashval_t
 function_instance::hash () const
@@ -3593,6 +3617,25 @@ gimple_folder::fold_to_vl_pred (unsigned int vl)
   return gimple_build_assign (lhs, builder.build ());
 }
 
+/* Try to fold the call to a constant, given that, for integers, the call
+   is roughly equivalent to binary operation CODE.  aarch64_const_binop
+   handles any differences between CODE and the intrinsic.  */
+gimple *
+gimple_folder::fold_const_binary (enum tree_code code)
+{
+  gcc_assert (gimple_call_num_args (call) == 3);
+  tree pg = gimple_call_arg (call, 0);
+  tree op1 = gimple_call_arg (call, 1);
+  tree op2 = gimple_call_arg (call, 2);
+
+  if (type_suffix (0).integer_p
+      && (pred == PRED_x || is_ptrue (pg, type_suffix (0).element_bytes)))
+    if (tree res = vector_const_binop (code, op1, op2, aarch64_const_binop))
+      return gimple_build_assign (lhs, res);
+
+  return NULL;
+}
+
 /* Try to fold the call.  Return the new statement on success and null
    on failure.  */
 gimple *
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
index 9ab6f20..e388050 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins.h
@@ -636,6 +636,7 @@ public:
   gimple *fold_to_pfalse ();
   gimple *fold_to_ptrue ();
   gimple *fold_to_vl_pred (unsigned int);
+  gimple *fold_const_binary (enum tree_code);
 
   gimple *fold ();
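As a design note, fold_const_binary is deliberately generic: any binary
integer intrinsic whose semantics match a tree_code (modulo the special
cases handled in aarch64_const_binop) could reuse it. A hypothetical
folder for a multiplication intrinsic (not part of this patch) might
look like:

  /* Sketch only: reuse gimple_folder::fold_const_binary to constant-fold
     an intrinsic that behaves like MULT_EXPR on integers.  */
  gimple *
  fold (gimple_folder &f) const override
  {
    if (auto *res = f.fold_const_binary (MULT_EXPR))
      return res;
    return NULL;
  }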