author     Jennifer Schmitz <jschmitz@nvidia.com>    2024-09-17 00:15:38 -0700
committer  Jennifer Schmitz <jschmitz@nvidia.com>    2024-09-19 09:23:24 +0200
commit     08aba2dd8c9390b6131cca0aac069f97eeddc9d2 (patch)
tree       2b527b57cd661c0b5035853cea04ed25c792cbb5 /gcc
parent     9a99559a478111f7fbeec29bd78344df7651c707 (diff)
SVE intrinsics: Fold svmul with all-zero operands to zero vector
As recently implemented for svdiv, this patch folds svmul to a zero
vector if one of the operands is a zero vector. This transformation is
applied if at least one of the following conditions is met:
- the first operand is all zeros, or
- the second operand is all zeros, and either the predicate is ptrue or the
  predication is _x or _z.
In contrast to constant folding, which was implemented in a previous
patch, this transformation is applied as soon as one of the operands is
a zero vector, while the other operand can be a variable.
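For illustration, the following hypothetical snippet (not part of the patch;
the function names are made up) shows which calls the new rule can and cannot
fold, assuming an SVE-enabled compilation at -O2:

#include <arm_sve.h>

/* First operand is all zeros: folded to a zero vector for any predication,
   since _m takes its inactive lanes from the (zero) first operand anyway.  */
svint64_t fold_op1_zero (svbool_t pg, svint64_t x)
{
  return svmul_m (pg, svdup_s64 (0), x);
}

/* Second operand is all zeros with _x predication: folded to a zero vector.  */
svint64_t fold_op2_zero (svbool_t pg, svint64_t x)
{
  return svmul_x (pg, x, svdup_s64 (0));
}

/* Second operand is all zeros, but _m predication with a non-ptrue predicate:
   inactive lanes must keep x, so this call is not folded to an all-zero
   vector.  */
svint64_t no_fold (svbool_t pg, svint64_t x)
{
  return svmul_m (pg, x, svdup_s64 (0));
}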
The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression.
OK for mainline?
Signed-off-by: Jennifer Schmitz <jschmitz@nvidia.com>
gcc/
* config/aarch64/aarch64-sve-builtins-base.cc (svmul_impl::fold):
Add folding of all-zero operands to zero vector.
gcc/testsuite/
* gcc.target/aarch64/sve/const_fold_mul_1.c: Adjust expected
outcome.
* gcc.target/aarch64/sve/fold_mul_zero.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins-base.cc          |  17
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/const_fold_mul_1.c  |   4
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/fold_mul_zero.c     | 365
3 files changed, 383 insertions(+), 3 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 9f8af9b..afce52a 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -2020,7 +2020,22 @@ public:
   gimple *
   fold (gimple_folder &f) const override
   {
-    return f.fold_const_binary (MULT_EXPR);
+    if (auto *res = f.fold_const_binary (MULT_EXPR))
+      return res;
+
+    /* If one of the operands is all zeros, fold to zero vector.  */
+    tree op1 = gimple_call_arg (f.call, 1);
+    if (integer_zerop (op1))
+      return gimple_build_assign (f.lhs, op1);
+
+    tree pg = gimple_call_arg (f.call, 0);
+    tree op2 = gimple_call_arg (f.call, 2);
+    if (integer_zerop (op2)
+        && (f.pred != PRED_m
+            || is_ptrue (pg, f.type_suffix (0).element_bytes)))
+      return gimple_build_assign (f.lhs, build_zero_cst (TREE_TYPE (f.lhs)));
+
+    return NULL;
   }
 };
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/const_fold_mul_1.c b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_mul_1.c
index 6d68607..2a00cab 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/const_fold_mul_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_mul_1.c
@@ -35,7 +35,7 @@ svint64_t s64_z_pg (svbool_t pg)
 
 /*
 ** s64_z_pg_0:
-** mov z[0-9]+\.d, p[0-7]/z, #0
+** mov z[0-9]+\.b, #0
 ** ret
 */
 svint64_t s64_z_pg_0 (svbool_t pg)
@@ -117,7 +117,7 @@ svint64_t s64_z_pg_n (svbool_t pg)
 
 /*
 ** s64_z_pg_n_s64_0:
-** mov z[0-9]+\.d, p[0-7]/z, #0
+** mov z[0-9]+\.b, #0
 ** ret
 */
 svint64_t s64_z_pg_n_s64_0 (svbool_t pg)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fold_mul_zero.c b/gcc/testsuite/gcc.target/aarch64/sve/fold_mul_zero.c
new file mode 100644
index 0000000..a5674fd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fold_mul_zero.c
@@ -0,0 +1,365 @@
+/* { dg-final { check-function-bodies "**" "" } } */
+/* { dg-options "-O2" } */
+
+#include "arm_sve.h"
+
+/*
+** s64_x_pg_op1:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_x_pg_op1 (svbool_t pg, svint64_t op2)
+{
+  return svmul_x (pg, svdup_s64 (0), op2);
+}
+
+/*
+** s64_z_pg_op1:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_z_pg_op1 (svbool_t pg, svint64_t op2)
+{
+  return svmul_z (pg, svdup_s64 (0), op2);
+}
+
+/*
+** s64_m_pg_op1:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_m_pg_op1 (svbool_t pg, svint64_t op2)
+{
+  return svmul_m (pg, svdup_s64 (0), op2);
+}
+
+/*
+** s64_x_ptrue_op1:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_x_ptrue_op1 (svint64_t op2)
+{
+  return svmul_x (svptrue_b64 (), svdup_s64 (0), op2);
+}
+
+/*
+** s64_z_ptrue_op1:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_z_ptrue_op1 (svint64_t op2)
+{
+  return svmul_z (svptrue_b64 (), svdup_s64 (0), op2);
+}
+
+/*
+** s64_m_ptrue_op1:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_m_ptrue_op1 (svint64_t op2)
+{
+  return svmul_m (svptrue_b64 (), svdup_s64 (0), op2);
+}
+
+/*
+** s64_x_pg_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_x_pg_op2 (svbool_t pg, svint64_t op1)
+{
+  return svmul_x (pg, op1, svdup_s64 (0));
+}
+
+/*
+** s64_z_pg_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_z_pg_op2 (svbool_t pg, svint64_t op1)
+{
+  return svmul_z (pg, op1, svdup_s64 (0));
+}
+
+/*
+** s64_m_pg_op2:
+** mov z[0-9]+\.d, p0/m, #0
+** ret
+*/
+svint64_t s64_m_pg_op2 (svbool_t pg, svint64_t op1)
+{
+  return svmul_m (pg, op1, svdup_s64 (0));
+}
+
+/*
+** s64_x_ptrue_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_x_ptrue_op2 (svint64_t op1)
+{
+  return svmul_x (svptrue_b64 (), op1, svdup_s64 (0));
+}
+
+/*
+** s64_z_ptrue_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_z_ptrue_op2 (svint64_t op1)
+{
+  return svmul_z (svptrue_b64 (), op1, svdup_s64 (0));
+}
+
+/*
+** s64_m_ptrue_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_m_ptrue_op2 (svint64_t op1)
+{
+  return svmul_m (svptrue_b64 (), op1, svdup_s64 (0));
+}
+
+/*
+** s64_n_x_pg_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_n_x_pg_op2 (svbool_t pg, svint64_t op1)
+{
+  return svmul_n_s64_x (pg, op1, 0);
+}
+
+/*
+** s64_n_z_pg_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_n_z_pg_op2 (svbool_t pg, svint64_t op1)
+{
+  return svmul_n_s64_z (pg, op1, 0);
+}
+
+/*
+** s64_n_m_pg_op2:
+** mov z[0-9]+\.d, p0/m, #0
+** ret
+*/
+svint64_t s64_n_m_pg_op2 (svbool_t pg, svint64_t op1)
+{
+  return svmul_n_s64_m (pg, op1, 0);
+}
+
+/*
+** s64_n_x_ptrue_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_n_x_ptrue_op2 (svint64_t op1)
+{
+  return svmul_n_s64_x (svptrue_b64 (), op1, 0);
+}
+
+/*
+** s64_n_z_ptrue_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_n_z_ptrue_op2 (svint64_t op1)
+{
+  return svmul_n_s64_z (svptrue_b64 (), op1, 0);
+}
+
+/*
+** s64_n_m_ptrue_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svint64_t s64_n_m_ptrue_op2 (svint64_t op1)
+{
+  return svmul_n_s64_m (svptrue_b64 (), op1, 0);
+}
+
+/*
+** u64_x_pg_op1:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_x_pg_op1 (svbool_t pg, svuint64_t op2)
+{
+  return svmul_x (pg, svdup_u64 (0), op2);
+}
+
+/*
+** u64_z_pg_op1:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_z_pg_op1 (svbool_t pg, svuint64_t op2)
+{
+  return svmul_z (pg, svdup_u64 (0), op2);
+}
+
+/*
+** u64_m_pg_op1:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_m_pg_op1 (svbool_t pg, svuint64_t op2)
+{
+  return svmul_m (pg, svdup_u64 (0), op2);
+}
+
+/*
+** u64_x_ptrue_op1:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_x_ptrue_op1 (svuint64_t op2)
+{
+  return svmul_x (svptrue_b64 (), svdup_u64 (0), op2);
+}
+
+/*
+** u64_z_ptrue_op1:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_z_ptrue_op1 (svuint64_t op2)
+{
+  return svmul_z (svptrue_b64 (), svdup_u64 (0), op2);
+}
+
+/*
+** u64_m_ptrue_op1:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_m_ptrue_op1 (svuint64_t op2)
+{
+  return svmul_m (svptrue_b64 (), svdup_u64 (0), op2);
+}
+
+/*
+** u64_x_pg_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_x_pg_op2 (svbool_t pg, svuint64_t op1)
+{
+  return svmul_x (pg, op1, svdup_u64 (0));
+}
+
+/*
+** u64_z_pg_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_z_pg_op2 (svbool_t pg, svuint64_t op1)
+{
+  return svmul_z (pg, op1, svdup_u64 (0));
+}
+
+/*
+** u64_m_pg_op2:
+** mov z[0-9]+\.d, p0/m, #0
+** ret
+*/
+svuint64_t u64_m_pg_op2 (svbool_t pg, svuint64_t op1)
+{
+  return svmul_m (pg, op1, svdup_u64 (0));
+}
+
+/*
+** u64_x_ptrue_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_x_ptrue_op2 (svuint64_t op1)
+{
+  return svmul_x (svptrue_b64 (), op1, svdup_u64 (0));
+}
+
+/*
+** u64_z_ptrue_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_z_ptrue_op2 (svuint64_t op1)
+{
+  return svmul_z (svptrue_b64 (), op1, svdup_u64 (0));
+}
+
+/*
+** u64_m_ptrue_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_m_ptrue_op2 (svuint64_t op1)
+{
+  return svmul_m (svptrue_b64 (), op1, svdup_u64 (0));
+}
+
+/*
+** u64_n_x_pg_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_n_x_pg_op2 (svbool_t pg, svuint64_t op1)
+{
+  return svmul_n_u64_x (pg, op1, 0);
+}
+
+/*
+** u64_n_z_pg_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_n_z_pg_op2 (svbool_t pg, svuint64_t op1)
+{
+  return svmul_n_u64_z (pg, op1, 0);
+}
+
+/*
+** u64_n_m_pg_op2:
+** mov z[0-9]+\.d, p0/m, #0
+** ret
+*/
+svuint64_t u64_n_m_pg_op2 (svbool_t pg, svuint64_t op1)
+{
+  return svmul_n_u64_m (pg, op1, 0);
+}
+
+/*
+** u64_n_x_ptrue_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_n_x_ptrue_op2 (svuint64_t op1)
+{
+  return svmul_n_u64_x (svptrue_b64 (), op1, 0);
+}
+
+/*
+** u64_n_z_ptrue_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_n_z_ptrue_op2 (svuint64_t op1)
+{
+  return svmul_n_u64_z (svptrue_b64 (), op1, 0);
+}
+
+/*
+** u64_n_m_ptrue_op2:
+** mov z[0-9]+\.b, #0
+** ret
+*/
+svuint64_t u64_n_m_ptrue_op2 (svuint64_t op1)
+{
+  return svmul_n_u64_m (svptrue_b64 (), op1, 0);
+}
+