| author | Jennifer Schmitz <jschmitz@nvidia.com> | 2024-10-24 05:11:31 -0700 |
|---|---|---|
| committer | Jennifer Schmitz <jschmitz@nvidia.com> | 2024-10-25 08:54:14 +0200 |
| commit | 0b22f0585348335369298c7d39afd171758eebe9 (patch) | |
| tree | 7adeb0b713841c32af4e91fea3c71cc7c2f9a120 /gcc | |
| parent | 6aba48a8cc128e54ee243d451ac9a843ff41c4f9 (diff) | |
SVE intrinsics: Fold svaba with op1 all zeros to svabd.
Similar to
https://gcc.gnu.org/pipermail/gcc-patches/2024-October/665780.html,
this patch implements folding of svaba to svabd if op1 is all zeros,
resulting in the use of UABD/SABD instructions instead of UABA/SABA.
Tests were added to check that the produced assembly uses UABD/SABD,
including for the _n case.
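For context, a minimal ACLE sketch of the identity the fold relies on (illustrative
only, not part of the patch; the function name abs_diff_s32 is hypothetical and the
code assumes compilation with SVE2 enabled): svaba computes op1 + |op2 - op3|
element-wise, so with an all-zeros op1 the result is exactly the absolute
difference that svabd computes.

    #include <arm_sve.h>

    /* Illustrative sketch: with an all-zeros accumulator, svaba reduces to
       svabd, so after this patch the call below should be folded to a
       predicated SABD (with a ptrue governing predicate) instead of SABA.  */
    svint32_t
    abs_diff_s32 (svint32_t a, svint32_t b)
    {
      return svaba_s32 (svdup_s32 (0), a, b);
    }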
The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression.
OK for mainline?
Signed-off-by: Jennifer Schmitz <jschmitz@nvidia.com>
gcc/
* config/aarch64/aarch64-sve-builtins-sve2.cc
(svaba_impl::fold): Fold svaba to svabd if op1 is all zeros.
gcc/testsuite/
* gcc.target/aarch64/sve2/acle/asm/aba_s32.c: New tests.
* gcc.target/aarch64/sve2/acle/asm/aba_s64.c: Likewise.
* gcc.target/aarch64/sve2/acle/asm/aba_u32.c: Likewise.
* gcc.target/aarch64/sve2/acle/asm/aba_u64.c: Likewise.
Diffstat (limited to 'gcc')
5 files changed, 107 insertions, 0 deletions
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
index ddd6e46..d29c220 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
@@ -81,6 +81,24 @@ unspec_sqrdcmlah (int rot)
 class svaba_impl : public function_base
 {
 public:
+  gimple *
+  fold (gimple_folder &f) const override
+  {
+    /* Fold to svabd if op1 is all zeros.  */
+    tree op1 = gimple_call_arg (f.call, 0);
+    if (!integer_zerop (op1))
+      return NULL;
+    function_instance instance ("svabd", functions::svabd,
+				shapes::binary_opt_n, f.mode_suffix_id,
+				f.type_suffix_ids, GROUP_none, PRED_x);
+    gcall *call = f.redirect_call (instance);
+    /* Add a ptrue as predicate, because unlike svaba, svabd is
+       predicated.  */
+    gimple_call_set_arg (call, 0, build_all_ones_cst (f.gp_type ()));
+    return call;
+  }
+
+public:
   rtx
   expand (function_expander &e) const override
   {
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_s32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_s32.c
index 73c0028..655ad63 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_s32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_s32.c
@@ -108,3 +108,26 @@ TEST_UNIFORM_Z (aba_11_s32_tied2, svint32_t,
 TEST_UNIFORM_Z (aba_11_s32_untied, svint32_t,
 		z0 = svaba_n_s32 (z1, z2, 11),
 		z0 = svaba (z1, z2, 11))
+
+/*
+** aba_11_s32_zeroop1n:
+**	ptrue	(p[0-7])\.b, all
+**	mov	z0\.s, #11
+**	sabd	z0\.s, \1/m, z0\.s, z1\.s
+**	ret
+*/
+TEST_UNIFORM_Z (aba_11_s32_zeroop1n, svint32_t,
+		z0 = svaba_n_s32 (svdup_s32 (0), z1, 11),
+		z0 = svaba (svdup_s32 (0), z1, 11))
+
+
+/*
+** aba_11_s32_zeroop1:
+**	ptrue	(p[0-7])\.b, all
+**	mov	z0\.s, #11
+**	sabd	z0\.s, \1/m, z0\.s, z1\.s
+**	ret
+*/
+TEST_UNIFORM_Z (aba_11_s32_zeroop1, svint32_t,
+		z0 = svaba_s32 (svdup_s32 (0), z1, svdup_s32 (11)),
+		z0 = svaba (svdup_s32 (0), z1, svdup_s32 (11)))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_s64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_s64.c
index 0c169db..8b1eb7d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_s64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_s64.c
@@ -108,3 +108,25 @@ TEST_UNIFORM_Z (aba_11_s64_tied2, svint64_t,
 TEST_UNIFORM_Z (aba_11_s64_untied, svint64_t,
 		z0 = svaba_n_s64 (z1, z2, 11),
 		z0 = svaba (z1, z2, 11))
+
+/*
+** aba_11_s64_zeroop1n:
+**	ptrue	(p[0-7])\.b, all
+**	mov	z0\.d, #11
+**	sabd	z0\.d, \1/m, z0\.d, z1\.d
+**	ret
+*/
+TEST_UNIFORM_Z (aba_11_s64_zeroop1n, svint64_t,
+		z0 = svaba_n_s64 (svdup_s64 (0), z1, 11),
+		z0 = svaba (svdup_s64 (0), z1, 11))
+
+/*
+** aba_11_s64_zeroop1:
+**	ptrue	(p[0-7])\.b, all
+**	mov	z0\.d, #11
+**	sabd	z0\.d, \1/m, z0\.d, z1\.d
+**	ret
+*/
+TEST_UNIFORM_Z (aba_11_s64_zeroop1, svint64_t,
+		z0 = svaba_s64 (svdup_s64 (0), z1, svdup_s64 (11)),
+		z0 = svaba (svdup_s64 (0), z1, svdup_s64 (11)))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_u32.c
index 2ba8f41..fc2fed2 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_u32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_u32.c
@@ -108,3 +108,25 @@ TEST_UNIFORM_Z (aba_11_u32_tied2, svuint32_t,
 TEST_UNIFORM_Z (aba_11_u32_untied, svuint32_t,
 		z0 = svaba_n_u32 (z1, z2, 11),
 		z0 = svaba (z1, z2, 11))
+
+/*
+** aba_11_u32_zeroop1n:
+**	ptrue	(p[0-7])\.b, all
+**	mov	z0\.s, #11
+**	uabd	z0\.s, \1/m, z0\.s, z1\.s
+**	ret
+*/
+TEST_UNIFORM_Z (aba_11_u32_zeroop1n, svuint32_t,
+		z0 = svaba_n_u32 (svdup_u32 (0), z1, 11),
+		z0 = svaba (svdup_u32 (0), z1, 11))
+
+/*
+** aba_11_u32_zeroop1:
+**	ptrue	(p[0-7])\.b, all
+**	mov	z0\.s, #11
+**	uabd	z0\.s, \1/m, z0\.s, z1\.s
+**	ret
+*/
+TEST_UNIFORM_Z (aba_11_u32_zeroop1, svuint32_t,
+		z0 = svaba_u32 (svdup_u32 (0), z1, svdup_u32 (11)),
+		z0 = svaba (svdup_u32 (0), z1, svdup_u32 (11)))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_u64.c
index 8c6bef02..f6ed216 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_u64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aba_u64.c
@@ -108,3 +108,25 @@ TEST_UNIFORM_Z (aba_11_u64_tied2, svuint64_t,
 TEST_UNIFORM_Z (aba_11_u64_untied, svuint64_t,
 		z0 = svaba_n_u64 (z1, z2, 11),
 		z0 = svaba (z1, z2, 11))
+
+/*
+** aba_11_u64_zeroop1n:
+**	ptrue	(p[0-7])\.b, all
+**	mov	z0\.d, #11
+**	uabd	z0\.d, \1/m, z0\.d, z1\.d
+**	ret
+*/
+TEST_UNIFORM_Z (aba_11_u64_zeroop1n, svuint64_t,
+		z0 = svaba_n_u64 (svdup_u64 (0), z1, 11),
+		z0 = svaba (svdup_u64 (0), z1, 11))
+
+/*
+** aba_11_u64_zeroop1:
+**	ptrue	(p[0-7])\.b, all
+**	mov	z0\.d, #11
+**	uabd	z0\.d, \1/m, z0\.d, z1\.d
+**	ret
+*/
+TEST_UNIFORM_Z (aba_11_u64_zeroop1, svuint64_t,
+		z0 = svaba_u64 (svdup_u64 (0), z1, svdup_u64 (11)),
+		z0 = svaba (svdup_u64 (0), z1, svdup_u64 (11)))