diff options
author | Soumya AR <soumyaa@nvidia.com> | 2024-10-08 14:37:24 +0100 |
---|---|---|
committer | Richard Sandiford <richard.sandiford@arm.com> | 2024-10-08 14:37:24 +0100 |
commit | c94adf02d31028a25bb7b20ec77aade9d502430b (patch) | |
tree | e1b70dc936921376f50dc567ee85dd2b078d2174 | |
parent | 34ae3a992a0cc3240d07d69ff12a664cbb5c8be0 (diff) | |
download | gcc-c94adf02d31028a25bb7b20ec77aade9d502430b.zip gcc-c94adf02d31028a25bb7b20ec77aade9d502430b.tar.gz gcc-c94adf02d31028a25bb7b20ec77aade9d502430b.tar.bz2 |
aarch64: Expand CTZ to RBIT + CLZ for SVE [PR109498]
Currently, we vectorize CTZ for SVE by using the following operation:
.CTZ (X) = (PREC - 1) - .CLZ (X & -X)
Instead, this patch expands CTZ to RBIT + CLZ for SVE, as suggested in PR109498.
The patch was bootstrapped and regtested on aarch64-linux-gnu with no regressions.
OK for mainline?
Signed-off-by: Soumya AR <soumyaa@nvidia.com>
gcc/ChangeLog:
PR target/109498
* config/aarch64/aarch64-sve.md (ctz<mode>2): Added pattern to expand
CTZ to RBIT + CLZ for SVE.
gcc/testsuite/ChangeLog:
PR target/109498
* gcc.target/aarch64/sve/ctz.c: New test.
-rw-r--r-- | gcc/config/aarch64/aarch64-sve.md | 17 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/ctz.c | 49 |
2 files changed, 66 insertions, 0 deletions
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 90db51e..06bd3e4 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -3088,6 +3088,23 @@
 ;; - NOT
 ;; -------------------------------------------------------------------------
 
+(define_expand "ctz<mode>2"
+  [(set (match_operand:SVE_I 0 "register_operand")
+        (unspec:SVE_I
+          [(match_dup 2)
+           (ctz:SVE_I
+             (match_operand:SVE_I 1 "register_operand"))]
+          UNSPEC_PRED_X))]
+  "TARGET_SVE"
+  {
+     rtx pred = aarch64_ptrue_reg (<VPRED>mode);
+     rtx temp = gen_reg_rtx (<MODE>mode);
+     emit_insn (gen_aarch64_pred_rbit<mode> (temp, pred, operands[1]));
+     emit_insn (gen_aarch64_pred_clz<mode> (operands[0], pred, temp));
+     DONE;
+  }
+)
+
 ;; Unpredicated integer unary arithmetic.
 (define_expand "<optab><mode>2"
   [(set (match_operand:SVE_I 0 "register_operand")
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/ctz.c b/gcc/testsuite/gcc.target/aarch64/sve/ctz.c
new file mode 100644
index 0000000..433a917
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/ctz.c
@@ -0,0 +1,49 @@
+/* { dg-final { check-function-bodies "**" "" } } */
+/* { dg-options "-O3 --param aarch64-autovec-preference=sve-only" } */
+
+#include <stdint.h>
+
+#define FUNC(FUNCTION, NAME, DTYPE) \
+void \
+NAME (DTYPE *__restrict x, DTYPE *__restrict y, int n) { \
+  for (int i = 0; i < n; i++) \
+    x[i] = FUNCTION (y[i]); \
+} \
+
+
+/*
+** ctz_uint8:
+** ...
+** rbit z[0-9]+\.b, p[0-7]/m, z[0-9]+\.b
+** clz z[0-9]+\.b, p[0-7]/m, z[0-9]+\.b
+** ...
+*/
+FUNC (__builtin_ctzg, ctz_uint8, uint8_t)
+
+/*
+** ctz_uint16:
+** ...
+** rbit z[0-9]+\.h, p[0-7]/m, z[0-9]+\.h
+** clz z[0-9]+\.h, p[0-7]/m, z[0-9]+\.h
+** ...
+*/
+FUNC (__builtin_ctzg, ctz_uint16, uint16_t)
+
+/*
+** ctz_uint32:
+** ...
+** rbit z[0-9]+\.s, p[0-7]/m, z[0-9]+\.s
+** clz z[0-9]+\.s, p[0-7]/m, z[0-9]+\.s
+** ...
+*/
+FUNC (__builtin_ctz, ctz_uint32, uint32_t)
+
+/*
+** ctz_uint64:
+** ...
+** rbit z[0-9]+\.d, p[0-7]/m, z[0-9]+\.d
+** clz z[0-9]+\.d, p[0-7]/m, z[0-9]+\.d
+** ...
+*/
+FUNC (__builtin_ctzll, ctz_uint64, uint64_t)
+