aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSoumya AR <soumyaa@nvidia.com>2024-10-08 14:37:24 +0100
committerRichard Sandiford <richard.sandiford@arm.com>2024-10-08 14:37:24 +0100
commitc94adf02d31028a25bb7b20ec77aade9d502430b (patch)
treee1b70dc936921376f50dc567ee85dd2b078d2174
parent34ae3a992a0cc3240d07d69ff12a664cbb5c8be0 (diff)
downloadgcc-c94adf02d31028a25bb7b20ec77aade9d502430b.zip
gcc-c94adf02d31028a25bb7b20ec77aade9d502430b.tar.gz
gcc-c94adf02d31028a25bb7b20ec77aade9d502430b.tar.bz2
aarch64: Expand CTZ to RBIT + CLZ for SVE [PR109498]
Currently, we vectorize CTZ for SVE by using the following operation: .CTZ (X) = (PREC - 1) - .CLZ (X & -X). Instead, this patch expands CTZ to RBIT + CLZ for SVE, as suggested in PR109498. The patch was bootstrapped and regtested on aarch64-linux-gnu with no regressions. OK for mainline? Signed-off-by: Soumya AR <soumyaa@nvidia.com> gcc/ChangeLog: PR target/109498 * config/aarch64/aarch64-sve.md (ctz<mode>2): Added pattern to expand CTZ to RBIT + CLZ for SVE. gcc/testsuite/ChangeLog: PR target/109498 * gcc.target/aarch64/sve/ctz.c: New test.
-rw-r--r--gcc/config/aarch64/aarch64-sve.md17
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/ctz.c49
2 files changed, 66 insertions, 0 deletions
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 90db51e..06bd3e4 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -3088,6 +3088,23 @@
;; - NOT
;; -------------------------------------------------------------------------
+;; Expand a vector count-trailing-zeros as RBIT followed by CLZ: reversing
+;; the bits of each element turns its trailing zeros into leading zeros,
+;; which CLZ then counts.  Both instructions are emitted under the predicate
+;; returned by aarch64_ptrue_reg.  The preparation code ends in DONE, so the
+;; RTL template below is not what gets emitted.
+;; NOTE(review): (match_dup 2) names an operand that the visible preparation
+;; code never assigns; presumably harmless because of DONE -- confirm.
+(define_expand "ctz<mode>2"
+ [(set (match_operand:SVE_I 0 "register_operand")
+ (unspec:SVE_I
+ [(match_dup 2)
+ (ctz:SVE_I
+ (match_operand:SVE_I 1 "register_operand"))]
+ UNSPEC_PRED_X))]
+ "TARGET_SVE"
+ {
+ /* TEMP holds the bit-reversed input; the CLZ of TEMP is written to
+ operand 0.  */
+ rtx pred = aarch64_ptrue_reg (<VPRED>mode);
+ rtx temp = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_aarch64_pred_rbit<mode> (temp, pred, operands[1]));
+ emit_insn (gen_aarch64_pred_clz<mode> (operands[0], pred, temp));
+ DONE;
+ }
+)
+
;; Unpredicated integer unary arithmetic.
(define_expand "<optab><mode>2"
[(set (match_operand:SVE_I 0 "register_operand")
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/ctz.c b/gcc/testsuite/gcc.target/aarch64/sve/ctz.c
new file mode 100644
index 0000000..433a917
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/ctz.c
@@ -0,0 +1,49 @@
+/* { dg-final { check-function-bodies "**" "" } } */
+/* { dg-options "-O3 --param aarch64-autovec-preference=sve-only" } */
+
+#include <stdint.h>
+
+/* Define a function NAME that, for each i in [0, n), stores
+   FUNCTION (y[i]) to x[i].  Both arrays have element type DTYPE and are
+   restrict-qualified.  Note that the backslash after the closing brace
+   continues the macro onto the following blank line.  */
+#define FUNC(FUNCTION, NAME, DTYPE) \
+void \
+NAME (DTYPE *__restrict x, DTYPE *__restrict y, int n) { \
+ for (int i = 0; i < n; i++) \
+ x[i] = FUNCTION (y[i]); \
+} \
+
+
+/* One instantiation of FUNC per element width.  The expected-body comment
+   in front of each instantiation names the function and requires an RBIT
+   followed by a CLZ on the matching element size (see the
+   check-function-bodies directive at the top of this test).  The 8- and
+   16-bit cases use __builtin_ctzg; the 32- and 64-bit cases use
+   __builtin_ctz and __builtin_ctzll.  */
+/*
+** ctz_uint8:
+** ...
+** rbit z[0-9]+\.b, p[0-7]/m, z[0-9]+\.b
+** clz z[0-9]+\.b, p[0-7]/m, z[0-9]+\.b
+** ...
+*/
+FUNC (__builtin_ctzg, ctz_uint8, uint8_t)
+
+/*
+** ctz_uint16:
+** ...
+** rbit z[0-9]+\.h, p[0-7]/m, z[0-9]+\.h
+** clz z[0-9]+\.h, p[0-7]/m, z[0-9]+\.h
+** ...
+*/
+FUNC (__builtin_ctzg, ctz_uint16, uint16_t)
+
+/*
+** ctz_uint32:
+** ...
+** rbit z[0-9]+\.s, p[0-7]/m, z[0-9]+\.s
+** clz z[0-9]+\.s, p[0-7]/m, z[0-9]+\.s
+** ...
+*/
+FUNC (__builtin_ctz, ctz_uint32, uint32_t)
+
+/*
+** ctz_uint64:
+** ...
+** rbit z[0-9]+\.d, p[0-7]/m, z[0-9]+\.d
+** clz z[0-9]+\.d, p[0-7]/m, z[0-9]+\.d
+** ...
+*/
+FUNC (__builtin_ctzll, ctz_uint64, uint64_t)
+