aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Sandiford <richard.sandiford@arm.com>2021-01-08 10:49:38 +0000
committerRichard Sandiford <richard.sandiford@arm.com>2021-01-08 10:49:38 +0000
commit5fe3e6bf061da8d9b0e759927c340fe8e0f44725 (patch)
treea929a8f3f861e2abfb21b74093074ac73f6a7967
parentf3c5d1fa53ad85424ce05f19583223ad2f413cfa (diff)
downloadgcc-5fe3e6bf061da8d9b0e759927c340fe8e0f44725.zip
gcc-5fe3e6bf061da8d9b0e759927c340fe8e0f44725.tar.gz
gcc-5fe3e6bf061da8d9b0e759927c340fe8e0f44725.tar.bz2
aarch64: Support unpacked CNOT on SVE
This patch adds unpacked support for unconditional and conditional CNOT.
The type suffix has to be taken from the element size rather than the
container size.

gcc/
	* config/aarch64/aarch64-sve.md (*cnot<mode>): Extend from
	SVE_FULL_I to SVE_I.
	(*cond_cnot<mode>_2, *cond_cnot<mode>_any): Likewise.

gcc/testsuite/
	* gcc.target/aarch64/sve/cnot_2.c: New test.
	* gcc.target/aarch64/sve/cond_cnot_4.c: Likewise.
	* gcc.target/aarch64/sve/cond_cnot_4_run.c: Likewise.
	* gcc.target/aarch64/sve/cond_cnot_5.c: Likewise.
	* gcc.target/aarch64/sve/cond_cnot_5_run.c: Likewise.
	* gcc.target/aarch64/sve/cond_cnot_6.c: Likewise.
	* gcc.target/aarch64/sve/cond_cnot_6_run.c: Likewise.
-rw-r--r--gcc/config/aarch64/aarch64-sve.md36
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/cnot_2.c29
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4.c32
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4_run.c26
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5.c32
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5_run.c26
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6.c31
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6_run.c26
8 files changed, 220 insertions, 18 deletions
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index b83f991..2f5a5e3 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -3227,16 +3227,16 @@
)
(define_insn "*cnot<mode>"
- [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
- (unspec:SVE_FULL_I
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_I
[(unspec:<VPRED>
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
(match_operand:SI 5 "aarch64_sve_ptrue_flag")
(eq:<VPRED>
- (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
- (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))]
+ (match_operand:SVE_I 2 "register_operand" "0, w")
+ (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
UNSPEC_PRED_Z)
- (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one")
+ (match_operand:SVE_I 4 "aarch64_simd_imm_one")
(match_dup 3)]
UNSPEC_SEL))]
"TARGET_SVE"
@@ -3274,19 +3274,19 @@
;; Predicated logical inverse, merging with the first input.
(define_insn_and_rewrite "*cond_cnot<mode>_2"
- [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
- (unspec:SVE_FULL_I
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_I
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
;; Logical inverse of operand 2 (as above).
- (unspec:SVE_FULL_I
+ (unspec:SVE_I
[(unspec:<VPRED>
[(match_operand 5)
(const_int SVE_KNOWN_PTRUE)
(eq:<VPRED>
- (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
- (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))]
+ (match_operand:SVE_I 2 "register_operand" "0, w")
+ (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
UNSPEC_PRED_Z)
- (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one")
+ (match_operand:SVE_I 4 "aarch64_simd_imm_one")
(match_dup 3)]
UNSPEC_SEL)
(match_dup 2)]
@@ -3310,22 +3310,22 @@
;; as earlyclobber helps to make the instruction more regular to the
;; register allocator.
(define_insn_and_rewrite "*cond_cnot<mode>_any"
- [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w")
- (unspec:SVE_FULL_I
+ [(set (match_operand:SVE_I 0 "register_operand" "=&w, ?&w, ?&w")
+ (unspec:SVE_I
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
;; Logical inverse of operand 2 (as above).
- (unspec:SVE_FULL_I
+ (unspec:SVE_I
[(unspec:<VPRED>
[(match_operand 5)
(const_int SVE_KNOWN_PTRUE)
(eq:<VPRED>
- (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")
- (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))]
+ (match_operand:SVE_I 2 "register_operand" "w, w, w")
+ (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
UNSPEC_PRED_Z)
- (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one")
+ (match_operand:SVE_I 4 "aarch64_simd_imm_one")
(match_dup 3)]
UNSPEC_SEL)
- (match_operand:SVE_FULL_I 6 "aarch64_simd_reg_or_zero" "0, Dz, w")]
+ (match_operand:SVE_I 6 "aarch64_simd_reg_or_zero" "0, Dz, w")]
UNSPEC_SEL))]
"TARGET_SVE && !rtx_equal_p (operands[2], operands[6])"
"@
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cnot_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cnot_2.c
new file mode 100644
index 0000000..fe77823
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cnot_2.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define DEF_LOOP(TYPE1, TYPE2, COUNT) /* TYPE1: predicate element type; TYPE2: data element type. */ \
+ void __attribute__ ((noipa)) \
+ test_##TYPE1##_##TYPE2 (TYPE2 *restrict r, \
+ TYPE1 *restrict pred, \
+ TYPE2 *restrict a) \
+ { \
+ for (int i = 0; i < COUNT; ++i) \
+ if (pred[i]) \
+ r[i] = !a[i]; \
+ }
+
+#define TEST_ALL(T) \
+ T (int16_t, int8_t, 7) \
+ T (int32_t, int8_t, 3) \
+ T (int32_t, int16_t, 3) \
+ T (int64_t, int8_t, 5) \
+ T (int64_t, int16_t, 5) \
+ T (int64_t, int32_t, 5)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.b, p[0-7]/m,} 3 } } */
+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4.c
new file mode 100644
index 0000000..729d3f4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define DEF_LOOP(TYPE1, TYPE2, COUNT) \
+ void __attribute__ ((noipa)) \
+ test_##TYPE1##_##TYPE2 (TYPE2 *__restrict r, \
+ TYPE2 *__restrict a, \
+ TYPE1 *__restrict pred) \
+ { \
+ for (int i = 0; i < COUNT; ++i) \
+ r[i] = pred[i] ? !a[i] : a[i]; \
+ }
+
+#define TEST_ALL(T) \
+ T (int16_t, int8_t, 7) \
+ T (int32_t, int8_t, 3) \
+ T (int32_t, int16_t, 3) \
+ T (int64_t, int8_t, 5) \
+ T (int64_t, int16_t, 5) \
+ T (int64_t, int32_t, 5)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.b, p[0-7]/m,} 3 } } */
+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-not {\tmov\tz} } } */
+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4_run.c
new file mode 100644
index 0000000..de9c0a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4_run.c
@@ -0,0 +1,26 @@
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "cond_cnot_4.c"
+
+#define TEST_LOOP(TYPE1, TYPE2, N) \
+ { \
+ TYPE1 pred[N]; \
+ TYPE2 r[N], a[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ a[i] = i & 1 ? 0 : 3 * (i + 1); \
+ pred[i] = (i % 3 < 2); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ test_##TYPE1##_##TYPE2 (r, a, pred); \
+ for (int i = 0; i < N; ++i) \
+ if (r[i] != (TYPE2) (pred[i] ? !a[i] : a[i])) \
+ __builtin_abort (); \
+ }
+
+int main ()
+{
+ TEST_ALL (TEST_LOOP)
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5.c
new file mode 100644
index 0000000..7318e10
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define DEF_LOOP(TYPE1, TYPE2, COUNT) \
+ void __attribute__ ((noipa)) \
+ test_##TYPE1##_##TYPE2 (TYPE2 *__restrict r, \
+ TYPE1 *__restrict a, \
+ TYPE2 *__restrict b) \
+ { \
+ for (int i = 0; i < COUNT; ++i) \
+ r[i] = a[i] == 0 ? !b[i] : a[i]; \
+ }
+
+#define TEST_ALL(T) \
+ T (int16_t, int8_t, 7) \
+ T (int32_t, int8_t, 3) \
+ T (int32_t, int16_t, 3) \
+ T (int64_t, int8_t, 5) \
+ T (int64_t, int16_t, 5) \
+ T (int64_t, int32_t, 5)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.b, p[0-7]/m,} 3 } } */
+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-not {\tmov\tz} } } */
+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5_run.c
new file mode 100644
index 0000000..f8f277c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5_run.c
@@ -0,0 +1,26 @@
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "cond_cnot_5.c"
+
+#define TEST_LOOP(TYPE1, TYPE2, N) \
+ { \
+ TYPE1 a[N]; \
+ TYPE2 r[N], b[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ a[i] = i % 3 < 2 ? 0 : i * 42; \
+ b[i] = i & 1 ? 0 : 3 * (i + 1); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ test_##TYPE1##_##TYPE2 (r, a, b); \
+ for (int i = 0; i < N; ++i) \
+ if (r[i] != (TYPE2) (a[i] == 0 ? !b[i] : a[i])) \
+ __builtin_abort (); \
+ }
+
+int main ()
+{
+ TEST_ALL (TEST_LOOP)
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6.c
new file mode 100644
index 0000000..d44e357
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define DEF_LOOP(TYPE1, TYPE2, COUNT) \
+ void __attribute__ ((noipa)) \
+ test_##TYPE1##_##TYPE2 (TYPE2 *__restrict r, \
+ TYPE1 *__restrict a, \
+ TYPE2 *__restrict b) \
+ { \
+ for (int i = 0; i < COUNT; ++i) \
+ r[i] = a[i] == 0 ? !b[i] : 127; \
+ }
+
+#define TEST_ALL(T) \
+ T (int16_t, int8_t, 7) \
+ T (int32_t, int8_t, 3) \
+ T (int32_t, int16_t, 3) \
+ T (int64_t, int8_t, 5) \
+ T (int64_t, int16_t, 5) \
+ T (int64_t, int32_t, 5)
+
+TEST_ALL (DEF_LOOP)
+
+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.b, p[0-7]/m,} 3 } } */
+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.h, p[0-7]/m,} 2 } } */
+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-not {\tmov\tz[^\n]*z} } } */
+/* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6_run.c
new file mode 100644
index 0000000..9e33616
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6_run.c
@@ -0,0 +1,26 @@
+/* { dg-do run { target { aarch64_sve_hw } } } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include "cond_cnot_6.c"
+
+#define TEST_LOOP(TYPE1, TYPE2, N) \
+ { \
+ TYPE1 a[N]; \
+ TYPE2 r[N], b[N]; \
+ for (int i = 0; i < N; ++i) \
+ { \
+ a[i] = i % 3 < 2 ? 0 : i * 42; \
+ b[i] = i & 1 ? 0 : 3 * (i + 1); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ test_##TYPE1##_##TYPE2 (r, a, b); \
+ for (int i = 0; i < N; ++i) \
+ if (r[i] != (TYPE2) (a[i] == 0 ? !b[i] : 127)) \
+ __builtin_abort (); \
+ }
+
+int main ()
+{
+ TEST_ALL (TEST_LOOP)
+ return 0;
+}