diff options
author | Richard Sandiford <richard.sandiford@arm.com> | 2021-01-08 10:49:38 +0000 |
---|---|---|
committer | Richard Sandiford <richard.sandiford@arm.com> | 2021-01-08 10:49:38 +0000 |
commit | 5fe3e6bf061da8d9b0e759927c340fe8e0f44725 (patch) | |
tree | a929a8f3f861e2abfb21b74093074ac73f6a7967 /gcc | |
parent | f3c5d1fa53ad85424ce05f19583223ad2f413cfa (diff) | |
download | gcc-5fe3e6bf061da8d9b0e759927c340fe8e0f44725.zip gcc-5fe3e6bf061da8d9b0e759927c340fe8e0f44725.tar.gz gcc-5fe3e6bf061da8d9b0e759927c340fe8e0f44725.tar.bz2 |
aarch64: Support unpacked CNOT on SVE
This patch adds unpacked support for unconditional and
conditional CNOT. The type suffix has to be taken from
the element size rather than the container size.
gcc/
* config/aarch64/aarch64-sve.md (*cnot<mode>): Extend from
SVE_FULL_I to SVE_I.
(*cond_cnot<mode>_2, *cond_cnot<mode>_any): Likewise.
gcc/testsuite/
* gcc.target/aarch64/sve/cnot_2.c: New test.
* gcc.target/aarch64/sve/cond_cnot_4.c: Likewise.
* gcc.target/aarch64/sve/cond_cnot_4_run.c: Likewise.
* gcc.target/aarch64/sve/cond_cnot_5.c: Likewise.
* gcc.target/aarch64/sve/cond_cnot_5_run.c: Likewise.
* gcc.target/aarch64/sve/cond_cnot_6.c: Likewise.
* gcc.target/aarch64/sve/cond_cnot_6_run.c: Likewise.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/aarch64/aarch64-sve.md | 36 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/cnot_2.c | 29 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4.c | 32 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4_run.c | 26 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5.c | 32 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5_run.c | 26 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6.c | 31 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6_run.c | 26 |
8 files changed, 220 insertions, 18 deletions
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index b83f991..2f5a5e3 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -3227,16 +3227,16 @@ ) (define_insn "*cnot<mode>" - [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") - (unspec:SVE_FULL_I + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_I [(unspec:<VPRED> [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") (match_operand:SI 5 "aarch64_sve_ptrue_flag") (eq:<VPRED> - (match_operand:SVE_FULL_I 2 "register_operand" "0, w") - (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))] + (match_operand:SVE_I 2 "register_operand" "0, w") + (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))] UNSPEC_PRED_Z) - (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one") + (match_operand:SVE_I 4 "aarch64_simd_imm_one") (match_dup 3)] UNSPEC_SEL))] "TARGET_SVE" @@ -3274,19 +3274,19 @@ ;; Predicated logical inverse, merging with the first input. (define_insn_and_rewrite "*cond_cnot<mode>_2" - [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") - (unspec:SVE_FULL_I + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_I [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") ;; Logical inverse of operand 2 (as above). - (unspec:SVE_FULL_I + (unspec:SVE_I [(unspec:<VPRED> [(match_operand 5) (const_int SVE_KNOWN_PTRUE) (eq:<VPRED> - (match_operand:SVE_FULL_I 2 "register_operand" "0, w") - (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))] + (match_operand:SVE_I 2 "register_operand" "0, w") + (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))] UNSPEC_PRED_Z) - (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one") + (match_operand:SVE_I 4 "aarch64_simd_imm_one") (match_dup 3)] UNSPEC_SEL) (match_dup 2)] @@ -3310,22 +3310,22 @@ ;; as earlyclobber helps to make the instruction more regular to the ;; register allocator. (define_insn_and_rewrite "*cond_cnot<mode>_any" - [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w") - (unspec:SVE_FULL_I + [(set (match_operand:SVE_I 0 "register_operand" "=&w, ?&w, ?&w") + (unspec:SVE_I [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") ;; Logical inverse of operand 2 (as above). - (unspec:SVE_FULL_I + (unspec:SVE_I [(unspec:<VPRED> [(match_operand 5) (const_int SVE_KNOWN_PTRUE) (eq:<VPRED> - (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w") - (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))] + (match_operand:SVE_I 2 "register_operand" "w, w, w") + (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))] UNSPEC_PRED_Z) - (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one") + (match_operand:SVE_I 4 "aarch64_simd_imm_one") (match_dup 3)] UNSPEC_SEL) - (match_operand:SVE_FULL_I 6 "aarch64_simd_reg_or_zero" "0, Dz, w")] + (match_operand:SVE_I 6 "aarch64_simd_reg_or_zero" "0, Dz, w")] UNSPEC_SEL))] "TARGET_SVE && !rtx_equal_p (operands[2], operands[6])" "@ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cnot_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cnot_2.c new file mode 100644 index 0000000..fe77823 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cnot_2.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include <stdint.h> + +#define DEF_LOOP(TYPE1, TYPE2, COUNT) \ + void __attribute__ ((noipa)) \ + test_##TYPE1##_##TYPE2##_##TYPE3 (TYPE2 *restrict r, \ + TYPE1 *restrict pred, \ + TYPE2 *restrict a) \ + { \ + for (int i = 0; i < COUNT; ++i) \ + if (pred[i]) \ + r[i] = !a[i]; \ + } + +#define TEST_ALL(T) \ + T (int16_t, int8_t, 7) \ + T (int32_t, int8_t, 3) \ + T (int32_t, int16_t, 3) \ + T (int64_t, int8_t, 5) \ + T (int64_t, int16_t, 5) \ + T (int64_t, int32_t, 5) + +TEST_ALL (DEF_LOOP) + +/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.b, p[0-7]/m,} 3 } } */ +/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4.c new file mode 100644 index 0000000..729d3f4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include <stdint.h> + +#define DEF_LOOP(TYPE1, TYPE2, COUNT) \ + void __attribute__ ((noipa)) \ + test_##TYPE1##_##TYPE2 (TYPE2 *__restrict r, \ + TYPE2 *__restrict a, \ + TYPE1 *__restrict pred) \ + { \ + for (int i = 0; i < COUNT; ++i) \ + r[i] = pred[i] ? !a[i] : a[i]; \ + } + +#define TEST_ALL(T) \ + T (int16_t, int8_t, 7) \ + T (int32_t, int8_t, 3) \ + T (int32_t, int16_t, 3) \ + T (int64_t, int8_t, 5) \ + T (int64_t, int16_t, 5) \ + T (int64_t, int32_t, 5) + +TEST_ALL (DEF_LOOP) + +/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.b, p[0-7]/m,} 3 } } */ +/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-not {\tmov\tz} } } */ +/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4_run.c new file mode 100644 index 0000000..de9c0a5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4_run.c @@ -0,0 +1,26 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include "cond_cnot_4.c" + +#define TEST_LOOP(TYPE1, TYPE2, N) \ + { \ + TYPE1 pred[N]; \ + TYPE2 r[N], a[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + a[i] = i & 1 ? 0 : 3 * (i + 1); \ + pred[i] = (i % 3 < 2); \ + asm volatile ("" ::: "memory"); \ + } \ + test_##TYPE1##_##TYPE2 (r, a, pred); \ + for (int i = 0; i < N; ++i) \ + if (r[i] != (TYPE2) (pred[i] ? !a[i] : a[i])) \ + __builtin_abort (); \ + } + +int main () +{ + TEST_ALL (TEST_LOOP) + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5.c new file mode 100644 index 0000000..7318e10 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include <stdint.h> + +#define DEF_LOOP(TYPE1, TYPE2, COUNT) \ + void __attribute__ ((noipa)) \ + test_##TYPE1##_##TYPE2 (TYPE2 *__restrict r, \ + TYPE1 *__restrict a, \ + TYPE2 *__restrict b) \ + { \ + for (int i = 0; i < COUNT; ++i) \ + r[i] = a[i] == 0 ? !b[i] : a[i]; \ + } + +#define TEST_ALL(T) \ + T (int16_t, int8_t, 7) \ + T (int32_t, int8_t, 3) \ + T (int32_t, int16_t, 3) \ + T (int64_t, int8_t, 5) \ + T (int64_t, int16_t, 5) \ + T (int64_t, int32_t, 5) + +TEST_ALL (DEF_LOOP) + +/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.b, p[0-7]/m,} 3 } } */ +/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-not {\tmov\tz} } } */ +/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5_run.c new file mode 100644 index 0000000..f8f277c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5_run.c @@ -0,0 +1,26 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include "cond_cnot_5.c" + +#define TEST_LOOP(TYPE1, TYPE2, N) \ + { \ + TYPE1 a[N]; \ + TYPE2 r[N], b[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + a[i] = i % 3 < 2 ? 0 : i * 42; \ + b[i] = i & 1 ? 0 : 3 * (i + 1); \ + asm volatile ("" ::: "memory"); \ + } \ + test_##TYPE1##_##TYPE2 (r, a, b); \ + for (int i = 0; i < N; ++i) \ + if (r[i] != (TYPE2) (a[i] == 0 ? !b[i] : a[i])) \ + __builtin_abort (); \ + } + +int main () +{ + TEST_ALL (TEST_LOOP) + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6.c new file mode 100644 index 0000000..d44e357 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include <stdint.h> + +#define DEF_LOOP(TYPE1, TYPE2, COUNT) \ + void __attribute__ ((noipa)) \ + test_##TYPE1##_##TYPE2 (TYPE2 *__restrict r, \ + TYPE1 *__restrict a, \ + TYPE2 *__restrict b) \ + { \ + for (int i = 0; i < COUNT; ++i) \ + r[i] = a[i] == 0 ? !b[i] : 127; \ + } + +#define TEST_ALL(T) \ + T (int16_t, int8_t, 7) \ + T (int32_t, int8_t, 3) \ + T (int32_t, int16_t, 3) \ + T (int64_t, int8_t, 5) \ + T (int64_t, int16_t, 5) \ + T (int64_t, int32_t, 5) + +TEST_ALL (DEF_LOOP) + +/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.b, p[0-7]/m,} 3 } } */ +/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-not {\tmov\tz[^\n]*z} } } */ +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6_run.c new file mode 100644 index 0000000..9e33616 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6_run.c @@ -0,0 +1,26 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include "cond_cnot_6.c" + +#define TEST_LOOP(TYPE1, TYPE2, N) \ + { \ + TYPE1 a[N]; \ + TYPE2 r[N], b[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + a[i] = i % 3 < 2 ? 0 : i * 42; \ + b[i] = i & 1 ? 0 : 3 * (i + 1); \ + asm volatile ("" ::: "memory"); \ + } \ + test_##TYPE1##_##TYPE2 (r, a, b); \ + for (int i = 0; i < N; ++i) \ + if (r[i] != (TYPE2) (a[i] == 0 ? !b[i] : 127)) \ + __builtin_abort (); \ + } + +int main () +{ + TEST_ALL (TEST_LOOP) + return 0; +} |