diff options
author | Richard Sandiford <richard.sandiford@arm.com> | 2021-01-13 13:00:13 +0000 |
---|---|---|
committer | Richard Sandiford <richard.sandiford@arm.com> | 2021-01-13 13:00:13 +0000 |
commit | 264a1269b44ccd4e5c37307d49461dd4ff52457c (patch) | |
tree | dbc8c12ea6d6474c7bc7b8caf62a11b5ff658145 /gcc | |
parent | cf7a335306153a5b01f3e1d081456ea4c32baefc (diff) | |
download | gcc-264a1269b44ccd4e5c37307d49461dd4ff52457c.zip gcc-264a1269b44ccd4e5c37307d49461dd4ff52457c.tar.gz gcc-264a1269b44ccd4e5c37307d49461dd4ff52457c.tar.bz2 |
aarch64: Add support for unpacked SVE MLS and MSB
This patch extends the MLS/MSB patterns to support unpacked
integer vectors. The type suffix could be either the element
size or the container size, but using the element size should
be more efficient.
gcc/
* config/aarch64/aarch64-sve.md (fnma<mode>4): Extend from SVE_FULL_I
to SVE_I.
(@aarch64_pred_fnma<mode>, cond_fnma<mode>, *cond_fnma<mode>_2)
(*cond_fnma<mode>_4, *cond_fnma<mode>_any): Likewise.
gcc/testsuite/
* gcc.target/aarch64/sve/mls_2.c: New test.
* g++.target/aarch64/sve/cond_mls_1.C: Likewise.
* g++.target/aarch64/sve/cond_mls_2.C: Likewise.
* g++.target/aarch64/sve/cond_mls_3.C: Likewise.
* g++.target/aarch64/sve/cond_mls_4.C: Likewise.
* g++.target/aarch64/sve/cond_mls_5.C: Likewise.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/aarch64/aarch64-sve.md | 88 | ||||
-rw-r--r-- | gcc/testsuite/g++.target/aarch64/sve/cond_mls_1.C | 33 | ||||
-rw-r--r-- | gcc/testsuite/g++.target/aarch64/sve/cond_mls_2.C | 33 | ||||
-rw-r--r-- | gcc/testsuite/g++.target/aarch64/sve/cond_mls_3.C | 33 | ||||
-rw-r--r-- | gcc/testsuite/g++.target/aarch64/sve/cond_mls_4.C | 36 | ||||
-rw-r--r-- | gcc/testsuite/g++.target/aarch64/sve/cond_mls_5.C | 33 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/mls_2.c | 34 |
7 files changed, 246 insertions, 44 deletions
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index ac8a9b4..da15bd8 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -6695,14 +6695,14 @@ ;; Unpredicated integer subtraction of product. (define_expand "fnma<mode>4" - [(set (match_operand:SVE_FULL_I 0 "register_operand") - (minus:SVE_FULL_I - (match_operand:SVE_FULL_I 3 "register_operand") - (unspec:SVE_FULL_I + [(set (match_operand:SVE_I 0 "register_operand") + (minus:SVE_I + (match_operand:SVE_I 3 "register_operand") + (unspec:SVE_I [(match_dup 4) - (mult:SVE_FULL_I - (match_operand:SVE_FULL_I 1 "register_operand") - (match_operand:SVE_FULL_I 2 "general_operand"))] + (mult:SVE_I + (match_operand:SVE_I 1 "register_operand") + (match_operand:SVE_I 2 "general_operand"))] UNSPEC_PRED_X)))] "TARGET_SVE" { @@ -6714,14 +6714,14 @@ ;; Predicated integer subtraction of product. (define_insn "@aarch64_pred_fnma<mode>" - [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w") - (minus:SVE_FULL_I - (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w") - (unspec:SVE_FULL_I + [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w") + (minus:SVE_I + (match_operand:SVE_I 4 "register_operand" "w, 0, w") + (unspec:SVE_I [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") - (mult:SVE_FULL_I - (match_operand:SVE_FULL_I 2 "register_operand" "%0, w, w") - (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w"))] + (mult:SVE_I + (match_operand:SVE_I 2 "register_operand" "%0, w, w") + (match_operand:SVE_I 3 "register_operand" "w, w, w"))] UNSPEC_PRED_X)))] "TARGET_SVE" "@ @@ -6733,15 +6733,15 @@ ;; Predicated integer subtraction of product with merging. (define_expand "cond_fnma<mode>" - [(set (match_operand:SVE_FULL_I 0 "register_operand") - (unspec:SVE_FULL_I + [(set (match_operand:SVE_I 0 "register_operand") + (unspec:SVE_I [(match_operand:<VPRED> 1 "register_operand") - (minus:SVE_FULL_I - (match_operand:SVE_FULL_I 4 "register_operand") - (mult:SVE_FULL_I - (match_operand:SVE_FULL_I 2 "register_operand") - (match_operand:SVE_FULL_I 3 "general_operand"))) - (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero")] + (minus:SVE_I + (match_operand:SVE_I 4 "register_operand") + (mult:SVE_I + (match_operand:SVE_I 2 "register_operand") + (match_operand:SVE_I 3 "general_operand"))) + (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE" { @@ -6756,14 +6756,14 @@ ;; Predicated integer subtraction of product, merging with the first input. (define_insn "*cond_fnma<mode>_2" - [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") - (unspec:SVE_FULL_I + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_I [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") - (minus:SVE_FULL_I - (match_operand:SVE_FULL_I 4 "register_operand" "w, w") - (mult:SVE_FULL_I - (match_operand:SVE_FULL_I 2 "register_operand" "0, w") - (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))) + (minus:SVE_I + (match_operand:SVE_I 4 "register_operand" "w, w") + (mult:SVE_I + (match_operand:SVE_I 2 "register_operand" "0, w") + (match_operand:SVE_I 3 "register_operand" "w, w"))) (match_dup 2)] UNSPEC_SEL))] "TARGET_SVE" @@ -6775,14 +6775,14 @@ ;; Predicated integer subtraction of product, merging with the third input. (define_insn "*cond_fnma<mode>_4" - [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") - (unspec:SVE_FULL_I + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_I [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") - (minus:SVE_FULL_I - (match_operand:SVE_FULL_I 4 "register_operand" "0, w") - (mult:SVE_FULL_I - (match_operand:SVE_FULL_I 2 "register_operand" "w, w") - (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))) + (minus:SVE_I + (match_operand:SVE_I 4 "register_operand" "0, w") + (mult:SVE_I + (match_operand:SVE_I 2 "register_operand" "w, w") + (match_operand:SVE_I 3 "register_operand" "w, w"))) (match_dup 4)] UNSPEC_SEL))] "TARGET_SVE" @@ -6795,15 +6795,15 @@ ;; Predicated integer subtraction of product, merging with an ;; independent value. (define_insn_and_rewrite "*cond_fnma<mode>_any" - [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w") - (unspec:SVE_FULL_I + [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w") + (unspec:SVE_I [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") - (minus:SVE_FULL_I - (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w, w, w, w") - (mult:SVE_FULL_I - (match_operand:SVE_FULL_I 2 "register_operand" "w, w, 0, w, w, w") - (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w, 0, w, w"))) - (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")] + (minus:SVE_I + (match_operand:SVE_I 4 "register_operand" "w, 0, w, w, w, w") + (mult:SVE_I + (match_operand:SVE_I 2 "register_operand" "w, w, 0, w, w, w") + (match_operand:SVE_I 3 "register_operand" "w, w, w, 0, w, w"))) + (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")] UNSPEC_SEL))] "TARGET_SVE && !rtx_equal_p (operands[2], operands[5]) diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_mls_1.C b/gcc/testsuite/g++.target/aarch64/sve/cond_mls_1.C new file mode 100644 index 0000000..f472db4 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/cond_mls_1.C @@ -0,0 +1,33 @@ +/* { dg-do assemble { target aarch64_asm_sve_ok } } */ +/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */ + +#include <stdint.h> + +#define TEST_OP(TYPE) \ + TYPE \ + test##_##TYPE##_reg (TYPE a, TYPE b, TYPE c, TYPE d) \ + { \ + return d == 0 ? a - b * c : a; \ + } + +#define TEST_TYPE(TYPE, SIZE) \ + typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \ + TEST_OP (TYPE##SIZE) + +TEST_TYPE (uint8_t, 32) + +TEST_TYPE (uint8_t, 64) +TEST_TYPE (uint16_t, 64) + +TEST_TYPE (uint8_t, 128) +TEST_TYPE (uint16_t, 128) +TEST_TYPE (uint32_t, 128) + +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.h, p[0-7]/z, \[x0\][^L]*\tmls\t\1\.b,} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.s, p[0-7]/z, \[x0\][^L]*\tmls\t\1\.b,} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.d, p[0-7]/z, \[x0\][^L]*\tmls\t\1\.b,} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.s, p[0-7]/z, \[x0\][^L]*\tmls\t\1\.h,} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.d, p[0-7]/z, \[x0\][^L]*\tmls\t\1\.h,} } } */ +/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+)\.d, p[0-7]/z, \[x0\][^L]*\tmls\t\1\.s,} } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_mls_2.C b/gcc/testsuite/g++.target/aarch64/sve/cond_mls_2.C new file mode 100644 index 0000000..f10b461 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/cond_mls_2.C @@ -0,0 +1,33 @@ +/* { dg-do assemble { target aarch64_asm_sve_ok } } */ +/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */ + +#include <stdint.h> + +#define TEST_OP(TYPE) \ + TYPE \ + test##_##TYPE##_reg (TYPE a, TYPE b, TYPE c, TYPE d) \ + { \ + return d == 0 ? a - b * c : b; \ + } + +#define TEST_TYPE(TYPE, SIZE) \ + typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \ + TEST_OP (TYPE##SIZE) + +TEST_TYPE (uint8_t, 32) + +TEST_TYPE (uint8_t, 64) +TEST_TYPE (uint16_t, 64) + +TEST_TYPE (uint8_t, 128) +TEST_TYPE (uint16_t, 128) +TEST_TYPE (uint32_t, 128) + +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.h, p[0-7]/z, \[x1\][^L]*\tmsb\t\1\.b,} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.s, p[0-7]/z, \[x1\][^L]*\tmsb\t\1\.b,} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.d, p[0-7]/z, \[x1\][^L]*\tmsb\t\1\.b,} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.s, p[0-7]/z, \[x1\][^L]*\tmsb\t\1\.h,} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.d, p[0-7]/z, \[x1\][^L]*\tmsb\t\1\.h,} } } */ +/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+)\.d, p[0-7]/z, \[x1\][^L]*\tmsb\t\1\.s,} } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_mls_3.C b/gcc/testsuite/g++.target/aarch64/sve/cond_mls_3.C new file mode 100644 index 0000000..770d963 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/cond_mls_3.C @@ -0,0 +1,33 @@ +/* { dg-do assemble { target aarch64_asm_sve_ok } } */ +/* { dg-options "-O2 -msve-vector-bits=2048 -save-temps" } */ + +#include <stdint.h> + +#define TEST_OP(TYPE) \ + TYPE \ + test##_##TYPE##_reg (TYPE a, TYPE b, TYPE c, TYPE d) \ + { \ + return d == 0 ? a - b * c : c; \ + } + +#define TEST_TYPE(TYPE, SIZE) \ + typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \ + TEST_OP (TYPE##SIZE) + +TEST_TYPE (uint8_t, 32) + +TEST_TYPE (uint8_t, 64) +TEST_TYPE (uint16_t, 64) + +TEST_TYPE (uint8_t, 128) +TEST_TYPE (uint16_t, 128) +TEST_TYPE (uint32_t, 128) + +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.h, p[0-7]/z, \[x2\][^L]*\tmsb\t\1\.b,} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.s, p[0-7]/z, \[x2\][^L]*\tmsb\t\1\.b,} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.d, p[0-7]/z, \[x2\][^L]*\tmsb\t\1\.b,} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.s, p[0-7]/z, \[x2\][^L]*\tmsb\t\1\.h,} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.d, p[0-7]/z, \[x2\][^L]*\tmsb\t\1\.h,} } } */ +/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+)\.d, p[0-7]/z, \[x2\][^L]*\tmsb\t\1\.s,} } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_mls_4.C b/gcc/testsuite/g++.target/aarch64/sve/cond_mls_4.C new file mode 100644 index 0000000..fac8d95 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/cond_mls_4.C @@ -0,0 +1,36 @@ +/* { dg-do assemble { target aarch64_asm_sve_ok } } */ +/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */ + +#include <stdint.h> + +#define TEST_OP(TYPE) \ + TYPE \ + test##_##TYPE##_reg (TYPE a, TYPE b, TYPE c, TYPE d) \ + { \ + return d == 0 ? a - b * c : d; \ + } + +#define TEST_TYPE(TYPE, SIZE) \ + typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \ + TEST_OP (TYPE##SIZE) + +TEST_TYPE (uint8_t, 32) + +TEST_TYPE (uint8_t, 64) +TEST_TYPE (uint16_t, 64) + +TEST_TYPE (uint8_t, 128) +TEST_TYPE (uint16_t, 128) +TEST_TYPE (uint32_t, 128) + +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.h, p[0-7]/z, \[x3\][^L]*\tmls\t\1\.b,} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.s, p[0-7]/z, \[x3\][^L]*\tmls\t\1\.b,} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.d, p[0-7]/z, \[x3\][^L]*\tmls\t\1\.b,} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.s, p[0-7]/z, \[x3\][^L]*\tmls\t\1\.h,} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.d, p[0-7]/z, \[x3\][^L]*\tmls\t\1\.h,} } } */ +/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+)\.d, p[0-7]/z, \[x3\][^L]*\tmls\t\1\.s,} } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_mls_5.C b/gcc/testsuite/g++.target/aarch64/sve/cond_mls_5.C new file mode 100644 index 0000000..82b89b4 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/cond_mls_5.C @@ -0,0 +1,33 @@ +/* { dg-do assemble { target aarch64_asm_sve_ok } } */ +/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */ + +#include <stdint.h> + +#define TEST_OP(TYPE) \ + TYPE \ + test##_##TYPE##_reg (TYPE a, TYPE b, TYPE c, TYPE d) \ + { \ + return d == 0 ? a - b * c : 0; \ + } + +#define TEST_TYPE(TYPE, SIZE) \ + typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \ + TEST_OP (TYPE##SIZE) + +TEST_TYPE (uint8_t, 32) + +TEST_TYPE (uint8_t, 64) +TEST_TYPE (uint16_t, 64) + +TEST_TYPE (uint8_t, 128) +TEST_TYPE (uint16_t, 128) +TEST_TYPE (uint32_t, 128) + +/* { dg-final { scan-assembler-times {\t(?:mls|msb)\tz[0-9]+\.b,} 3 } } */ +/* { dg-final { scan-assembler-times {\t(?:mls|msb)\tz[0-9]+\.h,} 2 } } */ +/* { dg-final { scan-assembler-times {\t(?:mls|msb)\tz[0-9]+\.s,} 1 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/z, z[0-9]+\.b\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mls_2.c b/gcc/testsuite/gcc.target/aarch64/sve/mls_2.c new file mode 100644 index 0000000..b08812e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/mls_2.c @@ -0,0 +1,34 @@ +/* { dg-do assemble { target aarch64_asm_sve_ok } } */ +/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */ + +#include <stdint.h> + +#define TEST_OP(TYPE) \ + TYPE \ + test##_##TYPE##_##AMT (TYPE a, TYPE b, TYPE c) \ + { \ + return a - b * c; \ + } + +#define TEST_TYPE(TYPE, SIZE) \ + typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \ + TEST_OP (TYPE##SIZE) + +TEST_TYPE (int8_t, 32) +TEST_TYPE (uint8_t, 32) + +TEST_TYPE (int8_t, 64) +TEST_TYPE (uint8_t, 64) +TEST_TYPE (int16_t, 64) +TEST_TYPE (uint16_t, 64) + +TEST_TYPE (int8_t, 128) +TEST_TYPE (uint8_t, 128) +TEST_TYPE (int16_t, 128) +TEST_TYPE (uint16_t, 128) +TEST_TYPE (int32_t, 128) +TEST_TYPE (uint32_t, 128) + +/* { dg-final { scan-assembler-times {\t(?:mls|msb)\tz[0-9]+\.b,} 6 } } */ +/* { dg-final { scan-assembler-times {\t(?:mls|msb)\tz[0-9]+\.h,} 4 } } */ +/* { dg-final { scan-assembler-times {\t(?:mls|msb)\tz[0-9]+\.s,} 2 } } */ |