diff options
-rw-r--r-- | gcc/ChangeLog | 16 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-sve.md | 97 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 13 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/scatter_store_1.c | 8 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/scatter_store_2.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/scatter_store_3.c | 9 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/scatter_store_4.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/scatter_store_5.c | 12 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/scatter_store_8.c | 46 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/scatter_store_9.c | 20 |
10 files changed, 185 insertions, 40 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 78a79ae..2a9d587 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,21 @@ 2019-11-16 Richard Sandiford <richard.sandiford@arm.com> + * config/aarch64/aarch64-sve.md + (scatter_store<SVE_FULL_SD:mode><v_int_equiv>): Extend to... + (scatter_store<SVE_24:mode><v_int_container>): ...this. + (mask_scatter_store<SVE_FULL_S:mode><v_int_equiv>): Extend to... + (mask_scatter_store<SVE_4:mode><v_int_equiv>): ...this. + (mask_scatter_store<SVE_FULL_D:mode><v_int_equiv>): Extend to... + (mask_scatter_store<SVE_2:mode><v_int_equiv>): ...this. + (*mask_scatter_store<mode><v_int_container>_<su>xtw_unpacked): New + pattern. + (*mask_scatter_store<SVE_FULL_D:mode><v_int_equiv>_sxtw): Extend to... + (*mask_scatter_store<SVE_2:mode><v_int_equiv>_sxtw): ...this. + (*mask_scatter_store<SVE_FULL_D:mode><v_int_equiv>_uxtw): Extend to... + (*mask_scatter_store<SVE_2:mode><v_int_equiv>_uxtw): ...this. + +2019-11-16 Richard Sandiford <richard.sandiford@arm.com> + * config/aarch64/iterators.md (SVE_2BHSI, SVE_2HSDI, SVE_4BHI) (SVE_4HSI): New mode iterators. (ANY_EXTEND2): New code iterator. diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 1dcbb4b..cdc3b4c 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -2135,15 +2135,15 @@ ;; ------------------------------------------------------------------------- ;; Unpredicated scatter stores. -(define_expand "scatter_store<mode><v_int_equiv>" +(define_expand "scatter_store<mode><v_int_container>" [(set (mem:BLK (scratch)) (unspec:BLK [(match_dup 5) (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>") - (match_operand:<V_INT_EQUIV> 1 "register_operand") + (match_operand:<V_INT_CONTAINER> 1 "register_operand") (match_operand:DI 2 "const_int_operand") (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>") - (match_operand:SVE_FULL_SD 4 "register_operand")] + (match_operand:SVE_24 4 "register_operand")] UNSPEC_ST1_SCATTER))] "TARGET_SVE" { @@ -2153,48 +2153,74 @@ ;; Predicated scatter stores for 32-bit elements. Operand 2 is true for ;; unsigned extension and false for signed extension. -(define_insn "mask_scatter_store<mode><v_int_equiv>" +(define_insn "mask_scatter_store<mode><v_int_container>" [(set (mem:BLK (scratch)) (unspec:BLK [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") - (match_operand:DI 0 "aarch64_sve_gather_offset_w" "Z, vgw, rk, rk, rk, rk") + (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vgw, rk, rk, rk, rk") (match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w") (match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1") - (match_operand:DI 3 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, Ui1, i, i") - (match_operand:SVE_FULL_S 4 "register_operand" "w, w, w, w, w, w")] + (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i") + (match_operand:SVE_4 4 "register_operand" "w, w, w, w, w, w")] UNSPEC_ST1_SCATTER))] "TARGET_SVE" "@ - st1w\t%4.s, %5, [%1.s] - st1w\t%4.s, %5, [%1.s, #%0] - st1w\t%4.s, %5, [%0, %1.s, sxtw] - st1w\t%4.s, %5, [%0, %1.s, uxtw] - st1w\t%4.s, %5, [%0, %1.s, sxtw %p3] - st1w\t%4.s, %5, [%0, %1.s, uxtw %p3]" + st1<Vesize>\t%4.s, %5, [%1.s] + st1<Vesize>\t%4.s, %5, [%1.s, #%0] + st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw] + st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw] + st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3] + st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]" ) ;; Predicated scatter stores for 64-bit elements. The value of operand 2 ;; doesn't matter in this case. -(define_insn "mask_scatter_store<mode><v_int_equiv>" +(define_insn "mask_scatter_store<mode><v_int_container>" [(set (mem:BLK (scratch)) (unspec:BLK [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl") - (match_operand:DI 0 "aarch64_sve_gather_offset_d" "Z, vgd, rk, rk") + (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vgd, rk, rk") (match_operand:VNx2DI 1 "register_operand" "w, w, w, w") (match_operand:DI 2 "const_int_operand") - (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, Ui1, Ui1, i") - (match_operand:SVE_FULL_D 4 "register_operand" "w, w, w, w")] + (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, i") + (match_operand:SVE_2 4 "register_operand" "w, w, w, w")] UNSPEC_ST1_SCATTER))] "TARGET_SVE" "@ - st1d\t%4.d, %5, [%1.d] - st1d\t%4.d, %5, [%1.d, #%0] - st1d\t%4.d, %5, [%0, %1.d] - st1d\t%4.d, %5, [%0, %1.d, lsl %p3]" + st1<Vesize>\t%4.d, %5, [%1.d] + st1<Vesize>\t%4.d, %5, [%1.d, #%0] + st1<Vesize>\t%4.d, %5, [%0, %1.d] + st1<Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]" ) -;; Likewise, but with the offset being sign-extended from 32 bits. -(define_insn_and_rewrite "*mask_scatter_store<mode><v_int_equiv>_sxtw" +;; Likewise, but with the offset being extended from 32 bits. +(define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_<su>xtw_unpacked" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") + (match_operand:DI 0 "register_operand" "rk, rk") + (unspec:VNx2DI + [(match_operand 6) + (ANY_EXTEND:VNx2DI + (match_operand:VNx2SI 1 "register_operand" "w, w"))] + UNSPEC_PRED_X) + (match_operand:DI 2 "const_int_operand") + (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i") + (match_operand:SVE_2 4 "register_operand" "w, w")] + UNSPEC_ST1_SCATTER))] + "TARGET_SVE" + "@ + st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw] + st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw %p3]" + "&& !CONSTANT_P (operands[6])" + { + operands[6] = CONSTM1_RTX (<VPRED>mode); + } +) + +;; Likewise, but with the offset being truncated to 32 bits and then +;; sign-extended. +(define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_sxtw" [(set (mem:BLK (scratch)) (unspec:BLK [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") @@ -2206,21 +2232,22 @@ (match_operand:VNx2DI 1 "register_operand" "w, w")))] UNSPEC_PRED_X) (match_operand:DI 2 "const_int_operand") - (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, i") - (match_operand:SVE_FULL_D 4 "register_operand" "w, w")] + (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i") + (match_operand:SVE_2 4 "register_operand" "w, w")] UNSPEC_ST1_SCATTER))] "TARGET_SVE" "@ - st1d\t%4.d, %5, [%0, %1.d, sxtw] - st1d\t%4.d, %5, [%0, %1.d, sxtw %p3]" - "&& !rtx_equal_p (operands[5], operands[6])" + st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw] + st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]" + "&& !CONSTANT_P (operands[6])" { - operands[6] = copy_rtx (operands[5]); + operands[6] = CONSTM1_RTX (<VPRED>mode); } ) -;; Likewise, but with the offset being zero-extended from 32 bits. -(define_insn "*mask_scatter_store<mode><v_int_equiv>_uxtw" +;; Likewise, but with the offset being truncated to 32 bits and then +;; zero-extended. +(define_insn "*mask_scatter_store<mode><v_int_container>_uxtw" [(set (mem:BLK (scratch)) (unspec:BLK [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") @@ -2229,13 +2256,13 @@ (match_operand:VNx2DI 1 "register_operand" "w, w") (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate")) (match_operand:DI 2 "const_int_operand") - (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, i") - (match_operand:SVE_FULL_D 4 "register_operand" "w, w")] + (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i") + (match_operand:SVE_2 4 "register_operand" "w, w")] UNSPEC_ST1_SCATTER))] "TARGET_SVE" "@ - st1d\t%4.d, %5, [%0, %1.d, uxtw] - st1d\t%4.d, %5, [%0, %1.d, uxtw %p3]" + st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw] + st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]" ) ;; ------------------------------------------------------------------------- diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index e9535af..23a452b 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,18 @@ 2019-11-16 Richard Sandiford <richard.sandiford@arm.com> + * gcc.target/aarch64/sve/scatter_store_1.c (TEST_LOOP): Start at 0. + (TEST_ALL): Add tests for 8-bit and 16-bit elements. + * gcc.target/aarch64/sve/scatter_store_2.c: Update accordingly. + * gcc.target/aarch64/sve/scatter_store_3.c (TEST_LOOP): Start at 0. + (TEST_ALL): Add tests for 8-bit and 16-bit elements. + * gcc.target/aarch64/sve/scatter_store_4.c: Update accordingly. + * gcc.target/aarch64/sve/scatter_store_5.c (TEST_LOOP): Start at 0. + (TEST_ALL): Add tests for 8-bit, 16-bit and 32-bit elements. + * gcc.target/aarch64/sve/scatter_store_8.c: New test. + * gcc.target/aarch64/sve/scatter_store_9.c: Likewise. + +2019-11-16 Richard Sandiford <richard.sandiford@arm.com> + * gcc.target/aarch64/sve/gather_load_extend_1.c: New test. * gcc.target/aarch64/sve/gather_load_extend_2.c: Likewise. * gcc.target/aarch64/sve/gather_load_extend_3.c: Likewise. diff --git a/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_1.c b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_1.c index 65be5e6..53078fb 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_1.c @@ -13,11 +13,15 @@ f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \ INDEX##BITS *indices, int n) \ { \ - for (int i = 9; i < n; ++i) \ + for (int i = 0; i < n; ++i) \ dest[indices[i]] = src[i] + 1; \ } #define TEST_ALL(T) \ + T (int8_t, 32) \ + T (uint8_t, 32) \ + T (int16_t, 32) \ + T (uint16_t, 32) \ T (int32_t, 32) \ T (uint32_t, 32) \ T (float, 32) \ @@ -27,5 +31,7 @@ TEST_ALL (TEST_LOOP) +/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 1\]\n} 2 } } */ /* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 3 } } */ /* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_2.c b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_2.c index 5cb507c..6bc7cbb 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_2.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_2.c @@ -6,5 +6,7 @@ #include "scatter_store_1.c" +/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw 1\]\n} 2 } } */ /* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw 2\]\n} 3 } } */ /* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_3.c b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_3.c index faa85df..fe3d59a 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_3.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_3.c @@ -8,17 +8,20 @@ #define INDEX64 int64_t #endif -/* Invoked 18 times for each data size. */ #define TEST_LOOP(DATA_TYPE, BITS) \ void __attribute__ ((noinline, noclone)) \ f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \ INDEX##BITS *indices, int n) \ { \ - for (int i = 9; i < n; ++i) \ + for (int i = 0; i < n; ++i) \ *(DATA_TYPE *) ((char *) dest + indices[i]) = src[i] + 1; \ } #define TEST_ALL(T) \ + T (int8_t, 32) \ + T (uint8_t, 32) \ + T (int16_t, 32) \ + T (uint16_t, 32) \ T (int32_t, 32) \ T (uint32_t, 32) \ T (float, 32) \ @@ -28,5 +31,7 @@ TEST_ALL (TEST_LOOP) +/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 2 } } */ /* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 3 } } */ /* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d\]\n} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_4.c b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_4.c index 8dff57c..8a9fa2c 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_4.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_4.c @@ -6,5 +6,7 @@ #include "scatter_store_3.c" +/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 2 } } */ /* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 3 } } */ /* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d\]\n} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_5.c b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_5.c index 0962a72a..d3a6452 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_5.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_5.c @@ -3,21 +3,29 @@ #include <stdint.h> -/* Invoked 18 times for each data size. */ #define TEST_LOOP(DATA_TYPE) \ void __attribute__ ((noinline, noclone)) \ f_##DATA_TYPE (DATA_TYPE *restrict *dest, DATA_TYPE *restrict src, \ int n) \ { \ - for (int i = 9; i < n; ++i) \ + for (int i = 0; i < n; ++i) \ *dest[i] = src[i] + 1; \ } #define TEST_ALL(T) \ + T (int8_t) \ + T (uint8_t) \ + T (int16_t) \ + T (uint16_t) \ + T (int32_t) \ + T (uint32_t) \ T (int64_t) \ T (uint64_t) \ T (double) TEST_ALL (TEST_LOOP) +/* We assume this isn't profitable for bytes. */ +/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.d, p[0-7], \[z[0-9]+.d\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.d, p[0-7], \[z[0-9]+.d\]\n} 2 } } */ /* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[z[0-9]+.d\]\n} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_8.c b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_8.c new file mode 100644 index 0000000..30f37f0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_8.c @@ -0,0 +1,46 @@ +/* { dg-do assemble { target aarch64_asm_sve_ok } } */ +/* { dg-options "-O2 -ftree-vectorize -fwrapv --save-temps" } */ + +#include <stdint.h> + +#ifndef INDEX32 +#define INDEX16 int16_t +#define INDEX32 int32_t +#endif + +#define TEST_LOOP(DATA_TYPE, BITS) \ + void __attribute__ ((noinline, noclone)) \ + f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \ + INDEX##BITS *indices, INDEX##BITS mask, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + dest[(INDEX##BITS) (indices[i] + mask)] = src[i]; \ + } + +#define TEST_ALL(T) \ + T (int8_t, 16) \ + T (uint8_t, 16) \ + T (int16_t, 16) \ + T (uint16_t, 16) \ + T (_Float16, 16) \ + T (int32_t, 16) \ + T (uint32_t, 16) \ + T (float, 16) \ + T (int64_t, 32) \ + T (uint64_t, 32) \ + T (double, 32) + +TEST_ALL (TEST_LOOP) + +/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 1\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, sxtw 3\]\n} 3 } } */ + +/* { dg-final { scan-assembler-times {\tsxt.\tz} 8 } } */ +/* { dg-final { scan-assembler-times {\tsxth\tz[0-9]+\.s,} 8 } } */ + +/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s,} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s,} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_9.c b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_9.c new file mode 100644 index 0000000..0218d35 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_9.c @@ -0,0 +1,20 @@ +/* { dg-do assemble { target aarch64_asm_sve_ok } } */ +/* { dg-options "-O2 -ftree-vectorize -fwrapv --save-temps" } */ + +#define INDEX16 uint16_t +#define INDEX32 uint32_t + +#include "scatter_store_8.c" + +/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw 1\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw 2\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, uxtw 3\]\n} 3 } } */ + +/* { dg-final { scan-assembler-times {\tuxt.\tz} 8 } } */ +/* { dg-final { scan-assembler-times {\tuxth\tz[0-9]+\.s,} 8 } } */ + +/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s,} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s,} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */ |