Diffstat (limited to 'gcc')
-rw-r--r--  gcc/ChangeLog                                              |  20
-rw-r--r--  gcc/config/aarch64/aarch64-sve.md                          | 109
-rw-r--r--  gcc/config/aarch64/iterators.md                            |  35
-rw-r--r--  gcc/testsuite/ChangeLog                                    |  20
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/gather_load_1.c       |  14
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/gather_load_2.c       |   7
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/gather_load_3.c       |  14
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/gather_load_4.c       |   7
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/gather_load_5.c       |  17
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/gather_load_6.c       |   5
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/gather_load_7.c       |   2
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/gather_load_8.c       |  46
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/gather_load_9.c       |  20
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_6.c  |   2
14 files changed, 268 insertions, 50 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 7fe9a11..1253306 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,25 @@
 2019-11-16  Richard Sandiford  <richard.sandiford@arm.com>
 
+	* config/aarch64/iterators.md (SVE_24, SVE_2, SVE_4): New mode
+	iterators.
+	* config/aarch64/aarch64-sve.md
+	(gather_load<SVE_FULL_SD:mode><v_int_equiv>): Extend to...
+	(gather_load<SVE_24:mode><v_int_container>): ...this.
+	(mask_gather_load<SVE_FULL_S:mode><v_int_equiv>): Extend to...
+	(mask_gather_load<SVE_4:mode><v_int_container>): ...this.
+	(mask_gather_load<SVE_FULL_D:mode><v_int_equiv>): Extend to...
+	(mask_gather_load<SVE_2:mode><v_int_container>): ...this.
+	(*mask_gather_load<SVE_2:mode><v_int_container>_<su>xtw_unpacked):
+	New pattern.
+	(*mask_gather_load<SVE_FULL_D:mode><v_int_equiv>_sxtw): Extend to...
+	(*mask_gather_load<SVE_2:mode><v_int_equiv>_sxtw): ...this.
+	Allow the nominal extension predicate to be different from the
+	load predicate.
+	(*mask_gather_load<SVE_FULL_D:mode><v_int_equiv>_uxtw): Extend to...
+	(*mask_gather_load<SVE_2:mode><v_int_equiv>_uxtw): ...this.
+
+2019-11-16  Richard Sandiford  <richard.sandiford@arm.com>
+
 	* config/aarch64/aarch64-sve.md
 	(trunc<SVE_HSDI:mode><SVE_PARTIAL_I:mode>2): New pattern.
 	* config/aarch64/aarch64.c (aarch64_integer_truncation_p): New
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 158a178..e26ac45 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -1302,12 +1302,12 @@
 ;; -------------------------------------------------------------------------
 
 ;; Unpredicated gather loads.
-(define_expand "gather_load<mode><v_int_equiv>"
-  [(set (match_operand:SVE_FULL_SD 0 "register_operand")
-	(unspec:SVE_FULL_SD
+(define_expand "gather_load<mode><v_int_container>"
+  [(set (match_operand:SVE_24 0 "register_operand")
+	(unspec:SVE_24
 	  [(match_dup 5)
 	   (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>")
-	   (match_operand:<V_INT_EQUIV> 2 "register_operand")
+	   (match_operand:<V_INT_CONTAINER> 2 "register_operand")
 	   (match_operand:DI 3 "const_int_operand")
 	   (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
 	   (mem:BLK (scratch))]
@@ -1320,85 +1320,116 @@
 ;; Predicated gather loads for 32-bit elements.  Operand 3 is true for
 ;; unsigned extension and false for signed extension.
-(define_insn "mask_gather_load<mode><v_int_equiv>"
-  [(set (match_operand:SVE_FULL_S 0 "register_operand" "=w, w, w, w, w, w")
-	(unspec:SVE_FULL_S
+(define_insn "mask_gather_load<mode><v_int_container>"
+  [(set (match_operand:SVE_4 0 "register_operand" "=w, w, w, w, w, w")
+	(unspec:SVE_4
 	  [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
-	   (match_operand:DI 1 "aarch64_sve_gather_offset_w" "Z, vgw, rk, rk, rk, rk")
+	   (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>" "Z, vgw, rk, rk, rk, rk")
 	   (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
 	   (match_operand:DI 3 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
-	   (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, Ui1, i, i")
+	   (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
 	   (mem:BLK (scratch))]
 	  UNSPEC_LD1_GATHER))]
   "TARGET_SVE"
   "@
-   ld1w\t%0.s, %5/z, [%2.s]
-   ld1w\t%0.s, %5/z, [%2.s, #%1]
-   ld1w\t%0.s, %5/z, [%1, %2.s, sxtw]
-   ld1w\t%0.s, %5/z, [%1, %2.s, uxtw]
-   ld1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
-   ld1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
+   ld1<Vesize>\t%0.s, %5/z, [%2.s]
+   ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1]
+   ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
+   ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
+   ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
+   ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
 )
 
 ;; Predicated gather loads for 64-bit elements.  The value of operand 3
 ;; doesn't matter in this case.
-(define_insn "mask_gather_load<mode><v_int_equiv>"
-  [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w, w, w")
-	(unspec:SVE_FULL_D
+(define_insn "mask_gather_load<mode><v_int_container>"
+  [(set (match_operand:SVE_2 0 "register_operand" "=w, w, w, w")
+	(unspec:SVE_2
 	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
-	   (match_operand:DI 1 "aarch64_sve_gather_offset_d" "Z, vgd, rk, rk")
+	   (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>" "Z, vgd, rk, rk")
	   (match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
 	   (match_operand:DI 3 "const_int_operand")
-	   (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, Ui1, i")
+	   (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, i")
 	   (mem:BLK (scratch))]
 	  UNSPEC_LD1_GATHER))]
   "TARGET_SVE"
   "@
-   ld1d\t%0.d, %5/z, [%2.d]
-   ld1d\t%0.d, %5/z, [%2.d, #%1]
-   ld1d\t%0.d, %5/z, [%1, %2.d]
-   ld1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
+   ld1<Vesize>\t%0.d, %5/z, [%2.d]
+   ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1]
+   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d]
+   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
 )
 
-;; Likewise, but with the offset being sign-extended from 32 bits.
-(define_insn "*mask_gather_load<mode><v_int_equiv>_sxtw"
-  [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w")
-	(unspec:SVE_FULL_D
+;; Likewise, but with the offset being extended from 32 bits.
+(define_insn_and_rewrite "*mask_gather_load<mode><v_int_container>_<su>xtw_unpacked"
+  [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
+	(unspec:SVE_2
+	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
+	   (match_operand:DI 1 "register_operand" "rk, rk")
+	   (unspec:VNx2DI
+	     [(match_operand 6)
+	      (ANY_EXTEND:VNx2DI
+		(match_operand:VNx2SI 2 "register_operand" "w, w"))]
+	     UNSPEC_PRED_X)
+	   (match_operand:DI 3 "const_int_operand")
+	   (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
+	   (mem:BLK (scratch))]
+	  UNSPEC_LD1_GATHER))]
+  "TARGET_SVE"
+  "@
+   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw]
+   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw %p4]"
+  "&& !CONSTANT_P (operands[6])"
+  {
+    operands[6] = CONSTM1_RTX (VNx2BImode);
+  }
+)
+
+;; Likewise, but with the offset being truncated to 32 bits and then
+;; sign-extended.
+(define_insn_and_rewrite "*mask_gather_load<mode><v_int_container>_sxtw"
+  [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
+	(unspec:SVE_2
 	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
 	   (match_operand:DI 1 "register_operand" "rk, rk")
 	   (unspec:VNx2DI
-	     [(match_dup 5)
+	     [(match_operand 6)
 	      (sign_extend:VNx2DI
 		(truncate:VNx2SI
 		  (match_operand:VNx2DI 2 "register_operand" "w, w")))]
 	     UNSPEC_PRED_X)
 	   (match_operand:DI 3 "const_int_operand")
-	   (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i")
+	   (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
 	   (mem:BLK (scratch))]
 	  UNSPEC_LD1_GATHER))]
   "TARGET_SVE"
   "@
-   ld1d\t%0.d, %5/z, [%1, %2.d, sxtw]
-   ld1d\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
+   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
+   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
+  "&& !CONSTANT_P (operands[6])"
+  {
+    operands[6] = CONSTM1_RTX (VNx2BImode);
+  }
 )
 
-;; Likewise, but with the offset being zero-extended from 32 bits.
-(define_insn "*mask_gather_load<mode><v_int_equiv>_uxtw"
-  [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w")
-	(unspec:SVE_FULL_D
+;; Likewise, but with the offset being truncated to 32 bits and then
+;; zero-extended.
+(define_insn "*mask_gather_load<mode><v_int_container>_uxtw"
+  [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
+	(unspec:SVE_2
 	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
 	   (match_operand:DI 1 "register_operand" "rk, rk")
 	   (and:VNx2DI
 	     (match_operand:VNx2DI 2 "register_operand" "w, w")
 	     (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
 	   (match_operand:DI 3 "const_int_operand")
-	   (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i")
+	   (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
 	   (mem:BLK (scratch))]
 	  UNSPEC_LD1_GATHER))]
   "TARGET_SVE"
   "@
-   ld1d\t%0.d, %5/z, [%1, %2.d, uxtw]
-   ld1d\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
+   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
+   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
 )
 
 ;; -------------------------------------------------------------------------
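As a sketch of what the extension enables (not part of the commit; the function name and options are illustrative assumptions): 8-bit and 16-bit elements can now be gathered through their 32-bit integer container, so a loop like the one below, the same shape as the new cases in gather_load_1.c further down, is expected to become an extending gather such as "ld1b z0.s, p0/z, [x1, z1.s, sxtw]" when compiled at -O2 -ftree-vectorize for an SVE target.

#include <stdint.h>

/* Hypothetical example, modelled on the updated gather_load_1.c.  */
void
gather_u8 (uint8_t *restrict dest, uint8_t *restrict src,
	   int32_t *restrict indices, int n)
{
  for (int i = 0; i < n; ++i)
    dest[i] += src[indices[i]];	/* extending ld1b gather + truncating st1b */
}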
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 06e91eb..c5b0fa7 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -364,6 +364,16 @@
 			    VNx4SI VNx2SI
 			    VNx2DI])
 
+;; SVE modes with 2 or 4 elements.
+(define_mode_iterator SVE_24 [VNx2QI VNx2HI VNx2HF VNx2SI VNx2SF VNx2DI VNx2DF
+			      VNx4QI VNx4HI VNx4HF VNx4SI VNx4SF])
+
+;; SVE modes with 2 elements.
+(define_mode_iterator SVE_2 [VNx2QI VNx2HI VNx2HF VNx2SI VNx2SF VNx2DI VNx2DF])
+
+;; SVE modes with 4 elements.
+(define_mode_iterator SVE_4 [VNx4QI VNx4HI VNx4HF VNx4SI VNx4SF])
+
 ;; Modes involved in extending or truncating SVE data, for 8 elements per
 ;; 128-bit block.
 (define_mode_iterator VNx8_NARROW [VNx8QI])
@@ -1113,6 +1123,31 @@
 			     (VNx4SI "vnx4sf") (VNx4SF "vnx4sf")
 			     (VNx2DI "vnx2df") (VNx2DF "vnx2df")])
 
+;; Maps full and partial vector modes of any element type to a full-vector
+;; integer mode with the same number of units.
+(define_mode_attr V_INT_CONTAINER [(VNx16QI "VNx16QI") (VNx8QI "VNx8HI")
+				   (VNx4QI "VNx4SI") (VNx2QI "VNx2DI")
+				   (VNx8HI "VNx8HI") (VNx4HI "VNx4SI")
+				   (VNx2HI "VNx2DI")
+				   (VNx4SI "VNx4SI") (VNx2SI "VNx2DI")
+				   (VNx2DI "VNx2DI")
+				   (VNx8HF "VNx8HI") (VNx4HF "VNx4SI")
+				   (VNx2HF "VNx2DI")
+				   (VNx4SF "VNx4SI") (VNx2SF "VNx2DI")
+				   (VNx2DF "VNx2DI")])
+
+;; Lower-case version of V_INT_CONTAINER.
+(define_mode_attr v_int_container [(VNx16QI "vnx16qi") (VNx8QI "vnx8hi")
+				   (VNx4QI "vnx4si") (VNx2QI "vnx2di")
+				   (VNx8HI "vnx8hi") (VNx4HI "vnx4si")
+				   (VNx2HI "vnx2di")
+				   (VNx4SI "vnx4si") (VNx2SI "vnx2di")
+				   (VNx2DI "vnx2di")
+				   (VNx8HF "vnx8hi") (VNx4HF "vnx4si")
+				   (VNx2HF "vnx2di")
+				   (VNx4SF "vnx4si") (VNx2SF "vnx2di")
+				   (VNx2DF "vnx2di")])
+
 ;; Mode for vector conditional operations where the comparison has
 ;; different type from the lhs.
 (define_mode_attr V_cmp_mixed [(V2SI "V2SF") (V4SI "V4SF")
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 28f99e0..e15be63 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,25 @@
 2019-11-16  Richard Sandiford  <richard.sandiford@arm.com>
 
+	* gcc.target/aarch64/sve/gather_load_1.c (TEST_LOOP): Start at 0.
+	(TEST_ALL): Add tests for 8-bit and 16-bit elements.
+	* gcc.target/aarch64/sve/gather_load_2.c: Update accordingly.
+	* gcc.target/aarch64/sve/gather_load_3.c (TEST_LOOP): Start at 0.
+	(TEST_ALL): Add tests for 8-bit and 16-bit elements.
+	* gcc.target/aarch64/sve/gather_load_4.c: Update accordingly.
+	* gcc.target/aarch64/sve/gather_load_5.c (TEST_LOOP): Start at 0.
+	(TEST_ALL): Add tests for 8-bit, 16-bit and 32-bit elements.
+	* gcc.target/aarch64/sve/gather_load_6.c: Add
+	--param aarch64-sve-compare-costs=0.
+	(TEST_LOOP): Start at 0.
+	* gcc.target/aarch64/sve/gather_load_7.c: Add
+	--param aarch64-sve-compare-costs=0.
+	* gcc.target/aarch64/sve/gather_load_8.c: New test.
+	* gcc.target/aarch64/sve/gather_load_9.c: Likewise.
+	* gcc.target/aarch64/sve/mask_gather_load_6.c: Add
+	--param aarch64-sve-compare-costs=0.
+
+2019-11-16  Richard Sandiford  <richard.sandiford@arm.com>
+
 	* gcc.target/aarch64/sve/mask_struct_load_1.c: Add
 	--param aarch64-sve-compare-costs=0.
 	* gcc.target/aarch64/sve/mask_struct_load_2.c: Likewise.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/gather_load_1.c b/gcc/testsuite/gcc.target/aarch64/sve/gather_load_1.c
index 33f1629..941ca65 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/gather_load_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/gather_load_1.c
@@ -8,17 +8,20 @@
 #define INDEX64 int64_t
 #endif
 
-/* Invoked 18 times for each data size.  */
 #define TEST_LOOP(DATA_TYPE, BITS)					\
   void __attribute__ ((noinline, noclone))				\
   f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src,	\
		  INDEX##BITS *indices, int n)				\
   {									\
-    for (int i = 9; i < n; ++i)						\
+    for (int i = 0; i < n; ++i)						\
       dest[i] += src[indices[i]];					\
   }
 
 #define TEST_ALL(T)				\
+  T (int8_t, 32)				\
+  T (uint8_t, 32)				\
+  T (int16_t, 32)				\
+  T (uint16_t, 32)				\
   T (int32_t, 32)				\
   T (uint32_t, 32)				\
   T (float, 32)					\
@@ -28,5 +31,12 @@
 
 TEST_ALL (TEST_LOOP)
 
+/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw 1\]\n} 2 } } */
 /* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 3 } } */
 /* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/gather_load_2.c b/gcc/testsuite/gcc.target/aarch64/sve/gather_load_2.c
index e3fb2a9..4a73d1f 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/gather_load_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/gather_load_2.c
@@ -6,5 +6,12 @@
 
 #include "gather_load_1.c"
 
+/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, uxtw 1\]\n} 2 } } */
 /* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, uxtw 2\]\n} 3 } } */
 /* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/gather_load_3.c b/gcc/testsuite/gcc.target/aarch64/sve/gather_load_3.c
index 54af507..bd4b208 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/gather_load_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/gather_load_3.c
@@ -8,17 +8,20 @@
 #define INDEX64 int64_t
 #endif
 
-/* Invoked 18 times for each data size.  */
 #define TEST_LOOP(DATA_TYPE, BITS)					\
   void __attribute__ ((noinline, noclone))				\
   f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src,	\
		  INDEX##BITS *indices, int n)				\
   {									\
-    for (int i = 9; i < n; ++i)						\
+    for (int i = 0; i < n; ++i)						\
       dest[i] += *(DATA_TYPE *) ((char *) src + indices[i]);		\
   }
 
 #define TEST_ALL(T)				\
+  T (int8_t, 32)				\
+  T (uint8_t, 32)				\
+  T (int16_t, 32)				\
+  T (uint16_t, 32)				\
   T (int32_t, 32)				\
   T (uint32_t, 32)				\
   T (float, 32)					\
@@ -28,5 +31,12 @@
 
 TEST_ALL (TEST_LOOP)
 
+/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 2 } } */
 /* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 3 } } */
 /* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d\]\n} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/gather_load_4.c b/gcc/testsuite/gcc.target/aarch64/sve/gather_load_4.c
index 3e2c831..2cfded6 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/gather_load_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/gather_load_4.c
@@ -6,5 +6,12 @@
 
 #include "gather_load_3.c"
 
+/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 2 } } */
 /* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 3 } } */
 /* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d\]\n} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/gather_load_5.c b/gcc/testsuite/gcc.target/aarch64/sve/gather_load_5.c
index b22a80a..3737e04 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/gather_load_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/gather_load_5.c
@@ -3,21 +3,34 @@
 
 #include <stdint.h>
 
-/* Invoked 18 times for each data size.  */
 #define TEST_LOOP(DATA_TYPE)					\
   void __attribute__ ((noinline, noclone))			\
   f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict *src, \
		  int n)					\
   {								\
-    for (int i = 9; i < n; ++i)					\
+    for (int i = 0; i < n; ++i)					\
       dest[i] += *src[i];					\
   }
 
 #define TEST_ALL(T)				\
+  T (int8_t)					\
+  T (uint8_t)					\
+  T (int16_t)					\
+  T (uint16_t)					\
+  T (int32_t)					\
+  T (uint32_t)					\
   T (int64_t)					\
   T (uint64_t)					\
   T (double)
 
 TEST_ALL (TEST_LOOP)
 
+/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.d, p[0-7]/z, \[z[0-9]+.d\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d, p[0-7]/z, \[z[0-9]+.d\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d, p[0-7]/z, \[z[0-9]+.d\]\n} 2 } } */
 /* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[z[0-9]+.d\]\n} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.d,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.d,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.d,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/gather_load_6.c b/gcc/testsuite/gcc.target/aarch64/sve/gather_load_6.c
index 8445be4..6fdd16b 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/gather_load_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/gather_load_6.c
@@ -1,5 +1,5 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize -fwrapv --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fwrapv --save-temps --param aarch64-sve-compare-costs=0" } */
 
 #include <stdint.h>
 
@@ -8,13 +8,12 @@
 #define INDEX32 int32_t
 #endif
 
-/* Invoked 18 times for each data size.  */
 #define TEST_LOOP(DATA_TYPE, BITS)					\
   void __attribute__ ((noinline, noclone))				\
   f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src,	\
		  INDEX##BITS *indices, INDEX##BITS mask, int n)	\
   {									\
-    for (int i = 9; i < n; ++i)						\
+    for (int i = 0; i < n; ++i)						\
       dest[i] = src[(INDEX##BITS) (indices[i] | mask)];		\
   }
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/gather_load_7.c b/gcc/testsuite/gcc.target/aarch64/sve/gather_load_7.c
index f5ae930..5a3f3e7 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/gather_load_7.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/gather_load_7.c
@@ -1,5 +1,5 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps --param aarch64-sve-compare-costs=0" } */
 
 #define INDEX16 uint16_t
 #define INDEX32 uint32_t
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/gather_load_8.c b/gcc/testsuite/gcc.target/aarch64/sve/gather_load_8.c
new file mode 100644
index 0000000..0ea6f72
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/gather_load_8.c
@@ -0,0 +1,46 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize -fwrapv --save-temps" } */
+
+#include <stdint.h>
+
+#ifndef INDEX32
+#define INDEX16 int16_t
+#define INDEX32 int32_t
+#endif
+
+#define TEST_LOOP(DATA_TYPE, BITS)					\
+  void __attribute__ ((noinline, noclone))				\
+  f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src,	\
+		 INDEX##BITS *indices, INDEX##BITS mask, int n)		\
+  {									\
+    for (int i = 0; i < n; ++i)						\
+      dest[i] = src[(INDEX##BITS) (indices[i] + mask)];			\
+  }
+
+#define TEST_ALL(T)				\
+  T (int8_t, 16)				\
+  T (uint8_t, 16)				\
+  T (int16_t, 16)				\
+  T (uint16_t, 16)				\
+  T (_Float16, 16)				\
+  T (int32_t, 16)				\
+  T (uint32_t, 16)				\
+  T (float, 16)					\
+  T (int64_t, 32)				\
+  T (uint64_t, 32)				\
+  T (double, 32)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw 1\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, sxtw 3\]\n} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tsxt.\tz} 8 } } */
+/* { dg-final { scan-assembler-times {\tsxth\tz[0-9]+\.s,} 8 } } */
+
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s,} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/gather_load_9.c b/gcc/testsuite/gcc.target/aarch64/sve/gather_load_9.c
new file mode 100644
index 0000000..04b71f1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/gather_load_9.c
@@ -0,0 +1,20 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize -fwrapv --save-temps" } */
+
+#define INDEX16 uint16_t
+#define INDEX32 uint32_t
+
+#include "gather_load_8.c"
+
+/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, uxtw 1\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, uxtw 2\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, uxtw 3\]\n} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tuxt.\tz} 8 } } */
+/* { dg-final { scan-assembler-times {\tuxth\tz[0-9]+\.s,} 8 } } */
+
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s,} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_6.c b/gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_6.c
index ff01431..a13516a 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_6.c
@@ -1,5 +1,5 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize -ffast-math --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --save-temps --param aarch64-sve-compare-costs=0" } */
 
 #include <stdint.h>
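A companion sketch for the 64-bit forms (again illustrative, not from the commit; the function name is hypothetical): the new *mask_gather_load<mode><v_int_container>_<su>xtw_unpacked pattern folds a 32-bit index extension into the gather's addressing mode instead of emitting separate unpack instructions. This mirrors the int64_t case in gather_load_8.c above, which expects "ld1d z0.d, p0/z, [x1, z1.d, sxtw 3]" at -O2 -ftree-vectorize -fwrapv.

#include <stdint.h>

/* Hypothetical example, modelled on gather_load_8.c; -fwrapv makes the
   signed index arithmetic well defined.  */
void
gather_s64 (int64_t *restrict dest, int64_t *restrict src,
	    int32_t *restrict indices, int32_t mask, int n)
{
  for (int i = 0; i < n; ++i)
    /* The cast keeps the 32-bit sign extension explicit, so it can be
       performed by the sxtw in the gather address.  */
    dest[i] = src[(int32_t) (indices[i] + mask)];
}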