-rw-r--r--  gcc/ChangeLog                                            16
-rw-r--r--  gcc/config/aarch64/aarch64-sve.md                        97
-rw-r--r--  gcc/testsuite/ChangeLog                                  13
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/scatter_store_1.c    8
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/scatter_store_2.c    2
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/scatter_store_3.c    9
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/scatter_store_4.c    2
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/scatter_store_5.c   12
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/scatter_store_8.c   46
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/scatter_store_9.c   20
10 files changed, 185 insertions, 40 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 78a79ae..2a9d587 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,21 @@
2019-11-16 Richard Sandiford <richard.sandiford@arm.com>
+ * config/aarch64/aarch64-sve.md
+ (scatter_store<SVE_FULL_SD:mode><v_int_equiv>): Extend to...
+ (scatter_store<SVE_24:mode><v_int_container>): ...this.
+ (mask_scatter_store<SVE_FULL_S:mode><v_int_equiv>): Extend to...
+ (mask_scatter_store<SVE_4:mode><v_int_equiv>): ...this.
+ (mask_scatter_store<SVE_FULL_D:mode><v_int_equiv>): Extend to...
+ (mask_scatter_store<SVE_2:mode><v_int_equiv>): ...this.
+ (*mask_scatter_store<mode><v_int_container>_<su>xtw_unpacked): New
+ pattern.
+ (*mask_scatter_store<SVE_FULL_D:mode><v_int_equiv>_sxtw): Extend to...
+ (*mask_scatter_store<SVE_2:mode><v_int_equiv>_sxtw): ...this.
+ (*mask_scatter_store<SVE_FULL_D:mode><v_int_equiv>_uxtw): Extend to...
+ (*mask_scatter_store<SVE_2:mode><v_int_equiv>_uxtw): ...this.
+
+2019-11-16 Richard Sandiford <richard.sandiford@arm.com>
+
* config/aarch64/iterators.md (SVE_2BHSI, SVE_2HSDI, SVE_4BHI)
(SVE_4HSI): New mode iterators.
(ANY_EXTEND2): New code iterator.
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 1dcbb4b..cdc3b4c 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -2135,15 +2135,15 @@
;; -------------------------------------------------------------------------
;; Unpredicated scatter stores.
-(define_expand "scatter_store<mode><v_int_equiv>"
+(define_expand "scatter_store<mode><v_int_container>"
[(set (mem:BLK (scratch))
(unspec:BLK
[(match_dup 5)
(match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>")
- (match_operand:<V_INT_EQUIV> 1 "register_operand")
+ (match_operand:<V_INT_CONTAINER> 1 "register_operand")
(match_operand:DI 2 "const_int_operand")
(match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
- (match_operand:SVE_FULL_SD 4 "register_operand")]
+ (match_operand:SVE_24 4 "register_operand")]
UNSPEC_ST1_SCATTER))]
"TARGET_SVE"
{
@@ -2153,48 +2153,74 @@
;; Predicated scatter stores for 32-bit elements. Operand 2 is true for
;; unsigned extension and false for signed extension.
-(define_insn "mask_scatter_store<mode><v_int_equiv>"
+(define_insn "mask_scatter_store<mode><v_int_container>"
[(set (mem:BLK (scratch))
(unspec:BLK
[(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
- (match_operand:DI 0 "aarch64_sve_gather_offset_w" "Z, vgw, rk, rk, rk, rk")
+ (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vgw, rk, rk, rk, rk")
(match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w")
(match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
- (match_operand:DI 3 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, Ui1, i, i")
- (match_operand:SVE_FULL_S 4 "register_operand" "w, w, w, w, w, w")]
+ (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
+ (match_operand:SVE_4 4 "register_operand" "w, w, w, w, w, w")]
UNSPEC_ST1_SCATTER))]
"TARGET_SVE"
"@
- st1w\t%4.s, %5, [%1.s]
- st1w\t%4.s, %5, [%1.s, #%0]
- st1w\t%4.s, %5, [%0, %1.s, sxtw]
- st1w\t%4.s, %5, [%0, %1.s, uxtw]
- st1w\t%4.s, %5, [%0, %1.s, sxtw %p3]
- st1w\t%4.s, %5, [%0, %1.s, uxtw %p3]"
+ st1<Vesize>\t%4.s, %5, [%1.s]
+ st1<Vesize>\t%4.s, %5, [%1.s, #%0]
+ st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
+ st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
+ st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
+ st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]"
)
;; Predicated scatter stores for 64-bit elements. The value of operand 2
;; doesn't matter in this case.
-(define_insn "mask_scatter_store<mode><v_int_equiv>"
+(define_insn "mask_scatter_store<mode><v_int_container>"
[(set (mem:BLK (scratch))
(unspec:BLK
[(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
- (match_operand:DI 0 "aarch64_sve_gather_offset_d" "Z, vgd, rk, rk")
+ (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vgd, rk, rk")
(match_operand:VNx2DI 1 "register_operand" "w, w, w, w")
(match_operand:DI 2 "const_int_operand")
- (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, Ui1, Ui1, i")
- (match_operand:SVE_FULL_D 4 "register_operand" "w, w, w, w")]
+ (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, i")
+ (match_operand:SVE_2 4 "register_operand" "w, w, w, w")]
UNSPEC_ST1_SCATTER))]
"TARGET_SVE"
"@
- st1d\t%4.d, %5, [%1.d]
- st1d\t%4.d, %5, [%1.d, #%0]
- st1d\t%4.d, %5, [%0, %1.d]
- st1d\t%4.d, %5, [%0, %1.d, lsl %p3]"
+ st1<Vesize>\t%4.d, %5, [%1.d]
+ st1<Vesize>\t%4.d, %5, [%1.d, #%0]
+ st1<Vesize>\t%4.d, %5, [%0, %1.d]
+ st1<Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]"
)
-;; Likewise, but with the offset being sign-extended from 32 bits.
-(define_insn_and_rewrite "*mask_scatter_store<mode><v_int_equiv>_sxtw"
+;; Likewise, but with the offset being extended from 32 bits.
+(define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_<su>xtw_unpacked"
+ [(set (mem:BLK (scratch))
+ (unspec:BLK
+ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
+ (match_operand:DI 0 "register_operand" "rk, rk")
+ (unspec:VNx2DI
+ [(match_operand 6)
+ (ANY_EXTEND:VNx2DI
+ (match_operand:VNx2SI 1 "register_operand" "w, w"))]
+ UNSPEC_PRED_X)
+ (match_operand:DI 2 "const_int_operand")
+ (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
+ (match_operand:SVE_2 4 "register_operand" "w, w")]
+ UNSPEC_ST1_SCATTER))]
+ "TARGET_SVE"
+ "@
+ st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw]
+ st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw %p3]"
+ "&& !CONSTANT_P (operands[6])"
+ {
+ operands[6] = CONSTM1_RTX (<VPRED>mode);
+ }
+)
+
+;; Likewise, but with the offset being truncated to 32 bits and then
+;; sign-extended.
+(define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_sxtw"
[(set (mem:BLK (scratch))
(unspec:BLK
[(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
@@ -2206,21 +2232,22 @@
(match_operand:VNx2DI 1 "register_operand" "w, w")))]
UNSPEC_PRED_X)
(match_operand:DI 2 "const_int_operand")
- (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, i")
- (match_operand:SVE_FULL_D 4 "register_operand" "w, w")]
+ (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
+ (match_operand:SVE_2 4 "register_operand" "w, w")]
UNSPEC_ST1_SCATTER))]
"TARGET_SVE"
"@
- st1d\t%4.d, %5, [%0, %1.d, sxtw]
- st1d\t%4.d, %5, [%0, %1.d, sxtw %p3]"
- "&& !rtx_equal_p (operands[5], operands[6])"
+ st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw]
+ st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]"
+ "&& !CONSTANT_P (operands[6])"
{
- operands[6] = copy_rtx (operands[5]);
+ operands[6] = CONSTM1_RTX (<VPRED>mode);
}
)
-;; Likewise, but with the offset being zero-extended from 32 bits.
-(define_insn "*mask_scatter_store<mode><v_int_equiv>_uxtw"
+;; Likewise, but with the offset being truncated to 32 bits and then
+;; zero-extended.
+(define_insn "*mask_scatter_store<mode><v_int_container>_uxtw"
[(set (mem:BLK (scratch))
(unspec:BLK
[(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
@@ -2229,13 +2256,13 @@
(match_operand:VNx2DI 1 "register_operand" "w, w")
(match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
(match_operand:DI 2 "const_int_operand")
- (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, i")
- (match_operand:SVE_FULL_D 4 "register_operand" "w, w")]
+ (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
+ (match_operand:SVE_2 4 "register_operand" "w, w")]
UNSPEC_ST1_SCATTER))]
"TARGET_SVE"
"@
- st1d\t%4.d, %5, [%0, %1.d, uxtw]
- st1d\t%4.d, %5, [%0, %1.d, uxtw %p3]"
+ st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw]
+ st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]"
)
;; -------------------------------------------------------------------------
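
For illustration only (not part of the patch): a minimal C sketch of the kind of loop the widened 32-bit-container patterns are aimed at, namely a 16-bit scatter store addressed through 32-bit indices.  The function name and compile flags here are assumptions; the updated scatter_store_1.c below uses essentially this loop and, with SVE vectorization enabled (e.g. -O2 -ftree-vectorize plus an SVE -march setting), scans for an st1h with an sxtw-scaled offset.

    #include <stdint.h>

    void
    store_i16 (int16_t *restrict dest, int16_t *restrict src,
               int32_t *indices, int n)
    {
      /* Each iteration stores one 16-bit element at a 32-bit index.
         Expected SVE form (per the scatter_store_1.c scan):
         st1h z<data>.s, p<pred>, [x<base>, z<index>.s, sxtw 1].  */
      for (int i = 0; i < n; ++i)
        dest[indices[i]] = src[i] + 1;
    }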
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index e9535af..23a452b 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,18 @@
2019-11-16 Richard Sandiford <richard.sandiford@arm.com>
+ * gcc.target/aarch64/sve/scatter_store_1.c (TEST_LOOP): Start at 0.
+ (TEST_ALL): Add tests for 8-bit and 16-bit elements.
+ * gcc.target/aarch64/sve/scatter_store_2.c: Update accordingly.
+ * gcc.target/aarch64/sve/scatter_store_3.c (TEST_LOOP): Start at 0.
+ (TEST_ALL): Add tests for 8-bit and 16-bit elements.
+ * gcc.target/aarch64/sve/scatter_store_4.c: Update accordingly.
+ * gcc.target/aarch64/sve/scatter_store_5.c (TEST_LOOP): Start at 0.
+ (TEST_ALL): Add tests for 8-bit, 16-bit and 32-bit elements.
+ * gcc.target/aarch64/sve/scatter_store_8.c: New test.
+ * gcc.target/aarch64/sve/scatter_store_9.c: Likewise.
+
+2019-11-16 Richard Sandiford <richard.sandiford@arm.com>
+
* gcc.target/aarch64/sve/gather_load_extend_1.c: New test.
* gcc.target/aarch64/sve/gather_load_extend_2.c: Likewise.
* gcc.target/aarch64/sve/gather_load_extend_3.c: Likewise.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_1.c b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_1.c
index 65be5e6..53078fb 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_1.c
@@ -13,11 +13,15 @@
f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \
INDEX##BITS *indices, int n) \
{ \
- for (int i = 9; i < n; ++i) \
+ for (int i = 0; i < n; ++i) \
dest[indices[i]] = src[i] + 1; \
}
#define TEST_ALL(T) \
+ T (int8_t, 32) \
+ T (uint8_t, 32) \
+ T (int16_t, 32) \
+ T (uint16_t, 32) \
T (int32_t, 32) \
T (uint32_t, 32) \
T (float, 32) \
@@ -27,5 +31,7 @@
TEST_ALL (TEST_LOOP)
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 1\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 3 } } */
/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_2.c b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_2.c
index 5cb507c..6bc7cbb 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_2.c
@@ -6,5 +6,7 @@
#include "scatter_store_1.c"
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw 1\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw 2\]\n} 3 } } */
/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_3.c b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_3.c
index faa85df..fe3d59a 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_3.c
@@ -8,17 +8,20 @@
#define INDEX64 int64_t
#endif
-/* Invoked 18 times for each data size. */
#define TEST_LOOP(DATA_TYPE, BITS) \
void __attribute__ ((noinline, noclone)) \
f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \
INDEX##BITS *indices, int n) \
{ \
- for (int i = 9; i < n; ++i) \
+ for (int i = 0; i < n; ++i) \
*(DATA_TYPE *) ((char *) dest + indices[i]) = src[i] + 1; \
}
#define TEST_ALL(T) \
+ T (int8_t, 32) \
+ T (uint8_t, 32) \
+ T (int16_t, 32) \
+ T (uint16_t, 32) \
T (int32_t, 32) \
T (uint32_t, 32) \
T (float, 32) \
@@ -28,5 +31,7 @@
TEST_ALL (TEST_LOOP)
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 3 } } */
/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d\]\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_4.c b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_4.c
index 8dff57c..8a9fa2c 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_4.c
@@ -6,5 +6,7 @@
#include "scatter_store_3.c"
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 3 } } */
/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d\]\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_5.c b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_5.c
index 0962a72a..d3a6452 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_5.c
@@ -3,21 +3,29 @@
#include <stdint.h>
-/* Invoked 18 times for each data size. */
#define TEST_LOOP(DATA_TYPE) \
void __attribute__ ((noinline, noclone)) \
f_##DATA_TYPE (DATA_TYPE *restrict *dest, DATA_TYPE *restrict src, \
int n) \
{ \
- for (int i = 9; i < n; ++i) \
+ for (int i = 0; i < n; ++i) \
*dest[i] = src[i] + 1; \
}
#define TEST_ALL(T) \
+ T (int8_t) \
+ T (uint8_t) \
+ T (int16_t) \
+ T (uint16_t) \
+ T (int32_t) \
+ T (uint32_t) \
T (int64_t) \
T (uint64_t) \
T (double)
TEST_ALL (TEST_LOOP)
+/* We assume this isn't profitable for bytes. */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.d, p[0-7], \[z[0-9]+.d\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.d, p[0-7], \[z[0-9]+.d\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[z[0-9]+.d\]\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_8.c b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_8.c
new file mode 100644
index 0000000..30f37f0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_8.c
@@ -0,0 +1,46 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize -fwrapv --save-temps" } */
+
+#include <stdint.h>
+
+#ifndef INDEX32
+#define INDEX16 int16_t
+#define INDEX32 int32_t
+#endif
+
+#define TEST_LOOP(DATA_TYPE, BITS) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \
+ INDEX##BITS *indices, INDEX##BITS mask, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ dest[(INDEX##BITS) (indices[i] + mask)] = src[i]; \
+ }
+
+#define TEST_ALL(T) \
+ T (int8_t, 16) \
+ T (uint8_t, 16) \
+ T (int16_t, 16) \
+ T (uint16_t, 16) \
+ T (_Float16, 16) \
+ T (int32_t, 16) \
+ T (uint32_t, 16) \
+ T (float, 16) \
+ T (int64_t, 32) \
+ T (uint64_t, 32) \
+ T (double, 32)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 1\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, sxtw 3\]\n} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tsxt.\tz} 8 } } */
+/* { dg-final { scan-assembler-times {\tsxth\tz[0-9]+\.s,} 8 } } */
+
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s,} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_9.c b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_9.c
new file mode 100644
index 0000000..0218d35
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_9.c
@@ -0,0 +1,20 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize -fwrapv --save-temps" } */
+
+#define INDEX16 uint16_t
+#define INDEX32 uint32_t
+
+#include "scatter_store_8.c"
+
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw 1\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw 2\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, uxtw 3\]\n} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tuxt.\tz} 8 } } */
+/* { dg-final { scan-assembler-times {\tuxth\tz[0-9]+\.s,} 8 } } */
+
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s,} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */
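
Likewise for illustration only: a sketch of the 64-bit-element case targeted by the new unpacked-offset pattern, where the offsets stay in a 32-bit index type so the sign-extension can be folded into the addressing mode.  The shape mirrors the new scatter_store_8.c above (the add-and-cast keeps the index computation 32-bit under -fwrapv); the function name and flags are assumptions.

    #include <stdint.h>

    void
    store_i64 (int64_t *restrict dest, int64_t *restrict src,
               int32_t *indices, int32_t mask, int n)
    {
      /* 64-bit data scattered through sign-extended 32-bit offsets.
         Expected SVE form (per the scatter_store_8.c scan):
         st1d z<data>.d, p<pred>, [x<base>, z<index>.d, sxtw 3].  */
      for (int i = 0; i < n; ++i)
        dest[(int32_t) (indices[i] + mask)] = src[i];
    }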