aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Sandiford <richard.sandiford@arm.com>2019-11-16 11:30:46 +0000
committerRichard Sandiford <rsandifo@gcc.gnu.org>2019-11-16 11:30:46 +0000
commit37a3662f76d79a1d9ff02a31e5cc0f7e20dfbc60 (patch)
tree660a9b8440f573162b2b065adca170d59c4b9a64
parent87a80d27218f2325d05adc5382abe7d582062306 (diff)
downloadgcc-37a3662f76d79a1d9ff02a31e5cc0f7e20dfbc60.zip
gcc-37a3662f76d79a1d9ff02a31e5cc0f7e20dfbc60.tar.gz
gcc-37a3662f76d79a1d9ff02a31e5cc0f7e20dfbc60.tar.bz2
[AArch64] Add scatter stores for partial SVE modes
This patch adds support for scatter stores of partial vectors, where the vector base or offset elements can be wider than the elements being stored. 2019-11-16 Richard Sandiford <richard.sandiford@arm.com> gcc/ * config/aarch64/aarch64-sve.md (scatter_store<SVE_FULL_SD:mode><v_int_equiv>): Extend to... (scatter_store<SVE_24:mode><v_int_container>): ...this. (mask_scatter_store<SVE_FULL_S:mode><v_int_equiv>): Extend to... (mask_scatter_store<SVE_4:mode><v_int_equiv>): ...this. (mask_scatter_store<SVE_FULL_D:mode><v_int_equiv>): Extend to... (mask_scatter_store<SVE_2:mode><v_int_equiv>): ...this. (*mask_scatter_store<mode><v_int_container>_<su>xtw_unpacked): New pattern. (*mask_scatter_store<SVE_FULL_D:mode><v_int_equiv>_sxtw): Extend to... (*mask_scatter_store<SVE_2:mode><v_int_equiv>_sxtw): ...this. (*mask_scatter_store<SVE_FULL_D:mode><v_int_equiv>_uxtw): Extend to... (*mask_scatter_store<SVE_2:mode><v_int_equiv>_uxtw): ...this. gcc/testsuite/ * gcc.target/aarch64/sve/scatter_store_1.c (TEST_LOOP): Start at 0. (TEST_ALL): Add tests for 8-bit and 16-bit elements. * gcc.target/aarch64/sve/scatter_store_2.c: Update accordingly. * gcc.target/aarch64/sve/scatter_store_3.c (TEST_LOOP): Start at 0. (TEST_ALL): Add tests for 8-bit and 16-bit elements. * gcc.target/aarch64/sve/scatter_store_4.c: Update accordingly. * gcc.target/aarch64/sve/scatter_store_5.c (TEST_LOOP): Start at 0. (TEST_ALL): Add tests for 8-bit, 16-bit and 32-bit elements. * gcc.target/aarch64/sve/scatter_store_8.c: New test. * gcc.target/aarch64/sve/scatter_store_9.c: Likewise. From-SVN: r278347
-rw-r--r--gcc/ChangeLog16
-rw-r--r--gcc/config/aarch64/aarch64-sve.md97
-rw-r--r--gcc/testsuite/ChangeLog13
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/scatter_store_1.c8
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/scatter_store_2.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/scatter_store_3.c9
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/scatter_store_4.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/scatter_store_5.c12
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/scatter_store_8.c46
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/scatter_store_9.c20
10 files changed, 185 insertions, 40 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 78a79ae..2a9d587 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,21 @@
2019-11-16 Richard Sandiford <richard.sandiford@arm.com>
+ * config/aarch64/aarch64-sve.md
+ (scatter_store<SVE_FULL_SD:mode><v_int_equiv>): Extend to...
+ (scatter_store<SVE_24:mode><v_int_container>): ...this.
+ (mask_scatter_store<SVE_FULL_S:mode><v_int_equiv>): Extend to...
+ (mask_scatter_store<SVE_4:mode><v_int_equiv>): ...this.
+ (mask_scatter_store<SVE_FULL_D:mode><v_int_equiv>): Extend to...
+ (mask_scatter_store<SVE_2:mode><v_int_equiv>): ...this.
+ (*mask_scatter_store<mode><v_int_container>_<su>xtw_unpacked): New
+ pattern.
+ (*mask_scatter_store<SVE_FULL_D:mode><v_int_equiv>_sxtw): Extend to...
+ (*mask_scatter_store<SVE_2:mode><v_int_equiv>_sxtw): ...this.
+ (*mask_scatter_store<SVE_FULL_D:mode><v_int_equiv>_uxtw): Extend to...
+ (*mask_scatter_store<SVE_2:mode><v_int_equiv>_uxtw): ...this.
+
+2019-11-16 Richard Sandiford <richard.sandiford@arm.com>
+
* config/aarch64/iterators.md (SVE_2BHSI, SVE_2HSDI, SVE_4BHI)
(SVE_4HSI): New mode iterators.
(ANY_EXTEND2): New code iterator.
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 1dcbb4b..cdc3b4c 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -2135,15 +2135,15 @@
;; -------------------------------------------------------------------------
;; Unpredicated scatter stores.
-(define_expand "scatter_store<mode><v_int_equiv>"
+(define_expand "scatter_store<mode><v_int_container>"
[(set (mem:BLK (scratch))
(unspec:BLK
[(match_dup 5)
(match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>")
- (match_operand:<V_INT_EQUIV> 1 "register_operand")
+ (match_operand:<V_INT_CONTAINER> 1 "register_operand")
(match_operand:DI 2 "const_int_operand")
(match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
- (match_operand:SVE_FULL_SD 4 "register_operand")]
+ (match_operand:SVE_24 4 "register_operand")]
UNSPEC_ST1_SCATTER))]
"TARGET_SVE"
{
@@ -2153,48 +2153,74 @@
;; Predicated scatter stores for 32-bit elements. Operand 2 is true for
;; unsigned extension and false for signed extension.
-(define_insn "mask_scatter_store<mode><v_int_equiv>"
+(define_insn "mask_scatter_store<mode><v_int_container>"
[(set (mem:BLK (scratch))
(unspec:BLK
[(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
- (match_operand:DI 0 "aarch64_sve_gather_offset_w" "Z, vgw, rk, rk, rk, rk")
+ (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vgw, rk, rk, rk, rk")
(match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w")
(match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
- (match_operand:DI 3 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, Ui1, i, i")
- (match_operand:SVE_FULL_S 4 "register_operand" "w, w, w, w, w, w")]
+ (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
+ (match_operand:SVE_4 4 "register_operand" "w, w, w, w, w, w")]
UNSPEC_ST1_SCATTER))]
"TARGET_SVE"
"@
- st1w\t%4.s, %5, [%1.s]
- st1w\t%4.s, %5, [%1.s, #%0]
- st1w\t%4.s, %5, [%0, %1.s, sxtw]
- st1w\t%4.s, %5, [%0, %1.s, uxtw]
- st1w\t%4.s, %5, [%0, %1.s, sxtw %p3]
- st1w\t%4.s, %5, [%0, %1.s, uxtw %p3]"
+ st1<Vesize>\t%4.s, %5, [%1.s]
+ st1<Vesize>\t%4.s, %5, [%1.s, #%0]
+ st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
+ st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
+ st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
+ st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]"
)
;; Predicated scatter stores for 64-bit elements. The value of operand 2
;; doesn't matter in this case.
-(define_insn "mask_scatter_store<mode><v_int_equiv>"
+(define_insn "mask_scatter_store<mode><v_int_container>"
[(set (mem:BLK (scratch))
(unspec:BLK
[(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
- (match_operand:DI 0 "aarch64_sve_gather_offset_d" "Z, vgd, rk, rk")
+ (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vgd, rk, rk")
(match_operand:VNx2DI 1 "register_operand" "w, w, w, w")
(match_operand:DI 2 "const_int_operand")
- (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, Ui1, Ui1, i")
- (match_operand:SVE_FULL_D 4 "register_operand" "w, w, w, w")]
+ (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, i")
+ (match_operand:SVE_2 4 "register_operand" "w, w, w, w")]
UNSPEC_ST1_SCATTER))]
"TARGET_SVE"
"@
- st1d\t%4.d, %5, [%1.d]
- st1d\t%4.d, %5, [%1.d, #%0]
- st1d\t%4.d, %5, [%0, %1.d]
- st1d\t%4.d, %5, [%0, %1.d, lsl %p3]"
+ st1<Vesize>\t%4.d, %5, [%1.d]
+ st1<Vesize>\t%4.d, %5, [%1.d, #%0]
+ st1<Vesize>\t%4.d, %5, [%0, %1.d]
+ st1<Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]"
)
-;; Likewise, but with the offset being sign-extended from 32 bits.
-(define_insn_and_rewrite "*mask_scatter_store<mode><v_int_equiv>_sxtw"
+;; Likewise, but with the offset being extended from 32 bits.
+(define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_<su>xtw_unpacked"
+ [(set (mem:BLK (scratch))
+ (unspec:BLK
+ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
+ (match_operand:DI 0 "register_operand" "rk, rk")
+ (unspec:VNx2DI
+ [(match_operand 6)
+ (ANY_EXTEND:VNx2DI
+ (match_operand:VNx2SI 1 "register_operand" "w, w"))]
+ UNSPEC_PRED_X)
+ (match_operand:DI 2 "const_int_operand")
+ (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
+ (match_operand:SVE_2 4 "register_operand" "w, w")]
+ UNSPEC_ST1_SCATTER))]
+ "TARGET_SVE"
+ "@
+ st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw]
+ st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw %p3]"
+ "&& !CONSTANT_P (operands[6])"
+ {
+ operands[6] = CONSTM1_RTX (<VPRED>mode);
+ }
+)
+
+;; Likewise, but with the offset being truncated to 32 bits and then
+;; sign-extended.
+(define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_sxtw"
[(set (mem:BLK (scratch))
(unspec:BLK
[(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
@@ -2206,21 +2232,22 @@
(match_operand:VNx2DI 1 "register_operand" "w, w")))]
UNSPEC_PRED_X)
(match_operand:DI 2 "const_int_operand")
- (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, i")
- (match_operand:SVE_FULL_D 4 "register_operand" "w, w")]
+ (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
+ (match_operand:SVE_2 4 "register_operand" "w, w")]
UNSPEC_ST1_SCATTER))]
"TARGET_SVE"
"@
- st1d\t%4.d, %5, [%0, %1.d, sxtw]
- st1d\t%4.d, %5, [%0, %1.d, sxtw %p3]"
- "&& !rtx_equal_p (operands[5], operands[6])"
+ st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw]
+ st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]"
+ "&& !CONSTANT_P (operands[6])"
{
- operands[6] = copy_rtx (operands[5]);
+ operands[6] = CONSTM1_RTX (<VPRED>mode);
}
)
-;; Likewise, but with the offset being zero-extended from 32 bits.
-(define_insn "*mask_scatter_store<mode><v_int_equiv>_uxtw"
+;; Likewise, but with the offset being truncated to 32 bits and then
+;; zero-extended.
+(define_insn "*mask_scatter_store<mode><v_int_container>_uxtw"
[(set (mem:BLK (scratch))
(unspec:BLK
[(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
@@ -2229,13 +2256,13 @@
(match_operand:VNx2DI 1 "register_operand" "w, w")
(match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
(match_operand:DI 2 "const_int_operand")
- (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, i")
- (match_operand:SVE_FULL_D 4 "register_operand" "w, w")]
+ (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
+ (match_operand:SVE_2 4 "register_operand" "w, w")]
UNSPEC_ST1_SCATTER))]
"TARGET_SVE"
"@
- st1d\t%4.d, %5, [%0, %1.d, uxtw]
- st1d\t%4.d, %5, [%0, %1.d, uxtw %p3]"
+ st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw]
+ st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]"
)
;; -------------------------------------------------------------------------
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index e9535af..23a452b 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,18 @@
2019-11-16 Richard Sandiford <richard.sandiford@arm.com>
+ * gcc.target/aarch64/sve/scatter_store_1.c (TEST_LOOP): Start at 0.
+ (TEST_ALL): Add tests for 8-bit and 16-bit elements.
+ * gcc.target/aarch64/sve/scatter_store_2.c: Update accordingly.
+ * gcc.target/aarch64/sve/scatter_store_3.c (TEST_LOOP): Start at 0.
+ (TEST_ALL): Add tests for 8-bit and 16-bit elements.
+ * gcc.target/aarch64/sve/scatter_store_4.c: Update accordingly.
+ * gcc.target/aarch64/sve/scatter_store_5.c (TEST_LOOP): Start at 0.
+ (TEST_ALL): Add tests for 8-bit, 16-bit and 32-bit elements.
+ * gcc.target/aarch64/sve/scatter_store_8.c: New test.
+ * gcc.target/aarch64/sve/scatter_store_9.c: Likewise.
+
+2019-11-16 Richard Sandiford <richard.sandiford@arm.com>
+
* gcc.target/aarch64/sve/gather_load_extend_1.c: New test.
* gcc.target/aarch64/sve/gather_load_extend_2.c: Likewise.
* gcc.target/aarch64/sve/gather_load_extend_3.c: Likewise.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_1.c b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_1.c
index 65be5e6..53078fb 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_1.c
@@ -13,11 +13,15 @@
f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \
INDEX##BITS *indices, int n) \
{ \
- for (int i = 9; i < n; ++i) \
+ for (int i = 0; i < n; ++i) \
dest[indices[i]] = src[i] + 1; \
}
#define TEST_ALL(T) \
+ T (int8_t, 32) \
+ T (uint8_t, 32) \
+ T (int16_t, 32) \
+ T (uint16_t, 32) \
T (int32_t, 32) \
T (uint32_t, 32) \
T (float, 32) \
@@ -27,5 +31,7 @@
TEST_ALL (TEST_LOOP)
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 1\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 3 } } */
/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_2.c b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_2.c
index 5cb507c..6bc7cbb 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_2.c
@@ -6,5 +6,7 @@
#include "scatter_store_1.c"
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw 1\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw 2\]\n} 3 } } */
/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_3.c b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_3.c
index faa85df..fe3d59a 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_3.c
@@ -8,17 +8,20 @@
#define INDEX64 int64_t
#endif
-/* Invoked 18 times for each data size. */
#define TEST_LOOP(DATA_TYPE, BITS) \
void __attribute__ ((noinline, noclone)) \
f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \
INDEX##BITS *indices, int n) \
{ \
- for (int i = 9; i < n; ++i) \
+ for (int i = 0; i < n; ++i) \
*(DATA_TYPE *) ((char *) dest + indices[i]) = src[i] + 1; \
}
#define TEST_ALL(T) \
+ T (int8_t, 32) \
+ T (uint8_t, 32) \
+ T (int16_t, 32) \
+ T (uint16_t, 32) \
T (int32_t, 32) \
T (uint32_t, 32) \
T (float, 32) \
@@ -28,5 +31,7 @@
TEST_ALL (TEST_LOOP)
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 3 } } */
/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d\]\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_4.c b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_4.c
index 8dff57c..8a9fa2c 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_4.c
@@ -6,5 +6,7 @@
#include "scatter_store_3.c"
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 3 } } */
/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d\]\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_5.c b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_5.c
index 0962a72a..d3a6452 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_5.c
@@ -3,21 +3,29 @@
#include <stdint.h>
-/* Invoked 18 times for each data size. */
#define TEST_LOOP(DATA_TYPE) \
void __attribute__ ((noinline, noclone)) \
f_##DATA_TYPE (DATA_TYPE *restrict *dest, DATA_TYPE *restrict src, \
int n) \
{ \
- for (int i = 9; i < n; ++i) \
+ for (int i = 0; i < n; ++i) \
*dest[i] = src[i] + 1; \
}
#define TEST_ALL(T) \
+ T (int8_t) \
+ T (uint8_t) \
+ T (int16_t) \
+ T (uint16_t) \
+ T (int32_t) \
+ T (uint32_t) \
T (int64_t) \
T (uint64_t) \
T (double)
TEST_ALL (TEST_LOOP)
+/* We assume this isn't profitable for bytes. */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.d, p[0-7], \[z[0-9]+.d\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.d, p[0-7], \[z[0-9]+.d\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[z[0-9]+.d\]\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_8.c b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_8.c
new file mode 100644
index 0000000..30f37f0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_8.c
@@ -0,0 +1,46 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize -fwrapv --save-temps" } */
+
+#include <stdint.h>
+
+#ifndef INDEX32
+#define INDEX16 int16_t
+#define INDEX32 int32_t
+#endif
+
+#define TEST_LOOP(DATA_TYPE, BITS) \
+ void __attribute__ ((noinline, noclone)) \
+ f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src, \
+ INDEX##BITS *indices, INDEX##BITS mask, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ dest[(INDEX##BITS) (indices[i] + mask)] = src[i]; \
+ }
+
+#define TEST_ALL(T) \
+ T (int8_t, 16) \
+ T (uint8_t, 16) \
+ T (int16_t, 16) \
+ T (uint16_t, 16) \
+ T (_Float16, 16) \
+ T (int32_t, 16) \
+ T (uint32_t, 16) \
+ T (float, 16) \
+ T (int64_t, 32) \
+ T (uint64_t, 32) \
+ T (double, 32)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 1\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, sxtw 3\]\n} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tsxt.\tz} 8 } } */
+/* { dg-final { scan-assembler-times {\tsxth\tz[0-9]+\.s,} 8 } } */
+
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s,} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_9.c b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_9.c
new file mode 100644
index 0000000..0218d35
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/scatter_store_9.c
@@ -0,0 +1,20 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize -fwrapv --save-temps" } */
+
+#define INDEX16 uint16_t
+#define INDEX32 uint32_t
+
+#include "scatter_store_8.c"
+
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw 1\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw 2\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, uxtw 3\]\n} 3 } } */
+
+/* { dg-final { scan-assembler-times {\tuxt.\tz} 8 } } */
+/* { dg-final { scan-assembler-times {\tuxth\tz[0-9]+\.s,} 8 } } */
+
+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s,} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s,} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */