author     Richard Sandiford <richard.sandiford@arm.com>  2024-11-11 12:32:20 +0000
committer  Richard Sandiford <richard.sandiford@arm.com>  2024-11-11 12:32:20 +0000
commit     f7ed863632547705a6c791ea6487fcb33ee029a8 (patch)
tree       dc35203fa7b7742b84f8d941e5c6393514bbe2bf /gcc
parent     f13e0138269f16ae896d952425c04ad2e11b8dbe (diff)
aarch64: Add common subset of SVE2p1 and SME2
This patch handles the SVE2p1 instructions that are shared with SME2.  This includes the consecutive-register forms of the 2-register and 4-register loads and stores, but not the strided-register forms.  (A short usage sketch follows the ChangeLog below.)

gcc/
	* config/aarch64/aarch64.h (TARGET_SVE2p1_OR_SME2): New macro.
	* config/aarch64/aarch64-early-ra.cc (is_stride_candidate): Require TARGET_STREAMING_SME2.
	(early_ra::maybe_convert_to_strided_access): Likewise.
	* config/aarch64/aarch64-sve-builtins-sve2.def: Mark instructions that are common to both SVE2p1 and SME2.
	* config/aarch64/aarch64-sve.md (@aarch64_<sur>dot_prod_lane<SVE_FULL_SDI:mode><SVE_FULL_BHI:mode>): Test TARGET_SVE2p1_OR_SME2 instead of TARGET_STREAMING_SME2.
	(@aarch64_sve_<sve_fp_op>vnx4sf): Move TARGET_SVE_BF16 condition into SVE_BFLOAT_TERNARY_LONG.
	(@aarch64_sve_<sve_fp_op>_lanevnx4sf): Likewise SVE_BFLOAT_TERNARY_LONG_LANE.
	* config/aarch64/aarch64-sve2.md (@aarch64_<LD1_COUNT:optab><mode>): Require TARGET_SVE2p1_OR_SME2 instead of TARGET_STREAMING_SME2.
	(@aarch64_<ST1_COUNT:optab><mode>): Likewise.
	(@aarch64_sve_ptrue_c<BHSD_BITS>): Likewise.
	(@aarch64_sve_pext<BHSD_BITS>): Likewise.
	(@aarch64_sve_pext<BHSD_BITS>x2): Likewise.
	(@aarch64_sve_cntp_c<BHSD_BITS>): Likewise.
	(@aarch64_sve_fclamp<mode>): Likewise.
	(*aarch64_sve_fclamp<mode>_x): Likewise.
	(<sur>dot_prodvnx4sivnx8hi): Likewise.
	(aarch64_sve_fdotvnx4sfvnx8hf): Likewise.
	(aarch64_fdot_prod_lanevnx4sfvnx8hf): Likewise.
	(@aarch64_sve_while<while_optab_cmp>_b<BHSD_BITS>_x2): Likewise.
	(@aarch64_sve_while<while_optab_cmp>_c<BHSD_BITS>): Likewise.
	(@aarch64_sve_<optab><VNx8HI_ONLY:mode><VNx8SI_ONLY:mode>): Move TARGET_STREAMING_SME2 condition into SVE_QCVTxN.
	(@aarch64_sve_<sve_int_op><mode>): Likewise SVE2_INT_SHIFT_IMM_NARROWxN, but also require TARGET_STREAMING_SME2 for the 4-register forms.
	* config/aarch64/iterators.md (SVE_BFLOAT_TERNARY_LONG): Require TARGET_SVE2p1_OR_SME2 rather than TARGET_STREAMING_SME2 for UNSPEC_BFMLSLB and UNSPEC_BFMLSLT.  Require TARGET_SVE_BF16 for the others.
	(SVE_BFLOAT_TERNARY_LONG_LANE): Likewise.
	(SVE2_INT_SHIFT_IMM_NARROWxN): Require TARGET_SVE2p1_OR_SME2 for the interleaving forms and TARGET_STREAMING_SME2 for the rest.
	(SVE_QCVTxN): Likewise.

gcc/testsuite/
* gcc.target/aarch64/sve/clamp_3.c: New test. * gcc.target/aarch64/sve/clamp_4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/bfmlslb_f32.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/bfmlslb_lane_f32.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/bfmlslt_f32.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/bfmlslt_lane_f32.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/clamp_f16.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/clamp_f32.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/clamp_f64.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/cntp_c16.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/cntp_c32.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/cntp_c64.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/cntp_c8.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/dot_f32.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/dot_lane_f32.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/dot_lane_s32.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/dot_lane_u32.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/dot_s32.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/dot_u32.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ld1_bf16_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ld1_bf16_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ld1_f16_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ld1_f16_x4.c: Likewise.
* gcc.target/aarch64/sve2/acle/asm/ld1_f32_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ld1_f32_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ld1_f64_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ld1_f64_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ld1_s16_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ld1_s16_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ld1_s32_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ld1_s32_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ld1_s64_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ld1_s64_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ld1_s8_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ld1_s8_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ld1_u16_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ld1_u16_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ld1_u32_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ld1_u32_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ld1_u64_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ld1_u64_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ld1_u8_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ld1_u8_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ldnt1_bf16_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ldnt1_bf16_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ldnt1_f16_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ldnt1_f16_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ldnt1_f32_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ldnt1_f32_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ldnt1_f64_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ldnt1_f64_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ldnt1_s16_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ldnt1_s16_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ldnt1_s32_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ldnt1_s32_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ldnt1_s64_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ldnt1_s64_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ldnt1_s8_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ldnt1_s8_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ldnt1_u16_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ldnt1_u16_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ldnt1_u32_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ldnt1_u32_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ldnt1_u64_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ldnt1_u64_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ldnt1_u8_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ldnt1_u8_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/pext_lane_c16.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/pext_lane_c16_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/pext_lane_c32.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/pext_lane_c32_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/pext_lane_c64.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/pext_lane_c64_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/pext_lane_c8.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/pext_lane_c8_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ptrue_c16.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ptrue_c32.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ptrue_c64.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/ptrue_c8.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/qcvtn_s16_s32_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/qcvtn_u16_s32_x2.c: Likewise. 
* gcc.target/aarch64/sve2/acle/asm/qcvtn_u16_u32_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/qrshrn_s16_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/qrshrn_u16_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/qrshrun_u16_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/st1_bf16_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/st1_bf16_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/st1_f16_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/st1_f16_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/st1_f32_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/st1_f32_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/st1_f64_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/st1_f64_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/st1_s16_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/st1_s16_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/st1_s32_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/st1_s32_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/st1_s64_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/st1_s64_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/st1_s8_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/st1_s8_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/st1_u16_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/st1_u16_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/st1_u32_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/st1_u32_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/st1_u64_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/st1_u64_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/st1_u8_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/st1_u8_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/stnt1_bf16_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/stnt1_bf16_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/stnt1_f16_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/stnt1_f16_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/stnt1_f32_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/stnt1_f32_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/stnt1_f64_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/stnt1_f64_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/stnt1_s16_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/stnt1_s16_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/stnt1_s32_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/stnt1_s32_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/stnt1_s64_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/stnt1_s64_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/stnt1_s8_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/stnt1_s8_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/stnt1_u16_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/stnt1_u16_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/stnt1_u32_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/stnt1_u32_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/stnt1_u64_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/stnt1_u64_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/stnt1_u8_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/stnt1_u8_x4.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/whilege_b16_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/whilege_b32_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/whilege_b64_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/whilege_b8_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/whilege_c16.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/whilege_c32.c: Likewise. 
* gcc.target/aarch64/sve2/acle/asm/whilege_c64.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/whilege_c8.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/whilegt_b16_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/whilegt_b32_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/whilegt_b64_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/whilegt_b8_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/whilegt_c16.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/whilegt_c32.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/whilegt_c64.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/whilegt_c8.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/whilele_b16_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/whilele_b32_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/whilele_b64_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/whilele_b8_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/whilele_c16.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/whilele_c32.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/whilele_c64.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/whilele_c8.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/whilelt_b16_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/whilelt_b32_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/whilelt_b64_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/whilelt_b8_x2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/whilelt_c16.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/whilelt_c32.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/whilelt_c64.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/whilelt_c8.c: Likewise.
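Usage sketch (not part of the commit; the intrinsic spellings below follow the new tests, so treat the exact names and signatures as assumptions rather than a definitive reference): after this change, ACLE operations such as the floating-point svclamp, svptrue_c32, svcntp_c32 and the consecutive-register x2/x4 loads and stores only need +sve2p1 in non-streaming code, whereas they previously required streaming SME2.

/* Illustrative sketch only; assumes an arm_sve.h that exposes the
   SVE2p1/SME2 shared intrinsics exercised by the tests above.  */
#include <stdint.h>
#include <arm_sve.h>

#pragma GCC target "+sve2p1"

/* Clamp each lane of VAL to [LO, HI]; expected to map to FCLAMP.  */
svfloat32_t
clamp_f32 (svfloat32_t val, svfloat32_t lo, svfloat32_t hi)
{
  return svclamp (val, lo, hi);
}

/* Copy N floats using the consecutive-register two-vector load/store.  */
void
copy_f32_x2 (float *dst, const float *src, uint64_t n)
{
  svcount_t pg = svptrue_c32 ();
  uint64_t step = svcntp_c32 (pg, 2);	/* elements covered by one x2 access */
  uint64_t i = 0;
  for (; i + step <= n; i += step)
    {
      svfloat32x2_t vals = svld1_f32_x2 (pg, src + i);
      svst1_f32_x2 (pg, dst + i, vals);
    }
  for (; i < n; ++i)			/* scalar tail */
    dst[i] = src[i];
}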
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/aarch64/aarch64-early-ra.cc9
-rw-r--r--gcc/config/aarch64/aarch64-sve-builtins-sve2.def45
-rw-r--r--gcc/config/aarch64/aarch64-sve.md10
-rw-r--r--gcc/config/aarch64/aarch64-sve2.md38
-rw-r--r--gcc/config/aarch64/aarch64.h4
-rw-r--r--gcc/config/aarch64/iterators.md44
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/clamp_3.c28
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/clamp_4.c22
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bfmlslb_f32.c72
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bfmlslb_lane_f32.c91
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bfmlslt_f32.c72
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bfmlslt_lane_f32.c91
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_f16.c49
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_f32.c49
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_f64.c49
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cntp_c16.c46
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cntp_c32.c46
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cntp_c64.c46
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cntp_c8.c46
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_f32.c51
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_lane_f32.c100
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_lane_s32.c100
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_lane_u32.c100
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_s32.c51
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_u32.c51
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_bf16_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_bf16_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_f16_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_f16_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_f32_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_f32_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_f64_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_f64_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_s16_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_s16_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_s32_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_s32_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_s64_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_s64_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_s8_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_s8_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_u16_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_u16_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_u32_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_u32_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_u64_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_u64_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_u8_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_u8_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_bf16_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_bf16_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_f16_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_f16_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_f32_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_f32_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_f64_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_f64_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_s16_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_s16_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_s32_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_s32_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_s64_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_s64_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_s8_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_s8_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_u16_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_u16_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_u32_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_u32_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_u64_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_u64_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_u8_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_u8_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pext_lane_c16.c57
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pext_lane_c16_x2.c61
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pext_lane_c32.c57
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pext_lane_c32_x2.c61
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pext_lane_c64.c57
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pext_lane_c64_x2.c61
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pext_lane_c8.c57
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pext_lane_c8_x2.c61
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ptrue_c16.c48
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ptrue_c32.c48
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ptrue_c64.c48
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ptrue_c8.c48
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/qcvtn_s16_s32_x2.c57
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/qcvtn_u16_s32_x2.c57
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/qcvtn_u16_u32_x2.c57
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/qrshrn_s16_x2.c57
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/qrshrn_u16_x2.c57
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/qrshrun_u16_x2.c57
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_bf16_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_bf16_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_f16_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_f16_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_f32_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_f32_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_f64_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_f64_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_s16_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_s16_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_s32_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_s32_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_s64_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_s64_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_s8_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_s8_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_u16_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_u16_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_u32_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_u32_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_u64_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_u64_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_u8_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_u8_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_bf16_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_bf16_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_f16_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_f16_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_f32_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_f32_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_f64_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_f64_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_s16_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_s16_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_s32_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_s32_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_s64_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_s64_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_s8_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_s8_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_u16_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_u16_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_u32_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_u32_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_u64_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_u64_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_u8_x2.c269
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_u8_x4.c361
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilege_b16_x2.c126
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilege_b32_x2.c126
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilege_b64_x2.c126
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilege_b8_x2.c126
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilege_c16.c124
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilege_c32.c124
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilege_c64.c124
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilege_c8.c124
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilegt_b16_x2.c126
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilegt_b32_x2.c126
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilegt_b64_x2.c126
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilegt_b8_x2.c126
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilegt_c16.c124
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilegt_c32.c124
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilegt_c64.c124
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilegt_c8.c124
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilele_b16_x2.c126
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilele_b32_x2.c126
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilele_b64_x2.c126
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilele_b8_x2.c126
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilele_c16.c124
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilele_c32.c124
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilele_c64.c124
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilele_c8.c124
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilelt_b16_x2.c126
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilelt_b32_x2.c126
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilelt_b64_x2.c126
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilelt_b8_x2.c126
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilelt_c16.c124
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilelt_c32.c124
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilelt_c64.c124
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilelt_c8.c124
171 files changed, 36491 insertions, 65 deletions
diff --git a/gcc/config/aarch64/aarch64-early-ra.cc b/gcc/config/aarch64/aarch64-early-ra.cc
index bbd8468..0db8ea2 100644
--- a/gcc/config/aarch64/aarch64-early-ra.cc
+++ b/gcc/config/aarch64/aarch64-early-ra.cc
@@ -1062,8 +1062,9 @@ is_stride_candidate (rtx_insn *insn)
return false;
auto stride_type = get_attr_stride_type (insn);
- return (stride_type == STRIDE_TYPE_LD1_CONSECUTIVE
- || stride_type == STRIDE_TYPE_ST1_CONSECUTIVE);
+ return (TARGET_STREAMING_SME2
+ && (stride_type == STRIDE_TYPE_LD1_CONSECUTIVE
+ || stride_type == STRIDE_TYPE_ST1_CONSECUTIVE));
}
// Go through the constraints of INSN, which has already been extracted,
@@ -3213,9 +3214,9 @@ early_ra::maybe_convert_to_strided_access (rtx_insn *insn)
auto stride_type = get_attr_stride_type (insn);
rtx pat = PATTERN (insn);
rtx op;
- if (stride_type == STRIDE_TYPE_LD1_CONSECUTIVE)
+ if (TARGET_STREAMING_SME2 && stride_type == STRIDE_TYPE_LD1_CONSECUTIVE)
op = SET_DEST (pat);
- else if (stride_type == STRIDE_TYPE_ST1_CONSECUTIVE)
+ else if (TARGET_STREAMING_SME2 && stride_type == STRIDE_TYPE_ST1_CONSECUTIVE)
op = XVECEXP (SET_SRC (pat), 0, 1);
else
return false;
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
index 5cc32aa..9e8aad9 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
@@ -226,40 +226,53 @@ DEF_SVE_FUNCTION (svpsel_lane, select_pred, all_pred_count, none)
DEF_SVE_FUNCTION (svrevd, unary, all_data, mxz)
#undef REQUIRED_EXTENSIONS
-#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME2)
-DEF_SVE_FUNCTION_GS (svadd, binary_single, all_integer, x24, none)
+#define REQUIRED_EXTENSIONS sve_and_sme (AARCH64_FL_SVE2p1, AARCH64_FL_SME2)
DEF_SVE_FUNCTION (svbfmlslb, ternary_bfloat_opt_n, s_float, none)
DEF_SVE_FUNCTION (svbfmlslb_lane, ternary_bfloat_lane, s_float, none)
DEF_SVE_FUNCTION (svbfmlslt, ternary_bfloat_opt_n, s_float, none)
DEF_SVE_FUNCTION (svbfmlslt_lane, ternary_bfloat_lane, s_float, none)
DEF_SVE_FUNCTION (svclamp, clamp, all_float, none)
-DEF_SVE_FUNCTION_GS (svclamp, clamp, all_arith, x24, none)
DEF_SVE_FUNCTION (svcntp, count_pred_c, all_count, none)
-DEF_SVE_FUNCTION_GS (svcvt, unary_convertxn, cvt_h_s_float, x2, none)
-DEF_SVE_FUNCTION_GS (svcvt, unary_convertxn, cvt_s_s, x24, none)
-DEF_SVE_FUNCTION_GS (svcvtn, unary_convertxn, cvt_h_s_float, x2, none)
DEF_SVE_FUNCTION (svdot, ternary_qq_opt_n_or_011, s_narrow_fsu, none)
DEF_SVE_FUNCTION (svdot_lane, ternary_qq_or_011_lane, s_narrow_fsu, none)
DEF_SVE_FUNCTION_GS (svld1, load, all_data, x24, implicit)
DEF_SVE_FUNCTION_GS (svldnt1, load, all_data, x24, implicit)
+DEF_SVE_FUNCTION_GS (svpext_lane, extract_pred, all_count, x12, none)
+DEF_SVE_FUNCTION (svptrue, inherent, all_count, none)
+DEF_SVE_FUNCTION_GS (svqcvtn, unary_convertxn, qcvt_x2, x2, none)
+DEF_SVE_FUNCTION_GS (svqrshrn, shift_right_imm_narrowxn, qrshr_x2, x2, none)
+DEF_SVE_FUNCTION_GS (svqrshrun, shift_right_imm_narrowxn, qrshru_x2, x2, none)
+DEF_SVE_FUNCTION_GS (svst1, storexn, all_data, x24, implicit)
+DEF_SVE_FUNCTION_GS (svstnt1, storexn, all_data, x24, implicit)
+DEF_SVE_FUNCTION_GS (svwhilege, compare_scalar, while_x, x2, none)
+DEF_SVE_FUNCTION (svwhilege, compare_scalar_count, while_x_c, none)
+DEF_SVE_FUNCTION_GS (svwhilegt, compare_scalar, while_x, x2, none)
+DEF_SVE_FUNCTION (svwhilegt, compare_scalar_count, while_x_c, none)
+DEF_SVE_FUNCTION_GS (svwhilele, compare_scalar, while_x, x2, none)
+DEF_SVE_FUNCTION (svwhilele, compare_scalar_count, while_x_c, none)
+DEF_SVE_FUNCTION_GS (svwhilelt, compare_scalar, while_x, x2, none)
+DEF_SVE_FUNCTION (svwhilelt, compare_scalar_count, while_x_c, none)
+#undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME2)
+DEF_SVE_FUNCTION_GS (svadd, binary_single, all_integer, x24, none)
+DEF_SVE_FUNCTION_GS (svclamp, clamp, all_arith, x24, none)
+DEF_SVE_FUNCTION_GS (svcvt, unary_convertxn, cvt_h_s_float, x2, none)
+DEF_SVE_FUNCTION_GS (svcvt, unary_convertxn, cvt_s_s, x24, none)
+DEF_SVE_FUNCTION_GS (svcvtn, unary_convertxn, cvt_h_s_float, x2, none)
DEF_SVE_FUNCTION_GS (svmax, binary_opt_single_n, all_arith, x24, none)
DEF_SVE_FUNCTION_GS (svmaxnm, binary_opt_single_n, all_float, x24, none)
DEF_SVE_FUNCTION_GS (svmin, binary_opt_single_n, all_arith, x24, none)
DEF_SVE_FUNCTION_GS (svminnm, binary_opt_single_n, all_float, x24, none)
-DEF_SVE_FUNCTION_GS (svpext_lane, extract_pred, all_count, x12, none)
-DEF_SVE_FUNCTION (svptrue, inherent, all_count, none)
DEF_SVE_FUNCTION_GS (svqcvt, unary_convertxn, qcvt_x2, x2, none)
DEF_SVE_FUNCTION_GS (svqcvt, unary_convertxn, qcvt_x4, x4, none)
-DEF_SVE_FUNCTION_GS (svqcvtn, unary_convertxn, qcvt_x2, x2, none)
DEF_SVE_FUNCTION_GS (svqcvtn, unary_convertxn, qcvt_x4, x4, none)
DEF_SVE_FUNCTION_GS (svqdmulh, binary_opt_single_n, all_signed, x24, none)
DEF_SVE_FUNCTION_GS (svqrshr, shift_right_imm_narrowxn, qrshr_x2, x2, none)
DEF_SVE_FUNCTION_GS (svqrshr, shift_right_imm_narrowxn, qrshr_x4, x4, none)
-DEF_SVE_FUNCTION_GS (svqrshrn, shift_right_imm_narrowxn, qrshr_x2, x2, none)
DEF_SVE_FUNCTION_GS (svqrshrn, shift_right_imm_narrowxn, qrshr_x4, x4, none)
DEF_SVE_FUNCTION_GS (svqrshru, shift_right_imm_narrowxn, qrshru_x2, x2, none)
DEF_SVE_FUNCTION_GS (svqrshru, shift_right_imm_narrowxn, qrshru_x4, x4, none)
-DEF_SVE_FUNCTION_GS (svqrshrun, shift_right_imm_narrowxn, qrshru_x2, x2, none)
DEF_SVE_FUNCTION_GS (svqrshrun, shift_right_imm_narrowxn, qrshru_x4, x4, none)
DEF_SVE_FUNCTION_GS (svrinta, unaryxn, s_float, x24, none)
DEF_SVE_FUNCTION_GS (svrintm, unaryxn, s_float, x24, none)
@@ -267,19 +280,9 @@ DEF_SVE_FUNCTION_GS (svrintn, unaryxn, s_float, x24, none)
DEF_SVE_FUNCTION_GS (svrintp, unaryxn, s_float, x24, none)
DEF_SVE_FUNCTION_GS (svrshl, binary_int_opt_single_n, all_integer, x24, none)
DEF_SVE_FUNCTION_GS (svsel, binaryxn, all_data, x24, implicit)
-DEF_SVE_FUNCTION_GS (svst1, storexn, all_data, x24, implicit)
-DEF_SVE_FUNCTION_GS (svstnt1, storexn, all_data, x24, implicit)
DEF_SVE_FUNCTION_GS (svunpk, unary_convertxn, bhs_widen, x24, none)
DEF_SVE_FUNCTION_GS (svuzp, unaryxn, all_data, x24, none)
DEF_SVE_FUNCTION_GS (svuzpq, unaryxn, all_data, x24, none)
-DEF_SVE_FUNCTION_GS (svwhilege, compare_scalar, while_x, x2, none)
-DEF_SVE_FUNCTION (svwhilege, compare_scalar_count, while_x_c, none)
-DEF_SVE_FUNCTION_GS (svwhilegt, compare_scalar, while_x, x2, none)
-DEF_SVE_FUNCTION (svwhilegt, compare_scalar_count, while_x_c, none)
-DEF_SVE_FUNCTION_GS (svwhilele, compare_scalar, while_x, x2, none)
-DEF_SVE_FUNCTION (svwhilele, compare_scalar_count, while_x_c, none)
-DEF_SVE_FUNCTION_GS (svwhilelt, compare_scalar, while_x, x2, none)
-DEF_SVE_FUNCTION (svwhilelt, compare_scalar_count, while_x_c, none)
DEF_SVE_FUNCTION_GS (svzip, unaryxn, all_data, x24, none)
DEF_SVE_FUNCTION_GS (svzipq, unaryxn, all_data, x24, none)
#undef REQUIRED_EXTENSIONS
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 3d92a2a..f89036c 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -7222,7 +7222,7 @@
(match_operand:SVE_FULL_SDI 4 "register_operand")))]
"TARGET_SVE
&& (<SVE_FULL_SDI:elem_bits> == <SVE_FULL_BHI:elem_bits> * 4
- || (TARGET_STREAMING_SME2
+ || (TARGET_SVE2p1_OR_SME2
&& <SVE_FULL_SDI:elem_bits> == 32
&& <SVE_FULL_BHI:elem_bits> == 16))"
{@ [ cons: =0 , 1 , 2 , 4 ; attrs: movprfx ]
@@ -7839,8 +7839,8 @@
;; - BFDOT (BF16)
;; - BFMLALB (BF16)
;; - BFMLALT (BF16)
-;; - BFMLSLB (SME2)
-;; - BFMLSLT (SME2)
+;; - BFMLSLB (SVE2p1, SME2)
+;; - BFMLSLT (SVE2p1, SME2)
;; - BFMMLA (BF16)
;; -------------------------------------------------------------------------
@@ -7851,7 +7851,7 @@
(match_operand:VNx8BF 2 "register_operand")
(match_operand:VNx8BF 3 "register_operand")]
SVE_BFLOAT_TERNARY_LONG))]
- "TARGET_SVE_BF16"
+ ""
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , 0 , w , w ; * ] <sve_fp_op>\t%0.s, %2.h, %3.h
[ ?&w , w , w , w ; yes ] movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h
@@ -7867,7 +7867,7 @@
(match_operand:VNx8BF 3 "register_operand")
(match_operand:SI 4 "const_int_operand")]
SVE_BFLOAT_TERNARY_LONG_LANE))]
- "TARGET_SVE_BF16"
+ ""
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , 0 , w , y ; * ] <sve_fp_op>\t%0.s, %2.h, %3.h[%4]
[ ?&w , w , w , y ; yes ] movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h[%4]
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index fd4bd42..61bae64 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -140,7 +140,7 @@
[(match_operand:VNx16BI 2 "register_operand" "Uph")
(match_operand:SVE_FULLx24 1 "memory_operand" "m")]
LD1_COUNT))]
- "TARGET_STREAMING_SME2"
+ "TARGET_SVE2p1_OR_SME2"
"<optab><Vesize>\t%0, %K2/z, %1"
[(set_attr "stride_type" "ld1_consecutive")]
)
@@ -276,7 +276,7 @@
(match_operand:SVE_FULLx24 1 "aligned_register_operand" "Uw<vector_count>")
(match_dup 0)]
ST1_COUNT))]
- "TARGET_STREAMING_SME2"
+ "TARGET_SVE2p1_OR_SME2"
"<optab><Vesize>\t%1, %K2, %0"
[(set_attr "stride_type" "st1_consecutive")]
)
@@ -370,7 +370,7 @@
(define_insn "@aarch64_sve_ptrue_c<BHSD_BITS>"
[(set (match_operand:VNx16BI 0 "register_operand" "=Uph")
(unspec:VNx16BI [(const_int BHSD_BITS)] UNSPEC_PTRUE_C))]
- "TARGET_STREAMING_SME2"
+ "TARGET_SVE2p1_OR_SME2"
"ptrue\t%K0.<bits_etype>"
)
@@ -388,7 +388,7 @@
(match_operand:DI 2 "const_int_operand")
(const_int BHSD_BITS)]
UNSPEC_PEXT))]
- "TARGET_STREAMING_SME2"
+ "TARGET_SVE2p1_OR_SME2"
"pext\t%0.<bits_etype>, %K1[%2]"
)
@@ -399,7 +399,7 @@
(match_operand:DI 2 "const_int_operand")
(const_int BHSD_BITS)]
UNSPEC_PEXTx2))]
- "TARGET_STREAMING_SME2"
+ "TARGET_SVE2p1_OR_SME2"
"pext\t{%S0.<bits_etype>, %T0.<bits_etype>}, %K1[%2]"
)
@@ -451,7 +451,7 @@
(match_operand:DI 2 "const_int_operand")
(const_int BHSD_BITS)]
UNSPEC_CNTP_C))]
- "TARGET_STREAMING_SME2"
+ "TARGET_SVE2p1_OR_SME2"
"cntp\t%x0, %K1.<bits_etype>, vlx%2"
)
@@ -1117,7 +1117,7 @@
UNSPEC_FMAXNM)
(match_operand:SVE_FULL_F 3 "register_operand")]
UNSPEC_FMINNM))]
- "TARGET_STREAMING_SME2"
+ "TARGET_SVE2p1_OR_SME2"
{@ [cons: =0, 1, 2, 3; attrs: movprfx]
[ w, %0, w, w; * ] fclamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
[ ?&w, w, w, w; yes ] movprfx\t%0, %1\;fclamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
@@ -1137,7 +1137,7 @@
UNSPEC_COND_FMAXNM)
(match_operand:SVE_FULL_F 3 "register_operand")]
UNSPEC_COND_FMINNM))]
- "TARGET_STREAMING_SME2"
+ "TARGET_SVE2p1_OR_SME2"
{@ [cons: =0, 1, 2, 3; attrs: movprfx]
[ w, %0, w, w; * ] #
[ ?&w, w, w, w; yes ] #
@@ -2039,7 +2039,7 @@
(match_operand:VNx8HI 2 "register_operand")]
DOTPROD)
(match_operand:VNx4SI 3 "register_operand")))]
- "TARGET_STREAMING_SME2"
+ "TARGET_SVE2p1_OR_SME2"
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , w , w , 0 ; * ] <sur>dot\t%0.s, %1.h, %2.h
[ ?&w , w , w , w ; yes ] movprfx\t%0, %3\;<sur>dot\t%0.s, %1.h, %2.h
@@ -2137,7 +2137,7 @@
(match_operand:VNx8HF 2 "register_operand")]
UNSPEC_FDOT)
(match_operand:VNx4SF 3 "register_operand")))]
- "TARGET_STREAMING_SME2"
+ "TARGET_SVE2p1_OR_SME2"
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , w , w , 0 ; * ] fdot\t%0.s, %1.h, %2.h
[ ?&w , w , w , w ; yes ] movprfx\t%0, %3\;fdot\t%0.s, %1.h, %2.h
@@ -2155,7 +2155,7 @@
UNSPEC_SVE_LANE_SELECT)]
UNSPEC_FDOT)
(match_operand:VNx4SF 4 "register_operand")))]
- "TARGET_STREAMING_SME2"
+ "TARGET_SVE2p1_OR_SME2"
{@ [ cons: =0 , 1 , 2 , 4 ; attrs: movprfx ]
[ w , w , y , 0 ; * ] fdot\t%0.s, %1.h, %2.h[%3]
[ ?&w , w , y , w ; yes ] movprfx\t%0, %4\;fdot\t%0.s, %1.h, %2.h[%3]
@@ -2222,7 +2222,7 @@
(unspec:VNx8HI_ONLY
[(match_operand:VNx8SI_ONLY 1 "aligned_register_operand" "Uw<vector_count>")]
SVE_QCVTxN))]
- "TARGET_STREAMING_SME2"
+ ""
"<optab>\t%0.h, %1"
)
@@ -2336,6 +2336,14 @@
;; -------------------------------------------------------------------------
;; ---- [INT] Multi-vector narrowing right shifts
;; -------------------------------------------------------------------------
+;; Includes:
+;; - SQRSHR
+;; - SQRSHRN
+;; - SQRSHRU
+;; - SQRSHRUN
+;; - UQRSHR
+;; - UQRSHRN
+;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_<sve_int_op><mode>"
[(set (match_operand:<VNARROW> 0 "register_operand" "=w")
@@ -2343,7 +2351,7 @@
[(match_operand:SVE_FULL_SIx2_SDIx4 1 "register_operand" "Uw<vector_count>")
(match_operand:DI 2 "const_int_operand")]
SVE2_INT_SHIFT_IMM_NARROWxN))]
- "TARGET_STREAMING_SME2"
+ "(<MODE>mode == VNx8SImode || TARGET_STREAMING_SME2)"
"<sve_int_op>\t%0.<Ventype>, %1, #%2"
)
@@ -3145,7 +3153,7 @@
(const_int BHSD_BITS)]
SVE_WHILE_ORDER))
(clobber (reg:CC_NZC CC_REGNUM))]
- "TARGET_STREAMING_SME2"
+ "TARGET_SVE2p1_OR_SME2"
"while<cmp_op>\t{%S0.<bits_etype>, %T0.<bits_etype>}, %x1, %x2"
)
@@ -3159,7 +3167,7 @@
(match_operand:DI 3 "const_int_operand")]
SVE_WHILE_ORDER))
(clobber (reg:CC_NZC CC_REGNUM))]
- "TARGET_STREAMING_SME2"
+ "TARGET_SVE2p1_OR_SME2"
"while<cmp_op>\t%K0.<bits_etype>, %x1, %x2, vlx%3"
)
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 404efa1..f07b2c4 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -490,6 +490,10 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
functions, since streaming mode itself implies SME. */
#define TARGET_SVE2p1_OR_SME (TARGET_SVE2p1 || TARGET_STREAMING)
+#define TARGET_SVE2p1_OR_SME2 \
+ ((TARGET_SVE2p1 || TARGET_STREAMING) \
+ && (TARGET_SME2 || TARGET_NON_STREAMING))
+
/* Standard register usage. */
/* 31 64-bit general purpose registers R0-R30:
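For illustration (not part of the patch): TARGET_SVE2p1_OR_SME2 is true for non-streaming functions that have SVE2p1, for streaming functions that have SME2, and for streaming-compatible functions only when both features are present, since such functions set neither TARGET_STREAMING nor TARGET_NON_STREAMING.  A hypothetical stand-alone predicate with the same logic:

#include <stdbool.h>

/* Mirrors (TARGET_SVE2p1 || TARGET_STREAMING)
   && (TARGET_SME2 || TARGET_NON_STREAMING).
   "streaming" and "non_streaming" are never both true; both are false
   for streaming-compatible code, which therefore needs both features.  */
static bool
sve2p1_sme2_common_ok (bool sve2p1, bool sme2,
		       bool streaming, bool non_streaming)
{
  return (sve2p1 || streaming) && (sme2 || non_streaming);
}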
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index b8924cd..73d6748 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -3050,19 +3050,19 @@
UNSPEC_FMIN UNSPEC_FMINNM])
(define_int_iterator SVE_BFLOAT_TERNARY_LONG
- [UNSPEC_BFDOT
- UNSPEC_BFMLALB
- UNSPEC_BFMLALT
- (UNSPEC_BFMLSLB "TARGET_STREAMING_SME2")
- (UNSPEC_BFMLSLT "TARGET_STREAMING_SME2")
- (UNSPEC_BFMMLA "TARGET_NON_STREAMING")])
+ [(UNSPEC_BFDOT "TARGET_SVE_BF16")
+ (UNSPEC_BFMLALB "TARGET_SVE_BF16")
+ (UNSPEC_BFMLALT "TARGET_SVE_BF16")
+ (UNSPEC_BFMLSLB "TARGET_SVE2p1_OR_SME2")
+ (UNSPEC_BFMLSLT "TARGET_SVE2p1_OR_SME2")
+ (UNSPEC_BFMMLA "TARGET_SVE_BF16 && TARGET_NON_STREAMING")])
(define_int_iterator SVE_BFLOAT_TERNARY_LONG_LANE
- [UNSPEC_BFDOT
- UNSPEC_BFMLALB
- UNSPEC_BFMLALT
- (UNSPEC_BFMLSLB "TARGET_STREAMING_SME2")
- (UNSPEC_BFMLSLT "TARGET_STREAMING_SME2")])
+ [(UNSPEC_BFDOT "TARGET_SVE_BF16")
+ (UNSPEC_BFMLALB "TARGET_SVE_BF16")
+ (UNSPEC_BFMLALT "TARGET_SVE_BF16")
+ (UNSPEC_BFMLSLB "TARGET_SVE2p1_OR_SME2")
+ (UNSPEC_BFMLSLT "TARGET_SVE2p1_OR_SME2")])
(define_int_iterator SVE_INT_REDUCTION [UNSPEC_ANDV
UNSPEC_IORV
@@ -3338,12 +3338,13 @@
UNSPEC_UQRSHRNT
UNSPEC_UQSHRNT])
-(define_int_iterator SVE2_INT_SHIFT_IMM_NARROWxN [UNSPEC_SQRSHR
- UNSPEC_SQRSHRN
- UNSPEC_SQRSHRU
- UNSPEC_SQRSHRUN
- UNSPEC_UQRSHR
- UNSPEC_UQRSHRN])
+(define_int_iterator SVE2_INT_SHIFT_IMM_NARROWxN
+ [(UNSPEC_SQRSHR "TARGET_STREAMING_SME2")
+ (UNSPEC_SQRSHRN "TARGET_SVE2p1_OR_SME2")
+ (UNSPEC_SQRSHRU "TARGET_STREAMING_SME2")
+ (UNSPEC_SQRSHRUN "TARGET_SVE2p1_OR_SME2")
+ (UNSPEC_UQRSHR "TARGET_STREAMING_SME2")
+ (UNSPEC_UQRSHRN "TARGET_SVE2p1_OR_SME2")])
(define_int_iterator SVE2_INT_SHIFT_INSERT [UNSPEC_SLI UNSPEC_SRI])
@@ -3488,9 +3489,12 @@
(define_int_iterator SVE2_PMULL_PAIR [UNSPEC_PMULLB_PAIR UNSPEC_PMULLT_PAIR])
-(define_int_iterator SVE_QCVTxN [UNSPEC_SQCVT UNSPEC_SQCVTN
- UNSPEC_SQCVTU UNSPEC_SQCVTUN
- UNSPEC_UQCVT UNSPEC_UQCVTN])
+(define_int_iterator SVE_QCVTxN [(UNSPEC_SQCVT "TARGET_STREAMING_SME2")
+ (UNSPEC_SQCVTN "TARGET_SVE2p1_OR_SME2")
+ (UNSPEC_SQCVTU "TARGET_STREAMING_SME2")
+ (UNSPEC_SQCVTUN "TARGET_SVE2p1_OR_SME2")
+ (UNSPEC_UQCVT "TARGET_STREAMING_SME2")
+ (UNSPEC_UQCVTN "TARGET_SVE2p1_OR_SME2")])
(define_int_iterator SVE2_SFx24_UNARY [UNSPEC_FRINTA UNSPEC_FRINTM
UNSPEC_FRINTN UNSPEC_FRINTP])
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clamp_3.c b/gcc/testsuite/gcc.target/aarch64/sve/clamp_3.c
new file mode 100644
index 0000000..216a1ca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/clamp_3.c
@@ -0,0 +1,28 @@
+// { dg-options "-O" }
+
+#include <arm_sve.h>
+
+#pragma GCC target "+sve2p1"
+
+#define TEST(TYPE) \
+ TYPE \
+ tied1_##TYPE(TYPE a, TYPE b, TYPE c) \
+ { \
+ return svminnm_x(svptrue_b8(), svmaxnm_x(svptrue_b8(), a, b), c); \
+ } \
+ \
+ TYPE \
+ tied2_##TYPE(TYPE a, TYPE b, TYPE c) \
+ { \
+ return svminnm_x(svptrue_b8(), svmaxnm_x(svptrue_b8(), b, a), c); \
+ }
+
+TEST(svfloat16_t)
+TEST(svfloat32_t)
+TEST(svfloat64_t)
+
+/* { dg-final { scan-assembler-times {\tfclamp\tz0\.h, z1\.h, z2\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfclamp\tz0\.s, z1\.s, z2\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tfclamp\tz0\.d, z1\.d, z2\.d\n} 2 } } */
+
+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clamp_4.c b/gcc/testsuite/gcc.target/aarch64/sve/clamp_4.c
new file mode 100644
index 0000000..8df25b1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/clamp_4.c
@@ -0,0 +1,22 @@
+// { dg-options "-O" }
+
+#include <arm_sve.h>
+
+#pragma GCC target "+sve2p1"
+
+#define TEST(TYPE) \
+ TYPE \
+ untied_##TYPE(TYPE a, TYPE b, TYPE c, TYPE d) \
+ { \
+ return svminnm_x(svptrue_b8(), svmaxnm_x(svptrue_b8(), b, c), d); \
+ }
+
+TEST(svfloat16_t)
+TEST(svfloat32_t)
+TEST(svfloat64_t)
+
+/* { dg-final { scan-assembler-times {\tfclamp\tz0\.h, z2\.h, z3\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfclamp\tz0\.s, z2\.s, z3\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfclamp\tz0\.d, z2\.d, z3\.d\n} 1 } } */
+
+/* { dg-final { scan-assembler-times {\tmovprfx\tz0, z1\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bfmlslb_f32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bfmlslb_f32.c
new file mode 100644
index 0000000..7f5610d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bfmlslb_f32.c
@@ -0,0 +1,72 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** bfmlslb_f32_tied1:
+** bfmlslb z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (bfmlslb_f32_tied1, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlslb_f32 (z0, z4, z5),
+ z0 = svbfmlslb (z0, z4, z5))
+
+/*
+** bfmlslb_f32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfmlslb z0\.s, \1\.h, z1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (bfmlslb_f32_tied2, svfloat32_t, svbfloat16_t,
+ z0_res = svbfmlslb_f32 (z4, z0, z1),
+ z0_res = svbfmlslb (z4, z0, z1))
+
+/*
+** bfmlslb_f32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfmlslb z0\.s, z1\.h, \1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (bfmlslb_f32_tied3, svfloat32_t, svbfloat16_t,
+ z0_res = svbfmlslb_f32 (z4, z1, z0),
+ z0_res = svbfmlslb (z4, z1, z0))
+
+/*
+** bfmlslb_f32_untied:
+** movprfx z0, z1
+** bfmlslb z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (bfmlslb_f32_untied, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlslb_f32 (z1, z4, z5),
+ z0 = svbfmlslb (z1, z4, z5))
+
+/*
+** bfmlslb_h7_f32_tied1:
+** mov (z[0-9]+\.h), h7
+** bfmlslb z0\.s, z4\.h, \1
+** ret
+*/
+TEST_DUAL_ZD (bfmlslb_h7_f32_tied1, svfloat32_t, svbfloat16_t, bfloat16_t,
+ z0 = svbfmlslb_n_f32 (z0, z4, d7),
+ z0 = svbfmlslb (z0, z4, d7))
+
+/*
+** bfmlslb_h7_f32_untied:
+** mov (z[0-9]+\.h), h7
+** movprfx z0, z1
+** bfmlslb z0\.s, z4\.h, \1
+** ret
+*/
+TEST_DUAL_ZD (bfmlslb_h7_f32_untied, svfloat32_t, svbfloat16_t, bfloat16_t,
+ z0 = svbfmlslb_n_f32 (z1, z4, d7),
+ z0 = svbfmlslb (z1, z4, d7))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bfmlslb_lane_f32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bfmlslb_lane_f32.c
new file mode 100644
index 0000000..ac0d712
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bfmlslb_lane_f32.c
@@ -0,0 +1,91 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** bfmlslb_lane_0_f32_tied1:
+** bfmlslb z0\.s, z4\.h, z5\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z (bfmlslb_lane_0_f32_tied1, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlslb_lane_f32 (z0, z4, z5, 0),
+ z0 = svbfmlslb_lane (z0, z4, z5, 0))
+
+/*
+** bfmlslb_lane_0_f32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfmlslb z0\.s, \1\.h, z1\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (bfmlslb_lane_0_f32_tied2, svfloat32_t, svbfloat16_t,
+ z0_res = svbfmlslb_lane_f32 (z4, z0, z1, 0),
+ z0_res = svbfmlslb_lane (z4, z0, z1, 0))
+
+/*
+** bfmlslb_lane_0_f32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfmlslb z0\.s, z1\.h, \1\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (bfmlslb_lane_0_f32_tied3, svfloat32_t, svbfloat16_t,
+ z0_res = svbfmlslb_lane_f32 (z4, z1, z0, 0),
+ z0_res = svbfmlslb_lane (z4, z1, z0, 0))
+
+/*
+** bfmlslb_lane_0_f32_untied:
+** movprfx z0, z1
+** bfmlslb z0\.s, z4\.h, z5\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z (bfmlslb_lane_0_f32_untied, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlslb_lane_f32 (z1, z4, z5, 0),
+ z0 = svbfmlslb_lane (z1, z4, z5, 0))
+
+/*
+** bfmlslb_lane_1_f32:
+** bfmlslb z0\.s, z4\.h, z5\.h\[1\]
+** ret
+*/
+TEST_DUAL_Z (bfmlslb_lane_1_f32, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlslb_lane_f32 (z0, z4, z5, 1),
+ z0 = svbfmlslb_lane (z0, z4, z5, 1))
+
+/*
+** bfmlslb_lane_7_f32:
+** bfmlslb z0\.s, z4\.h, z5\.h\[7\]
+** ret
+*/
+TEST_DUAL_Z (bfmlslb_lane_7_f32, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlslb_lane_f32 (z0, z4, z5, 7),
+ z0 = svbfmlslb_lane (z0, z4, z5, 7))
+
+/*
+** bfmlslb_lane_z8_f32:
+** str d8, \[sp, -16\]!
+** mov (z[0-7])\.d, z8\.d
+** bfmlslb z0\.s, z1\.h, \1\.h\[1\]
+** ldr d8, \[sp\], 16
+** ret
+*/
+TEST_DUAL_LANE_REG (bfmlslb_lane_z8_f32, svfloat32_t, svbfloat16_t, z8,
+ z0 = svbfmlslb_lane_f32 (z0, z1, z8, 1),
+ z0 = svbfmlslb_lane (z0, z1, z8, 1))
+
+/*
+** bfmlslb_lane_z16_f32:
+** mov (z[0-7])\.d, z16\.d
+** bfmlslb z0\.s, z1\.h, \1\.h\[1\]
+** ret
+*/
+TEST_DUAL_LANE_REG (bfmlslb_lane_z16_f32, svfloat32_t, svbfloat16_t, z16,
+ z0 = svbfmlslb_lane_f32 (z0, z1, z16, 1),
+ z0 = svbfmlslb_lane (z0, z1, z16, 1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bfmlslt_f32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bfmlslt_f32.c
new file mode 100644
index 0000000..4333b0c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bfmlslt_f32.c
@@ -0,0 +1,72 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** bfmlslt_f32_tied1:
+** bfmlslt z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (bfmlslt_f32_tied1, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlslt_f32 (z0, z4, z5),
+ z0 = svbfmlslt (z0, z4, z5))
+
+/*
+** bfmlslt_f32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfmlslt z0\.s, \1\.h, z1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (bfmlslt_f32_tied2, svfloat32_t, svbfloat16_t,
+ z0_res = svbfmlslt_f32 (z4, z0, z1),
+ z0_res = svbfmlslt (z4, z0, z1))
+
+/*
+** bfmlslt_f32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfmlslt z0\.s, z1\.h, \1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (bfmlslt_f32_tied3, svfloat32_t, svbfloat16_t,
+ z0_res = svbfmlslt_f32 (z4, z1, z0),
+ z0_res = svbfmlslt (z4, z1, z0))
+
+/*
+** bfmlslt_f32_untied:
+** movprfx z0, z1
+** bfmlslt z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (bfmlslt_f32_untied, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlslt_f32 (z1, z4, z5),
+ z0 = svbfmlslt (z1, z4, z5))
+
+/*
+** bfmlslt_h7_f32_tied1:
+** mov (z[0-9]+\.h), h7
+** bfmlslt z0\.s, z4\.h, \1
+** ret
+*/
+TEST_DUAL_ZD (bfmlslt_h7_f32_tied1, svfloat32_t, svbfloat16_t, bfloat16_t,
+ z0 = svbfmlslt_n_f32 (z0, z4, d7),
+ z0 = svbfmlslt (z0, z4, d7))
+
+/*
+** bfmlslt_h7_f32_untied:
+** mov (z[0-9]+\.h), h7
+** movprfx z0, z1
+** bfmlslt z0\.s, z4\.h, \1
+** ret
+*/
+TEST_DUAL_ZD (bfmlslt_h7_f32_untied, svfloat32_t, svbfloat16_t, bfloat16_t,
+ z0 = svbfmlslt_n_f32 (z1, z4, d7),
+ z0 = svbfmlslt (z1, z4, d7))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bfmlslt_lane_f32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bfmlslt_lane_f32.c
new file mode 100644
index 0000000..0a60a75
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bfmlslt_lane_f32.c
@@ -0,0 +1,91 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** bfmlslt_lane_0_f32_tied1:
+** bfmlslt z0\.s, z4\.h, z5\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z (bfmlslt_lane_0_f32_tied1, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlslt_lane_f32 (z0, z4, z5, 0),
+ z0 = svbfmlslt_lane (z0, z4, z5, 0))
+
+/*
+** bfmlslt_lane_0_f32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfmlslt z0\.s, \1\.h, z1\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (bfmlslt_lane_0_f32_tied2, svfloat32_t, svbfloat16_t,
+ z0_res = svbfmlslt_lane_f32 (z4, z0, z1, 0),
+ z0_res = svbfmlslt_lane (z4, z0, z1, 0))
+
+/*
+** bfmlslt_lane_0_f32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfmlslt z0\.s, z1\.h, \1\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (bfmlslt_lane_0_f32_tied3, svfloat32_t, svbfloat16_t,
+ z0_res = svbfmlslt_lane_f32 (z4, z1, z0, 0),
+ z0_res = svbfmlslt_lane (z4, z1, z0, 0))
+
+/*
+** bfmlslt_lane_0_f32_untied:
+** movprfx z0, z1
+** bfmlslt z0\.s, z4\.h, z5\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z (bfmlslt_lane_0_f32_untied, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlslt_lane_f32 (z1, z4, z5, 0),
+ z0 = svbfmlslt_lane (z1, z4, z5, 0))
+
+/*
+** bfmlslt_lane_1_f32:
+** bfmlslt z0\.s, z4\.h, z5\.h\[1\]
+** ret
+*/
+TEST_DUAL_Z (bfmlslt_lane_1_f32, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlslt_lane_f32 (z0, z4, z5, 1),
+ z0 = svbfmlslt_lane (z0, z4, z5, 1))
+
+/*
+** bfmlslt_lane_7_f32:
+** bfmlslt z0\.s, z4\.h, z5\.h\[7\]
+** ret
+*/
+TEST_DUAL_Z (bfmlslt_lane_7_f32, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlslt_lane_f32 (z0, z4, z5, 7),
+ z0 = svbfmlslt_lane (z0, z4, z5, 7))
+
+/*
+** bfmlslt_lane_z8_f32:
+** str d8, \[sp, -16\]!
+** mov (z[0-7])\.d, z8\.d
+** bfmlslt z0\.s, z1\.h, \1\.h\[1\]
+** ldr d8, \[sp\], 16
+** ret
+*/
+TEST_DUAL_LANE_REG (bfmlslt_lane_z8_f32, svfloat32_t, svbfloat16_t, z8,
+ z0 = svbfmlslt_lane_f32 (z0, z1, z8, 1),
+ z0 = svbfmlslt_lane (z0, z1, z8, 1))
+
+/*
+** bfmlslt_lane_z16_f32:
+** mov (z[0-7])\.d, z16\.d
+** bfmlslt z0\.s, z1\.h, \1\.h\[1\]
+** ret
+*/
+TEST_DUAL_LANE_REG (bfmlslt_lane_z16_f32, svfloat32_t, svbfloat16_t, z16,
+ z0 = svbfmlslt_lane_f32 (z0, z1, z16, 1),
+ z0 = svbfmlslt_lane (z0, z1, z16, 1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_f16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_f16.c
new file mode 100644
index 0000000..a9febd8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_f16.c
@@ -0,0 +1,49 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** clamp_f16_tied1:
+** fclamp z0\.h, z1\.h, z2\.h
+** ret
+*/
+TEST_UNIFORM_Z (clamp_f16_tied1, svfloat16_t,
+ z0 = svclamp_f16 (z0, z1, z2),
+ z0 = svclamp (z0, z1, z2))
+
+/*
+** clamp_f16_tied2:
+** fclamp z0\.h, z1\.h, z2\.h
+** ret
+*/
+TEST_UNIFORM_Z (clamp_f16_tied2, svfloat16_t,
+ z0 = svclamp_f16 (z1, z0, z2),
+ z0 = svclamp (z1, z0, z2))
+
+/*
+** clamp_f16_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z1
+** fclamp z0\.h, z2\.h, \1\.h
+** ret
+*/
+TEST_UNIFORM_Z (clamp_f16_tied3, svfloat16_t,
+ z0 = svclamp_f16 (z1, z2, z0),
+ z0 = svclamp (z1, z2, z0))
+
+/*
+** clamp_f16_untied:
+** movprfx z0, z1
+** fclamp z0\.h, z2\.h, z3\.h
+** ret
+*/
+TEST_UNIFORM_Z (clamp_f16_untied, svfloat16_t,
+ z0 = svclamp_f16 (z1, z2, z3),
+ z0 = svclamp (z1, z2, z3))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_f32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_f32.c
new file mode 100644
index 0000000..84381ae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_f32.c
@@ -0,0 +1,49 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** clamp_f32_tied1:
+** fclamp z0\.s, z1\.s, z2\.s
+** ret
+*/
+TEST_UNIFORM_Z (clamp_f32_tied1, svfloat32_t,
+ z0 = svclamp_f32 (z0, z1, z2),
+ z0 = svclamp (z0, z1, z2))
+
+/*
+** clamp_f32_tied2:
+** fclamp z0\.s, z1\.s, z2\.s
+** ret
+*/
+TEST_UNIFORM_Z (clamp_f32_tied2, svfloat32_t,
+ z0 = svclamp_f32 (z1, z0, z2),
+ z0 = svclamp (z1, z0, z2))
+
+/*
+** clamp_f32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z1
+** fclamp z0\.s, z2\.s, \1\.s
+** ret
+*/
+TEST_UNIFORM_Z (clamp_f32_tied3, svfloat32_t,
+ z0 = svclamp_f32 (z1, z2, z0),
+ z0 = svclamp (z1, z2, z0))
+
+/*
+** clamp_f32_untied:
+** movprfx z0, z1
+** fclamp z0\.s, z2\.s, z3\.s
+** ret
+*/
+TEST_UNIFORM_Z (clamp_f32_untied, svfloat32_t,
+ z0 = svclamp_f32 (z1, z2, z3),
+ z0 = svclamp (z1, z2, z3))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_f64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_f64.c
new file mode 100644
index 0000000..e26a129
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/clamp_f64.c
@@ -0,0 +1,49 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** clamp_f64_tied1:
+** fclamp z0\.d, z1\.d, z2\.d
+** ret
+*/
+TEST_UNIFORM_Z (clamp_f64_tied1, svfloat64_t,
+ z0 = svclamp_f64 (z0, z1, z2),
+ z0 = svclamp (z0, z1, z2))
+
+/*
+** clamp_f64_tied2:
+** fclamp z0\.d, z1\.d, z2\.d
+** ret
+*/
+TEST_UNIFORM_Z (clamp_f64_tied2, svfloat64_t,
+ z0 = svclamp_f64 (z1, z0, z2),
+ z0 = svclamp (z1, z0, z2))
+
+/*
+** clamp_f64_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z1
+** fclamp z0\.d, z2\.d, \1\.d
+** ret
+*/
+TEST_UNIFORM_Z (clamp_f64_tied3, svfloat64_t,
+ z0 = svclamp_f64 (z1, z2, z0),
+ z0 = svclamp (z1, z2, z0))
+
+/*
+** clamp_f64_untied:
+** movprfx z0, z1
+** fclamp z0\.d, z2\.d, z3\.d
+** ret
+*/
+TEST_UNIFORM_Z (clamp_f64_untied, svfloat64_t,
+ z0 = svclamp_f64 (z1, z2, z3),
+ z0 = svclamp (z1, z2, z3))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cntp_c16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cntp_c16.c
new file mode 100644
index 0000000..6bd9431
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cntp_c16.c
@@ -0,0 +1,46 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** cntp_x0_pn0_2:
+** cntp x0, pn0\.h, vlx2
+** ret
+*/
+TEST_COUNT_PN (cntp_x0_pn0_2,
+ x0 = svcntp_c16 (pn0, 2),
+ x0 = svcntp_c16 (pn0, 2))
+
+/*
+** cntp_x15_pn7_4:
+** cntp x15, pn7\.h, vlx4
+** ret
+*/
+TEST_COUNT_PN (cntp_x15_pn7_4,
+ x15 = svcntp_c16 (pn7, 4),
+ x15 = svcntp_c16 (pn7, 4))
+
+/*
+** cntp_x17_pn8_2:
+** cntp x17, pn8\.h, vlx2
+** ret
+*/
+TEST_COUNT_PN (cntp_x17_pn8_2,
+ x17 = svcntp_c16 (pn8, 2),
+ x17 = svcntp_c16 (pn8, 2))
+
+/*
+** cntp_x0_pn15_4:
+** cntp x0, pn15\.h, vlx4
+** ret
+*/
+TEST_COUNT_PN (cntp_x0_pn15_4,
+ x0 = svcntp_c16 (pn15, 4),
+ x0 = svcntp_c16 (pn15, 4))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cntp_c32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cntp_c32.c
new file mode 100644
index 0000000..cc428b0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cntp_c32.c
@@ -0,0 +1,46 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** cntp_x0_pn0_2:
+** cntp x0, pn0\.s, vlx2
+** ret
+*/
+TEST_COUNT_PN (cntp_x0_pn0_2,
+ x0 = svcntp_c32 (pn0, 2),
+ x0 = svcntp_c32 (pn0, 2))
+
+/*
+** cntp_x15_pn7_4:
+** cntp x15, pn7\.s, vlx4
+** ret
+*/
+TEST_COUNT_PN (cntp_x15_pn7_4,
+ x15 = svcntp_c32 (pn7, 4),
+ x15 = svcntp_c32 (pn7, 4))
+
+/*
+** cntp_x17_pn8_2:
+** cntp x17, pn8\.s, vlx2
+** ret
+*/
+TEST_COUNT_PN (cntp_x17_pn8_2,
+ x17 = svcntp_c32 (pn8, 2),
+ x17 = svcntp_c32 (pn8, 2))
+
+/*
+** cntp_x0_pn15_4:
+** cntp x0, pn15\.s, vlx4
+** ret
+*/
+TEST_COUNT_PN (cntp_x0_pn15_4,
+ x0 = svcntp_c32 (pn15, 4),
+ x0 = svcntp_c32 (pn15, 4))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cntp_c64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cntp_c64.c
new file mode 100644
index 0000000..e85b750
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cntp_c64.c
@@ -0,0 +1,46 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** cntp_x0_pn0_2:
+** cntp x0, pn0\.d, vlx2
+** ret
+*/
+TEST_COUNT_PN (cntp_x0_pn0_2,
+ x0 = svcntp_c64 (pn0, 2),
+ x0 = svcntp_c64 (pn0, 2))
+
+/*
+** cntp_x15_pn7_4:
+** cntp x15, pn7\.d, vlx4
+** ret
+*/
+TEST_COUNT_PN (cntp_x15_pn7_4,
+ x15 = svcntp_c64 (pn7, 4),
+ x15 = svcntp_c64 (pn7, 4))
+
+/*
+** cntp_x17_pn8_2:
+** cntp x17, pn8\.d, vlx2
+** ret
+*/
+TEST_COUNT_PN (cntp_x17_pn8_2,
+ x17 = svcntp_c64 (pn8, 2),
+ x17 = svcntp_c64 (pn8, 2))
+
+/*
+** cntp_x0_pn15_4:
+** cntp x0, pn15\.d, vlx4
+** ret
+*/
+TEST_COUNT_PN (cntp_x0_pn15_4,
+ x0 = svcntp_c64 (pn15, 4),
+ x0 = svcntp_c64 (pn15, 4))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cntp_c8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cntp_c8.c
new file mode 100644
index 0000000..1120a6c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/cntp_c8.c
@@ -0,0 +1,46 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** cntp_x0_pn0_2:
+** cntp x0, pn0\.b, vlx2
+** ret
+*/
+TEST_COUNT_PN (cntp_x0_pn0_2,
+ x0 = svcntp_c8 (pn0, 2),
+ x0 = svcntp_c8 (pn0, 2))
+
+/*
+** cntp_x15_pn7_4:
+** cntp x15, pn7\.b, vlx4
+** ret
+*/
+TEST_COUNT_PN (cntp_x15_pn7_4,
+ x15 = svcntp_c8 (pn7, 4),
+ x15 = svcntp_c8 (pn7, 4))
+
+/*
+** cntp_x17_pn8_2:
+** cntp x17, pn8\.b, vlx2
+** ret
+*/
+TEST_COUNT_PN (cntp_x17_pn8_2,
+ x17 = svcntp_c8 (pn8, 2),
+ x17 = svcntp_c8 (pn8, 2))
+
+/*
+** cntp_x0_pn15_4:
+** cntp x0, pn15\.b, vlx4
+** ret
+*/
+TEST_COUNT_PN (cntp_x0_pn15_4,
+ x0 = svcntp_c8 (pn15, 4),
+ x0 = svcntp_c8 (pn15, 4))
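
As a point of reference, a minimal sketch of svcntp_c8 used with a predicate-as-counter; the helper name is an illustrative assumption rather than part of the patch:

#include <arm_sve.h>

#pragma GCC target "+sve2p1"

/* Return the number of active 8-bit elements that an all-true
   predicate-as-counter describes across a pair of vectors.  The second
   argument to svcntp_c8 must be the constant 2 or 4.  */
uint64_t
count_active_bytes_x2 (void)
{
  svcount_t pn = svptrue_c8 ();
  return svcntp_c8 (pn, 2);
}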
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_f32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_f32.c
new file mode 100644
index 0000000..2eb37e3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_f32.c
@@ -0,0 +1,51 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** dot_f32_tied1:
+** fdot z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (dot_f32_tied1, svfloat32_t, svfloat16_t,
+ z0 = svdot_f32_f16 (z0, z4, z5),
+ z0 = svdot (z0, z4, z5))
+
+/*
+** dot_f32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** fdot z0\.s, \1\.h, z1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (dot_f32_tied2, svfloat32_t, svfloat16_t,
+ z0_res = svdot_f32_f16 (z4, z0, z1),
+ z0_res = svdot (z4, z0, z1))
+
+/*
+** dot_f32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** fdot z0\.s, z1\.h, \1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (dot_f32_tied3, svfloat32_t, svfloat16_t,
+ z0_res = svdot_f32_f16 (z4, z1, z0),
+ z0_res = svdot (z4, z1, z0))
+
+/*
+** dot_f32_untied:
+** movprfx z0, z1
+** fdot z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (dot_f32_untied, svfloat32_t, svfloat16_t,
+ z0 = svdot_f32_f16 (z1, z4, z5),
+ z0 = svdot (z1, z4, z5))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_lane_f32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_lane_f32.c
new file mode 100644
index 0000000..d7aade9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_lane_f32.c
@@ -0,0 +1,100 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** dot_lane_0_f32_tied1:
+** fdot z0\.s, z4\.h, z5\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_0_f32_tied1, svfloat32_t, svfloat16_t,
+ z0 = svdot_lane_f32_f16 (z0, z4, z5, 0),
+ z0 = svdot_lane (z0, z4, z5, 0))
+
+/*
+** dot_lane_0_f32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** fdot z0\.s, \1\.h, z1\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (dot_lane_0_f32_tied2, svfloat32_t, svfloat16_t,
+ z0_res = svdot_lane_f32_f16 (z4, z0, z1, 0),
+ z0_res = svdot_lane (z4, z0, z1, 0))
+
+/*
+** dot_lane_0_f32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** fdot z0\.s, z1\.h, \1\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (dot_lane_0_f32_tied3, svfloat32_t, svfloat16_t,
+ z0_res = svdot_lane_f32_f16 (z4, z1, z0, 0),
+ z0_res = svdot_lane (z4, z1, z0, 0))
+
+/*
+** dot_lane_0_f32_untied:
+** movprfx z0, z1
+** fdot z0\.s, z4\.h, z5\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_0_f32_untied, svfloat32_t, svfloat16_t,
+ z0 = svdot_lane_f32_f16 (z1, z4, z5, 0),
+ z0 = svdot_lane (z1, z4, z5, 0))
+
+/*
+** dot_lane_1_f32:
+** fdot z0\.s, z4\.h, z5\.h\[1\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_1_f32, svfloat32_t, svfloat16_t,
+ z0 = svdot_lane_f32_f16 (z0, z4, z5, 1),
+ z0 = svdot_lane (z0, z4, z5, 1))
+
+/*
+** dot_lane_2_f32:
+** fdot z0\.s, z4\.h, z5\.h\[2\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_2_f32, svfloat32_t, svfloat16_t,
+ z0 = svdot_lane_f32_f16 (z0, z4, z5, 2),
+ z0 = svdot_lane (z0, z4, z5, 2))
+
+/*
+** dot_lane_3_f32:
+** fdot z0\.s, z4\.h, z5\.h\[3\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_3_f32, svfloat32_t, svfloat16_t,
+ z0 = svdot_lane_f32_f16 (z0, z4, z5, 3),
+ z0 = svdot_lane (z0, z4, z5, 3))
+
+/*
+** dot_lane_z8_f32:
+** str d8, \[sp, -16\]!
+** mov (z[0-7])\.d, z8\.d
+** fdot z0\.s, z1\.h, \1\.h\[1\]
+** ldr d8, \[sp\], 16
+** ret
+*/
+TEST_DUAL_LANE_REG (dot_lane_z8_f32, svfloat32_t, svfloat16_t, z8,
+ z0 = svdot_lane_f32_f16 (z0, z1, z8, 1),
+ z0 = svdot_lane (z0, z1, z8, 1))
+
+/*
+** dot_lane_z16_f32:
+** mov (z[0-7])\.d, z16\.d
+** fdot z0\.s, z1\.h, \1\.h\[1\]
+** ret
+*/
+TEST_DUAL_LANE_REG (dot_lane_z16_f32, svfloat32_t, svfloat16_t, z16,
+ z0 = svdot_lane_f32_f16 (z0, z1, z16, 1),
+ z0 = svdot_lane (z0, z1, z16, 1))
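
For context, a minimal sketch of the indexed dot-product form tested above; the helper name is an illustrative assumption, not part of the patch:

#include <arm_sve.h>

#pragma GCC target "+sve2p1"

/* Accumulate two-way f16 dot products into ACC, taking the second operand
   from lane group 1 of B; the index must be a constant in the range 0-3.  */
svfloat32_t
dot_lane_example (svfloat32_t acc, svfloat16_t a, svfloat16_t b)
{
  return svdot_lane (acc, a, b, 1);   /* resolves to svdot_lane_f32_f16 */
}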
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_lane_s32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_lane_s32.c
new file mode 100644
index 0000000..8e1d6cd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_lane_s32.c
@@ -0,0 +1,100 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** dot_lane_0_s32_tied1:
+** sdot z0\.s, z4\.h, z5\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_0_s32_tied1, svint32_t, svint16_t,
+ z0 = svdot_lane_s32_s16 (z0, z4, z5, 0),
+ z0 = svdot_lane (z0, z4, z5, 0))
+
+/*
+** dot_lane_0_s32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** sdot z0\.s, \1\.h, z1\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (dot_lane_0_s32_tied2, svint32_t, svint16_t,
+ z0_res = svdot_lane_s32_s16 (z4, z0, z1, 0),
+ z0_res = svdot_lane (z4, z0, z1, 0))
+
+/*
+** dot_lane_0_s32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** sdot z0\.s, z1\.h, \1\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (dot_lane_0_s32_tied3, svint32_t, svint16_t,
+ z0_res = svdot_lane_s32_s16 (z4, z1, z0, 0),
+ z0_res = svdot_lane (z4, z1, z0, 0))
+
+/*
+** dot_lane_0_s32_untied:
+** movprfx z0, z1
+** sdot z0\.s, z4\.h, z5\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_0_s32_untied, svint32_t, svint16_t,
+ z0 = svdot_lane_s32_s16 (z1, z4, z5, 0),
+ z0 = svdot_lane (z1, z4, z5, 0))
+
+/*
+** dot_lane_1_s32:
+** sdot z0\.s, z4\.h, z5\.h\[1\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_1_s32, svint32_t, svint16_t,
+ z0 = svdot_lane_s32_s16 (z0, z4, z5, 1),
+ z0 = svdot_lane (z0, z4, z5, 1))
+
+/*
+** dot_lane_2_s32:
+** sdot z0\.s, z4\.h, z5\.h\[2\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_2_s32, svint32_t, svint16_t,
+ z0 = svdot_lane_s32_s16 (z0, z4, z5, 2),
+ z0 = svdot_lane (z0, z4, z5, 2))
+
+/*
+** dot_lane_3_s32:
+** sdot z0\.s, z4\.h, z5\.h\[3\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_3_s32, svint32_t, svint16_t,
+ z0 = svdot_lane_s32_s16 (z0, z4, z5, 3),
+ z0 = svdot_lane (z0, z4, z5, 3))
+
+/*
+** dot_lane_z8_s32:
+** str d8, \[sp, -16\]!
+** mov (z[0-7])\.d, z8\.d
+** sdot z0\.s, z1\.h, \1\.h\[1\]
+** ldr d8, \[sp\], 16
+** ret
+*/
+TEST_DUAL_LANE_REG (dot_lane_z8_s32, svint32_t, svint16_t, z8,
+ z0 = svdot_lane_s32_s16 (z0, z1, z8, 1),
+ z0 = svdot_lane (z0, z1, z8, 1))
+
+/*
+** dot_lane_z16_s32:
+** mov (z[0-7])\.d, z16\.d
+** sdot z0\.s, z1\.h, \1\.h\[1\]
+** ret
+*/
+TEST_DUAL_LANE_REG (dot_lane_z16_s32, svint32_t, svint16_t, z16,
+ z0 = svdot_lane_s32_s16 (z0, z1, z16, 1),
+ z0 = svdot_lane (z0, z1, z16, 1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_lane_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_lane_u32.c
new file mode 100644
index 0000000..b648c95
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_lane_u32.c
@@ -0,0 +1,100 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** dot_lane_0_u32_tied1:
+** udot z0\.s, z4\.h, z5\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_0_u32_tied1, svuint32_t, svuint16_t,
+ z0 = svdot_lane_u32_u16 (z0, z4, z5, 0),
+ z0 = svdot_lane (z0, z4, z5, 0))
+
+/*
+** dot_lane_0_u32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** udot z0\.s, \1\.h, z1\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (dot_lane_0_u32_tied2, svuint32_t, svuint16_t,
+ z0_res = svdot_lane_u32_u16 (z4, z0, z1, 0),
+ z0_res = svdot_lane (z4, z0, z1, 0))
+
+/*
+** dot_lane_0_u32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** udot z0\.s, z1\.h, \1\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (dot_lane_0_u32_tied3, svuint32_t, svuint16_t,
+ z0_res = svdot_lane_u32_u16 (z4, z1, z0, 0),
+ z0_res = svdot_lane (z4, z1, z0, 0))
+
+/*
+** dot_lane_0_u32_untied:
+** movprfx z0, z1
+** udot z0\.s, z4\.h, z5\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_0_u32_untied, svuint32_t, svuint16_t,
+ z0 = svdot_lane_u32_u16 (z1, z4, z5, 0),
+ z0 = svdot_lane (z1, z4, z5, 0))
+
+/*
+** dot_lane_1_u32:
+** udot z0\.s, z4\.h, z5\.h\[1\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_1_u32, svuint32_t, svuint16_t,
+ z0 = svdot_lane_u32_u16 (z0, z4, z5, 1),
+ z0 = svdot_lane (z0, z4, z5, 1))
+
+/*
+** dot_lane_2_u32:
+** udot z0\.s, z4\.h, z5\.h\[2\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_2_u32, svuint32_t, svuint16_t,
+ z0 = svdot_lane_u32_u16 (z0, z4, z5, 2),
+ z0 = svdot_lane (z0, z4, z5, 2))
+
+/*
+** dot_lane_3_u32:
+** udot z0\.s, z4\.h, z5\.h\[3\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_3_u32, svuint32_t, svuint16_t,
+ z0 = svdot_lane_u32_u16 (z0, z4, z5, 3),
+ z0 = svdot_lane (z0, z4, z5, 3))
+
+/*
+** dot_lane_z8_u32:
+** str d8, \[sp, -16\]!
+** mov (z[0-7])\.d, z8\.d
+** udot z0\.s, z1\.h, \1\.h\[1\]
+** ldr d8, \[sp\], 16
+** ret
+*/
+TEST_DUAL_LANE_REG (dot_lane_z8_u32, svuint32_t, svuint16_t, z8,
+ z0 = svdot_lane_u32_u16 (z0, z1, z8, 1),
+ z0 = svdot_lane (z0, z1, z8, 1))
+
+/*
+** dot_lane_z16_u32:
+** mov (z[0-7])\.d, z16\.d
+** udot z0\.s, z1\.h, \1\.h\[1\]
+** ret
+*/
+TEST_DUAL_LANE_REG (dot_lane_z16_u32, svuint32_t, svuint16_t, z16,
+ z0 = svdot_lane_u32_u16 (z0, z1, z16, 1),
+ z0 = svdot_lane (z0, z1, z16, 1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_s32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_s32.c
new file mode 100644
index 0000000..f22dff1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_s32.c
@@ -0,0 +1,51 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** dot_s32_tied1:
+** sdot z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (dot_s32_tied1, svint32_t, svint16_t,
+ z0 = svdot_s32_s16 (z0, z4, z5),
+ z0 = svdot (z0, z4, z5))
+
+/*
+** dot_s32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** sdot z0\.s, \1\.h, z1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (dot_s32_tied2, svint32_t, svint16_t,
+ z0_res = svdot_s32_s16 (z4, z0, z1),
+ z0_res = svdot (z4, z0, z1))
+
+/*
+** dot_s32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** sdot z0\.s, z1\.h, \1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (dot_s32_tied3, svint32_t, svint16_t,
+ z0_res = svdot_s32_s16 (z4, z1, z0),
+ z0_res = svdot (z4, z1, z0))
+
+/*
+** dot_s32_untied:
+** movprfx z0, z1
+** sdot z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (dot_s32_untied, svint32_t, svint16_t,
+ z0 = svdot_s32_s16 (z1, z4, z5),
+ z0 = svdot (z1, z4, z5))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_u32.c
new file mode 100644
index 0000000..92b6cb4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_u32.c
@@ -0,0 +1,51 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** dot_u32_tied1:
+** udot z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (dot_u32_tied1, svuint32_t, svuint16_t,
+ z0 = svdot_u32_u16 (z0, z4, z5),
+ z0 = svdot (z0, z4, z5))
+
+/*
+** dot_u32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** udot z0\.s, \1\.h, z1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (dot_u32_tied2, svuint32_t, svuint16_t,
+ z0_res = svdot_u32_u16 (z4, z0, z1),
+ z0_res = svdot (z4, z0, z1))
+
+/*
+** dot_u32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** udot z0\.s, z1\.h, \1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (dot_u32_tied3, svuint32_t, svuint16_t,
+ z0_res = svdot_u32_u16 (z4, z1, z0),
+ z0_res = svdot (z4, z1, z0))
+
+/*
+** dot_u32_untied:
+** movprfx z0, z1
+** udot z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (dot_u32_untied, svuint32_t, svuint16_t,
+ z0 = svdot_u32_u16 (z1, z4, z5),
+ z0 = svdot (z1, z4, z5))
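
Similarly, a minimal sketch of the non-indexed 16-bit dot products tested above; the helper names are illustrative assumptions, not part of the patch:

#include <arm_sve.h>

#pragma GCC target "+sve2p1"

/* Two-way dot products: each 32-bit lane of ACC accumulates the dot product
   of the corresponding pair of 16-bit elements from A and B.  */
svfloat32_t
fdot_example (svfloat32_t acc, svfloat16_t a, svfloat16_t b)
{
  return svdot (acc, a, b);        /* resolves to svdot_f32_f16 (FDOT) */
}

svint32_t
sdot_example (svint32_t acc, svint16_t a, svint16_t b)
{
  return svdot (acc, a, b);        /* resolves to svdot_s32_s16 (SDOT) */
}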
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_bf16_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_bf16_x2.c
new file mode 100644
index 0000000..541ed4c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_bf16_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_bf16_base:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_base, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_bf16_index:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_index, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_bf16_1:
+** incb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_1, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn8, x0 + svcnth ()),
+ z0 = svld1_x2 (pn8, x0 + svcnth ()))
+
+/*
+** ld1_bf16_2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_2, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn8, x0 + svcnth () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcnth () * 2))
+
+/*
+** ld1_bf16_14:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_14, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn8, x0 + svcnth () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcnth () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_bf16_16:
+** incb x0, all, mul #16
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_16, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn8, x0 + svcnth () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcnth () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_bf16_m1:
+** decb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_m1, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn8, x0 - svcnth ()),
+ z0 = svld1_x2 (pn8, x0 - svcnth ()))
+
+/*
+** ld1_bf16_m2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_m2, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn8, x0 - svcnth () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcnth () * 2))
+
+/*
+** ld1_bf16_m16:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_m16, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn8, x0 - svcnth () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcnth () * 16))
+
+/*
+** ld1_bf16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_m18, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn8, x0 - svcnth () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcnth () * 18))
+
+/*
+** ld1_bf16_z17:
+** ld1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_z17, svbfloat16x2_t, bfloat16_t,
+ z17 = svld1_bf16_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_bf16_z22:
+** ld1h {z22\.h(?: - |, )z23\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_z22, svbfloat16x2_t, bfloat16_t,
+ z22 = svld1_bf16_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_bf16_z28:
+** ld1h {z28\.h(?: - |, )z29\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_z28, svbfloat16x2_t, bfloat16_t,
+ z28 = svld1_bf16_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_bf16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_pn0, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_bf16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_pn7, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_bf16_pn15:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_pn15, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_bf16_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_bf16_0:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_0, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_bf16_1:
+** incb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_1, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_bf16_2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_2, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_bf16_14:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_14, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_bf16_16:
+** incb x0, all, mul #16
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_16, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_bf16_m1:
+** decb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_m1, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_bf16_m2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_m2, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_bf16_m16:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_m16, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_bf16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_m18, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_bf16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_x1, svbfloat16x2_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_bf16_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_bf16_x4.c
new file mode 100644
index 0000000..f2a8027
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_bf16_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_bf16_base:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_base, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_bf16_index:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_index, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_bf16_1:
+** incb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_1, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 + svcnth ()),
+ z0 = svld1_x4 (pn8, x0 + svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_bf16_2:
+** incb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_2, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 + svcnth () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_bf16_3:
+** incb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_3, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 + svcnth () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 3))
+
+/*
+** ld1_bf16_4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_4, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 + svcnth () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 4))
+
+/*
+** ld1_bf16_28:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_28, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 + svcnth () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 28))
+
+/*
+** ld1_bf16_32:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_32, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 + svcnth () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_bf16_m1:
+** decb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_m1, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 - svcnth ()),
+ z0 = svld1_x4 (pn8, x0 - svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_bf16_m2:
+** decb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_m2, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 - svcnth () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_bf16_m3:
+** decb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_m3, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 - svcnth () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 3))
+
+/*
+** ld1_bf16_m4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_m4, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 - svcnth () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 4))
+
+/*
+** ld1_bf16_m32:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_m32, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 - svcnth () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 32))
+
+/*
+** ld1_bf16_m36:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_m36, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn8, x0 - svcnth () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 36))
+
+/*
+** ld1_bf16_z17:
+** ld1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_z17, svbfloat16x4_t, bfloat16_t,
+ z17 = svld1_bf16_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_bf16_z22:
+** ld1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_z22, svbfloat16x4_t, bfloat16_t,
+ z22 = svld1_bf16_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_bf16_z28:
+** ld1h {z28\.h(?: - |, )z31\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_z28, svbfloat16x4_t, bfloat16_t,
+ z28 = svld1_bf16_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_bf16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_pn0, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_bf16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_pn7, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_bf16_pn15:
+** ld1h {z0\.h(?: - |, )z3\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_bf16_pn15, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_bf16_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_bf16_0:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_0, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_bf16_1:
+** incb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_1, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_bf16_2:
+** incb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_2, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_bf16_3:
+** incb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_3, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_bf16_4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_4, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_bf16_28:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_28, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_bf16_32:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_32, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_bf16_m1:
+** decb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_m1, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_bf16_m2:
+** decb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_m2, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_bf16_m3:
+** decb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_m3, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_bf16_m4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_m4, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_bf16_m32:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_m32, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_bf16_m36:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_m36, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_bf16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_bf16_x1, svbfloat16x4_t, bfloat16_t,
+ z0 = svld1_vnum_bf16_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_f16_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_f16_x2.c
new file mode 100644
index 0000000..1c1cfe0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_f16_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_f16_base:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_base, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_f16_index:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_index, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f16_1:
+** incb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_1, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn8, x0 + svcnth ()),
+ z0 = svld1_x2 (pn8, x0 + svcnth ()))
+
+/*
+** ld1_f16_2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_2, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn8, x0 + svcnth () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcnth () * 2))
+
+/*
+** ld1_f16_14:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_14, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn8, x0 + svcnth () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcnth () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f16_16:
+** incb x0, all, mul #16
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_16, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn8, x0 + svcnth () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcnth () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f16_m1:
+** decb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_m1, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn8, x0 - svcnth ()),
+ z0 = svld1_x2 (pn8, x0 - svcnth ()))
+
+/*
+** ld1_f16_m2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_m2, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn8, x0 - svcnth () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcnth () * 2))
+
+/*
+** ld1_f16_m16:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_m16, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn8, x0 - svcnth () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcnth () * 16))
+
+/*
+** ld1_f16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_m18, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn8, x0 - svcnth () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcnth () * 18))
+
+/*
+** ld1_f16_z17:
+** ld1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_z17, svfloat16x2_t, float16_t,
+ z17 = svld1_f16_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_f16_z22:
+** ld1h {z22\.h(?: - |, )z23\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_z22, svfloat16x2_t, float16_t,
+ z22 = svld1_f16_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_f16_z28:
+** ld1h {z28\.h(?: - |, )z29\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_z28, svfloat16x2_t, float16_t,
+ z28 = svld1_f16_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_f16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_pn0, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_f16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_pn7, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_f16_pn15:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_pn15, svfloat16x2_t, float16_t,
+ z0 = svld1_f16_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_f16_0:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_0, svfloat16x2_t, float16_t,
+ z0 = svld1_vnum_f16_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f16_1:
+** incb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_1, svfloat16x2_t, float16_t,
+ z0 = svld1_vnum_f16_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_f16_2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_2, svfloat16x2_t, float16_t,
+ z0 = svld1_vnum_f16_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_f16_14:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_14, svfloat16x2_t, float16_t,
+ z0 = svld1_vnum_f16_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f16_16:
+** incb x0, all, mul #16
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_16, svfloat16x2_t, float16_t,
+ z0 = svld1_vnum_f16_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f16_m1:
+** decb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_m1, svfloat16x2_t, float16_t,
+ z0 = svld1_vnum_f16_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_f16_m2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_m2, svfloat16x2_t, float16_t,
+ z0 = svld1_vnum_f16_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_f16_m16:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_m16, svfloat16x2_t, float16_t,
+ z0 = svld1_vnum_f16_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_f16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_m18, svfloat16x2_t, float16_t,
+ z0 = svld1_vnum_f16_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_f16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_x1, svfloat16x2_t, float16_t,
+ z0 = svld1_vnum_f16_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
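
And a minimal sketch of the two-vector contiguous load tested above, using the svcount_t predicate form; the helper name is an illustrative assumption, not part of the patch:

#include <arm_sve.h>

#pragma GCC target "+sve2p1"

/* Load two consecutive vectors of f16 data from PTR under the
   predicate-as-counter PN; svget2 extracts the individual vectors.  */
svfloat16_t
load_first_of_pair (svcount_t pn, const float16_t *ptr)
{
  svfloat16x2_t pair = svld1_x2 (pn, ptr);
  return svget2 (pair, 0);
}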
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_f16_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_f16_x4.c
new file mode 100644
index 0000000..39f72e7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_f16_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_f16_base:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_base, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f16_index:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_index, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f16_1:
+** incb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_1, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 + svcnth ()),
+ z0 = svld1_x4 (pn8, x0 + svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f16_2:
+** incb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_2, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 + svcnth () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f16_3:
+** incb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_3, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 + svcnth () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 3))
+
+/*
+** ld1_f16_4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_4, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 + svcnth () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 4))
+
+/*
+** ld1_f16_28:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_28, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 + svcnth () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 28))
+
+/*
+** ld1_f16_32:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_32, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 + svcnth () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f16_m1:
+** decb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_m1, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 - svcnth ()),
+ z0 = svld1_x4 (pn8, x0 - svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f16_m2:
+** decb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_m2, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 - svcnth () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f16_m3:
+** decb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_m3, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 - svcnth () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 3))
+
+/*
+** ld1_f16_m4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_m4, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 - svcnth () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 4))
+
+/*
+** ld1_f16_m32:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_m32, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 - svcnth () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 32))
+
+/*
+** ld1_f16_m36:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_m36, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn8, x0 - svcnth () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 36))
+
+/*
+** ld1_f16_z17:
+** ld1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_z17, svfloat16x4_t, float16_t,
+ z17 = svld1_f16_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f16_z22:
+** ld1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_z22, svfloat16x4_t, float16_t,
+ z22 = svld1_f16_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f16_z28:
+** ld1h {z28\.h(?: - |, )z31\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_z28, svfloat16x4_t, float16_t,
+ z28 = svld1_f16_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_pn0, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_f16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_pn7, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_f16_pn15:
+** ld1h {z0\.h(?: - |, )z3\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f16_pn15, svfloat16x4_t, float16_t,
+ z0 = svld1_f16_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_f16_0:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_0, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f16_1:
+** incb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_1, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f16_2:
+** incb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_2, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f16_3:
+** incb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_3, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_f16_4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_4, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_f16_28:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_28, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_f16_32:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_32, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f16_m1:
+** decb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_m1, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f16_m2:
+** decb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_m2, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f16_m3:
+** decb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_m3, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_f16_m4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_m4, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_f16_m32:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_m32, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_f16_m36:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_m36, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_f16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f16_x1, svfloat16x4_t, float16_t,
+ z0 = svld1_vnum_f16_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_f32_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_f32_x2.c
new file mode 100644
index 0000000..eb72751
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_f32_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_f32_base:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_base, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_f32_index:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_index, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f32_1:
+** incb x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_1, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn8, x0 + svcntw ()),
+ z0 = svld1_x2 (pn8, x0 + svcntw ()))
+
+/*
+** ld1_f32_2:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_2, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn8, x0 + svcntw () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcntw () * 2))
+
+/*
+** ld1_f32_14:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_14, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn8, x0 + svcntw () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcntw () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f32_16:
+** incb x0, all, mul #16
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_16, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn8, x0 + svcntw () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcntw () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f32_m1:
+** decb x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_m1, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn8, x0 - svcntw ()),
+ z0 = svld1_x2 (pn8, x0 - svcntw ()))
+
+/*
+** ld1_f32_m2:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_m2, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn8, x0 - svcntw () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcntw () * 2))
+
+/*
+** ld1_f32_m16:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_m16, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn8, x0 - svcntw () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcntw () * 16))
+
+/*
+** ld1_f32_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_m18, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn8, x0 - svcntw () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcntw () * 18))
+
+/*
+** ld1_f32_z17:
+** ld1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_z17, svfloat32x2_t, float32_t,
+ z17 = svld1_f32_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_f32_z22:
+** ld1w {z22\.s(?: - |, )z23\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_z22, svfloat32x2_t, float32_t,
+ z22 = svld1_f32_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_f32_z28:
+** ld1w {z28\.s(?: - |, )z29\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_z28, svfloat32x2_t, float32_t,
+ z28 = svld1_f32_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_f32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_pn0, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_f32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_pn7, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_f32_pn15:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_pn15, svfloat32x2_t, float32_t,
+ z0 = svld1_f32_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_f32_0:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_0, svfloat32x2_t, float32_t,
+ z0 = svld1_vnum_f32_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f32_1:
+** incb x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_1, svfloat32x2_t, float32_t,
+ z0 = svld1_vnum_f32_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_f32_2:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_2, svfloat32x2_t, float32_t,
+ z0 = svld1_vnum_f32_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_f32_14:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_14, svfloat32x2_t, float32_t,
+ z0 = svld1_vnum_f32_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f32_16:
+** incb x0, all, mul #16
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_16, svfloat32x2_t, float32_t,
+ z0 = svld1_vnum_f32_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f32_m1:
+** decb x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_m1, svfloat32x2_t, float32_t,
+ z0 = svld1_vnum_f32_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_f32_m2:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_m2, svfloat32x2_t, float32_t,
+ z0 = svld1_vnum_f32_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_f32_m16:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_m16, svfloat32x2_t, float32_t,
+ z0 = svld1_vnum_f32_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_f32_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_m18, svfloat32x2_t, float32_t,
+ z0 = svld1_vnum_f32_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_f32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_x1, svfloat32x2_t, float32_t,
+ z0 = svld1_vnum_f32_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_f32_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_f32_x4.c
new file mode 100644
index 0000000..99344e6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_f32_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_f32_base:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_base, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f32_index:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_index, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f32_1:
+** incb x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_1, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 + svcntw ()),
+ z0 = svld1_x4 (pn8, x0 + svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f32_2:
+** incb x0, all, mul #2
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_2, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 + svcntw () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f32_3:
+** incb x0, all, mul #3
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_3, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 + svcntw () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 3))
+
+/*
+** ld1_f32_4:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_4, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 + svcntw () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 4))
+
+/*
+** ld1_f32_28:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_28, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 + svcntw () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 28))
+
+/*
+** ld1_f32_32:
+** [^{]*
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_32, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 + svcntw () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f32_m1:
+** decb x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_m1, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 - svcntw ()),
+ z0 = svld1_x4 (pn8, x0 - svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f32_m2:
+** decb x0, all, mul #2
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_m2, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 - svcntw () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f32_m3:
+** decb x0, all, mul #3
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_m3, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 - svcntw () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 3))
+
+/*
+** ld1_f32_m4:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_m4, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 - svcntw () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 4))
+
+/*
+** ld1_f32_m32:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_m32, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 - svcntw () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 32))
+
+/*
+** ld1_f32_m36:
+** [^{]*
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_m36, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn8, x0 - svcntw () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 36))
+
+/*
+** ld1_f32_z17:
+** ld1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_z17, svfloat32x4_t, float32_t,
+ z17 = svld1_f32_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f32_z22:
+** ld1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_z22, svfloat32x4_t, float32_t,
+ z22 = svld1_f32_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f32_z28:
+** ld1w {z28\.s(?: - |, )z31\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_z28, svfloat32x4_t, float32_t,
+ z28 = svld1_f32_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_pn0, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_f32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_pn7, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_f32_pn15:
+** ld1w {z0\.s(?: - |, )z3\.s}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f32_pn15, svfloat32x4_t, float32_t,
+ z0 = svld1_f32_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_f32_0:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_0, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f32_1:
+** incb x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_1, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f32_2:
+** incb x0, all, mul #2
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_2, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f32_3:
+** incb x0, all, mul #3
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_3, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_f32_4:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_4, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_f32_28:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_28, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_f32_32:
+** [^{]*
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_32, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f32_m1:
+** decb x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_m1, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f32_m2:
+** decb x0, all, mul #2
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_m2, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f32_m3:
+** decb x0, all, mul #3
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_m3, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_f32_m4:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_m4, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_f32_m32:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_m32, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_f32_m36:
+** [^{]*
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_m36, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_f32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f32_x1, svfloat32x4_t, float32_t,
+ z0 = svld1_vnum_f32_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_f64_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_f64_x2.c
new file mode 100644
index 0000000..2e7094a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_f64_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_f64_base:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_base, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_f64_index:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_index, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f64_1:
+** incb x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_1, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn8, x0 + svcntd ()),
+ z0 = svld1_x2 (pn8, x0 + svcntd ()))
+
+/*
+** ld1_f64_2:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_2, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn8, x0 + svcntd () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcntd () * 2))
+
+/*
+** ld1_f64_14:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_14, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn8, x0 + svcntd () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcntd () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f64_16:
+** incb x0, all, mul #16
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_16, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn8, x0 + svcntd () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcntd () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f64_m1:
+** decb x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_m1, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn8, x0 - svcntd ()),
+ z0 = svld1_x2 (pn8, x0 - svcntd ()))
+
+/*
+** ld1_f64_m2:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_m2, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn8, x0 - svcntd () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcntd () * 2))
+
+/*
+** ld1_f64_m16:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_m16, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn8, x0 - svcntd () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcntd () * 16))
+
+/*
+** ld1_f64_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_m18, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn8, x0 - svcntd () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcntd () * 18))
+
+/*
+** ld1_f64_z17:
+** ld1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_z17, svfloat64x2_t, float64_t,
+ z17 = svld1_f64_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_f64_z22:
+** ld1d {z22\.d(?: - |, )z23\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_z22, svfloat64x2_t, float64_t,
+ z22 = svld1_f64_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_f64_z28:
+** ld1d {z28\.d(?: - |, )z29\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_z28, svfloat64x2_t, float64_t,
+ z28 = svld1_f64_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_f64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_pn0, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_f64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_pn7, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_f64_pn15:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_pn15, svfloat64x2_t, float64_t,
+ z0 = svld1_f64_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_f64_0:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_0, svfloat64x2_t, float64_t,
+ z0 = svld1_vnum_f64_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f64_1:
+** incb x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_1, svfloat64x2_t, float64_t,
+ z0 = svld1_vnum_f64_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_f64_2:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_2, svfloat64x2_t, float64_t,
+ z0 = svld1_vnum_f64_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_f64_14:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_14, svfloat64x2_t, float64_t,
+ z0 = svld1_vnum_f64_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f64_16:
+** incb x0, all, mul #16
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_16, svfloat64x2_t, float64_t,
+ z0 = svld1_vnum_f64_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f64_m1:
+** decb x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_m1, svfloat64x2_t, float64_t,
+ z0 = svld1_vnum_f64_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_f64_m2:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_m2, svfloat64x2_t, float64_t,
+ z0 = svld1_vnum_f64_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_f64_m16:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_m16, svfloat64x2_t, float64_t,
+ z0 = svld1_vnum_f64_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_f64_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_m18, svfloat64x2_t, float64_t,
+ z0 = svld1_vnum_f64_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_f64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_x1, svfloat64x2_t, float64_t,
+ z0 = svld1_vnum_f64_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_f64_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_f64_x4.c
new file mode 100644
index 0000000..addde83
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_f64_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_f64_base:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_base, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f64_index:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_index, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f64_1:
+** incb x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_1, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 + svcntd ()),
+ z0 = svld1_x4 (pn8, x0 + svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f64_2:
+** incb x0, all, mul #2
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_2, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 + svcntd () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f64_3:
+** incb x0, all, mul #3
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_3, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 + svcntd () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 3))
+
+/*
+** ld1_f64_4:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_4, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 + svcntd () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 4))
+
+/*
+** ld1_f64_28:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_28, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 + svcntd () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 28))
+
+/*
+** ld1_f64_32:
+** [^{]*
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_32, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 + svcntd () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f64_m1:
+** decb x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_m1, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 - svcntd ()),
+ z0 = svld1_x4 (pn8, x0 - svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f64_m2:
+** decb x0, all, mul #2
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_m2, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 - svcntd () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_f64_m3:
+** decb x0, all, mul #3
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_m3, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 - svcntd () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 3))
+
+/*
+** ld1_f64_m4:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_m4, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 - svcntd () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 4))
+
+/*
+** ld1_f64_m32:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_m32, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 - svcntd () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 32))
+
+/*
+** ld1_f64_m36:
+** [^{]*
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_m36, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn8, x0 - svcntd () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 36))
+
+/*
+** ld1_f64_z17:
+** ld1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_z17, svfloat64x4_t, float64_t,
+ z17 = svld1_f64_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f64_z22:
+** ld1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_z22, svfloat64x4_t, float64_t,
+ z22 = svld1_f64_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f64_z28:
+** ld1d {z28\.d(?: - |, )z31\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_z28, svfloat64x4_t, float64_t,
+ z28 = svld1_f64_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_f64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_pn0, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_f64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_pn7, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_f64_pn15:
+** ld1d {z0\.d(?: - |, )z3\.d}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_f64_pn15, svfloat64x4_t, float64_t,
+ z0 = svld1_f64_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_f64_0:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_0, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f64_1:
+** incb x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_1, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f64_2:
+** incb x0, all, mul #2
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_2, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f64_3:
+** incb x0, all, mul #3
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_3, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_f64_4:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_4, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_f64_28:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_28, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_f64_32:
+** [^{]*
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_32, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f64_m1:
+** decb x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_m1, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f64_m2:
+** decb x0, all, mul #2
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_m2, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_f64_m3:
+** decb x0, all, mul #3
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_m3, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_f64_m4:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_m4, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_f64_m32:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_m32, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_f64_m36:
+** [^{]*
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_m36, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_f64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_f64_x1, svfloat64x4_t, float64_t,
+ z0 = svld1_vnum_f64_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_s16_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_s16_x2.c
new file mode 100644
index 0000000..d3d49ec
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_s16_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_s16_base:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_base, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s16_index:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_index, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s16_1:
+** incb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_1, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn8, x0 + svcnth ()),
+ z0 = svld1_x2 (pn8, x0 + svcnth ()))
+
+/*
+** ld1_s16_2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_2, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn8, x0 + svcnth () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcnth () * 2))
+
+/*
+** ld1_s16_14:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_14, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn8, x0 + svcnth () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcnth () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s16_16:
+** incb x0, all, mul #16
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_16, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn8, x0 + svcnth () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcnth () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s16_m1:
+** decb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_m1, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn8, x0 - svcnth ()),
+ z0 = svld1_x2 (pn8, x0 - svcnth ()))
+
+/*
+** ld1_s16_m2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_m2, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn8, x0 - svcnth () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcnth () * 2))
+
+/*
+** ld1_s16_m16:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_m16, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn8, x0 - svcnth () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcnth () * 16))
+
+/*
+** ld1_s16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_m18, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn8, x0 - svcnth () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcnth () * 18))
+
+/*
+** ld1_s16_z17:
+** ld1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_z17, svint16x2_t, int16_t,
+ z17 = svld1_s16_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s16_z22:
+** ld1h {z22\.h(?: - |, )z23\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_z22, svint16x2_t, int16_t,
+ z22 = svld1_s16_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s16_z28:
+** ld1h {z28\.h(?: - |, )z29\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_z28, svint16x2_t, int16_t,
+ z28 = svld1_s16_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_pn0, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_s16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_pn7, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_s16_pn15:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_pn15, svint16x2_t, int16_t,
+ z0 = svld1_s16_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_s16_0:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_0, svint16x2_t, int16_t,
+ z0 = svld1_vnum_s16_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s16_1:
+** incb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_1, svint16x2_t, int16_t,
+ z0 = svld1_vnum_s16_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_s16_2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_2, svint16x2_t, int16_t,
+ z0 = svld1_vnum_s16_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_s16_14:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_14, svint16x2_t, int16_t,
+ z0 = svld1_vnum_s16_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s16_16:
+** incb x0, all, mul #16
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_16, svint16x2_t, int16_t,
+ z0 = svld1_vnum_s16_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s16_m1:
+** decb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_m1, svint16x2_t, int16_t,
+ z0 = svld1_vnum_s16_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_s16_m2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_m2, svint16x2_t, int16_t,
+ z0 = svld1_vnum_s16_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_s16_m16:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_m16, svint16x2_t, int16_t,
+ z0 = svld1_vnum_s16_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_s16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_m18, svint16x2_t, int16_t,
+ z0 = svld1_vnum_s16_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_s16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_x1, svint16x2_t, int16_t,
+ z0 = svld1_vnum_s16_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_s16_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_s16_x4.c
new file mode 100644
index 0000000..1fba648
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_s16_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_s16_base:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_base, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s16_index:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_index, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s16_1:
+** incb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_1, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 + svcnth ()),
+ z0 = svld1_x4 (pn8, x0 + svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s16_2:
+** incb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_2, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 + svcnth () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s16_3:
+** incb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_3, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 + svcnth () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 3))
+
+/*
+** ld1_s16_4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_4, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 + svcnth () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 4))
+
+/*
+** ld1_s16_28:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_28, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 + svcnth () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 28))
+
+/*
+** ld1_s16_32:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_32, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 + svcnth () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s16_m1:
+** decb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_m1, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 - svcnth ()),
+ z0 = svld1_x4 (pn8, x0 - svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s16_m2:
+** decb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_m2, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 - svcnth () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s16_m3:
+** decb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_m3, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 - svcnth () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 3))
+
+/*
+** ld1_s16_m4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_m4, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 - svcnth () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 4))
+
+/*
+** ld1_s16_m32:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_m32, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 - svcnth () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 32))
+
+/*
+** ld1_s16_m36:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_m36, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn8, x0 - svcnth () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 36))
+
+/*
+** ld1_s16_z17:
+** ld1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_z17, svint16x4_t, int16_t,
+ z17 = svld1_s16_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s16_z22:
+** ld1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_z22, svint16x4_t, int16_t,
+ z22 = svld1_s16_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s16_z28:
+** ld1h {z28\.h(?: - |, )z31\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_z28, svint16x4_t, int16_t,
+ z28 = svld1_s16_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_pn0, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_s16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_pn7, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_s16_pn15:
+** ld1h {z0\.h(?: - |, )z3\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s16_pn15, svint16x4_t, int16_t,
+ z0 = svld1_s16_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_s16_0:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_0, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s16_1:
+** incb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_1, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s16_2:
+** incb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_2, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s16_3:
+** incb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_3, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_s16_4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_4, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_s16_28:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_28, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_s16_32:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_32, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s16_m1:
+** decb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_m1, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s16_m2:
+** decb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_m2, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s16_m3:
+** decb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_m3, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_s16_m4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_m4, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_s16_m32:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_m32, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_s16_m36:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_m36, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_s16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s16_x1, svint16x4_t, int16_t,
+ z0 = svld1_vnum_s16_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_s32_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_s32_x2.c
new file mode 100644
index 0000000..2b1e1e9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_s32_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_s32_base:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_base, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s32_index:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_index, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s32_1:
+** incb x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_1, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn8, x0 + svcntw ()),
+ z0 = svld1_x2 (pn8, x0 + svcntw ()))
+
+/*
+** ld1_s32_2:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_2, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn8, x0 + svcntw () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcntw () * 2))
+
+/*
+** ld1_s32_14:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_14, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn8, x0 + svcntw () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcntw () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s32_16:
+** incb x0, all, mul #16
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_16, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn8, x0 + svcntw () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcntw () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s32_m1:
+** decb x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_m1, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn8, x0 - svcntw ()),
+ z0 = svld1_x2 (pn8, x0 - svcntw ()))
+
+/*
+** ld1_s32_m2:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_m2, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn8, x0 - svcntw () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcntw () * 2))
+
+/*
+** ld1_s32_m16:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_m16, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn8, x0 - svcntw () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcntw () * 16))
+
+/*
+** ld1_s32_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_m18, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn8, x0 - svcntw () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcntw () * 18))
+
+/*
+** ld1_s32_z17:
+** ld1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_z17, svint32x2_t, int32_t,
+ z17 = svld1_s32_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s32_z22:
+** ld1w {z22\.s(?: - |, )z23\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_z22, svint32x2_t, int32_t,
+ z22 = svld1_s32_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s32_z28:
+** ld1w {z28\.s(?: - |, )z29\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_z28, svint32x2_t, int32_t,
+ z28 = svld1_s32_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_pn0, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_s32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_pn7, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_s32_pn15:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_pn15, svint32x2_t, int32_t,
+ z0 = svld1_s32_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_s32_0:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_0, svint32x2_t, int32_t,
+ z0 = svld1_vnum_s32_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s32_1:
+** incb x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_1, svint32x2_t, int32_t,
+ z0 = svld1_vnum_s32_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_s32_2:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_2, svint32x2_t, int32_t,
+ z0 = svld1_vnum_s32_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_s32_14:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_14, svint32x2_t, int32_t,
+ z0 = svld1_vnum_s32_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s32_16:
+** incb x0, all, mul #16
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_16, svint32x2_t, int32_t,
+ z0 = svld1_vnum_s32_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s32_m1:
+** decb x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_m1, svint32x2_t, int32_t,
+ z0 = svld1_vnum_s32_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_s32_m2:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_m2, svint32x2_t, int32_t,
+ z0 = svld1_vnum_s32_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_s32_m16:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_m16, svint32x2_t, int32_t,
+ z0 = svld1_vnum_s32_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_s32_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_m18, svint32x2_t, int32_t,
+ z0 = svld1_vnum_s32_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_s32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_x1, svint32x2_t, int32_t,
+ z0 = svld1_vnum_s32_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_s32_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_s32_x4.c
new file mode 100644
index 0000000..9d4f1e2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_s32_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_s32_base:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_base, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s32_index:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_index, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s32_1:
+** incb x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_1, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 + svcntw ()),
+ z0 = svld1_x4 (pn8, x0 + svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s32_2:
+** incb x0, all, mul #2
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_2, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 + svcntw () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s32_3:
+** incb x0, all, mul #3
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_3, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 + svcntw () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 3))
+
+/*
+** ld1_s32_4:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_4, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 + svcntw () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 4))
+
+/*
+** ld1_s32_28:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_28, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 + svcntw () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 28))
+
+/*
+** ld1_s32_32:
+** [^{]*
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_32, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 + svcntw () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s32_m1:
+** decb x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_m1, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 - svcntw ()),
+ z0 = svld1_x4 (pn8, x0 - svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s32_m2:
+** decb x0, all, mul #2
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_m2, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 - svcntw () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s32_m3:
+** decb x0, all, mul #3
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_m3, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 - svcntw () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 3))
+
+/*
+** ld1_s32_m4:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_m4, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 - svcntw () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 4))
+
+/*
+** ld1_s32_m32:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_m32, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 - svcntw () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 32))
+
+/*
+** ld1_s32_m36:
+** [^{]*
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_m36, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn8, x0 - svcntw () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 36))
+
+/*
+** ld1_s32_z17:
+** ld1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_z17, svint32x4_t, int32_t,
+ z17 = svld1_s32_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s32_z22:
+** ld1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_z22, svint32x4_t, int32_t,
+ z22 = svld1_s32_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s32_z28:
+** ld1w {z28\.s(?: - |, )z31\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_z28, svint32x4_t, int32_t,
+ z28 = svld1_s32_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_pn0, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_s32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_pn7, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_s32_pn15:
+** ld1w {z0\.s(?: - |, )z3\.s}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s32_pn15, svint32x4_t, int32_t,
+ z0 = svld1_s32_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_s32_0:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_0, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s32_1:
+** incb x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_1, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s32_2:
+** incb x0, all, mul #2
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_2, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s32_3:
+** incb x0, all, mul #3
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_3, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_s32_4:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_4, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_s32_28:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_28, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_s32_32:
+** [^{]*
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_32, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s32_m1:
+** decb x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_m1, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s32_m2:
+** decb x0, all, mul #2
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_m2, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s32_m3:
+** decb x0, all, mul #3
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_m3, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_s32_m4:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_m4, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_s32_m32:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_m32, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_s32_m36:
+** [^{]*
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_m36, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_s32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s32_x1, svint32x4_t, int32_t,
+ z0 = svld1_vnum_s32_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_s64_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_s64_x2.c
new file mode 100644
index 0000000..25ec7968
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_s64_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_s64_base:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_base, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s64_index:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_index, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s64_1:
+** incb x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_1, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn8, x0 + svcntd ()),
+ z0 = svld1_x2 (pn8, x0 + svcntd ()))
+
+/*
+** ld1_s64_2:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_2, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn8, x0 + svcntd () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcntd () * 2))
+
+/*
+** ld1_s64_14:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_14, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn8, x0 + svcntd () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcntd () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s64_16:
+** incb x0, all, mul #16
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_16, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn8, x0 + svcntd () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcntd () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s64_m1:
+** decb x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_m1, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn8, x0 - svcntd ()),
+ z0 = svld1_x2 (pn8, x0 - svcntd ()))
+
+/*
+** ld1_s64_m2:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_m2, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn8, x0 - svcntd () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcntd () * 2))
+
+/*
+** ld1_s64_m16:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_m16, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn8, x0 - svcntd () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcntd () * 16))
+
+/*
+** ld1_s64_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_m18, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn8, x0 - svcntd () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcntd () * 18))
+
+/*
+** ld1_s64_z17:
+** ld1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_z17, svint64x2_t, int64_t,
+ z17 = svld1_s64_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s64_z22:
+** ld1d {z22\.d(?: - |, )z23\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_z22, svint64x2_t, int64_t,
+ z22 = svld1_s64_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s64_z28:
+** ld1d {z28\.d(?: - |, )z29\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_z28, svint64x2_t, int64_t,
+ z28 = svld1_s64_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_pn0, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_s64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_pn7, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_s64_pn15:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_pn15, svint64x2_t, int64_t,
+ z0 = svld1_s64_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_s64_0:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_0, svint64x2_t, int64_t,
+ z0 = svld1_vnum_s64_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s64_1:
+** incb x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_1, svint64x2_t, int64_t,
+ z0 = svld1_vnum_s64_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_s64_2:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_2, svint64x2_t, int64_t,
+ z0 = svld1_vnum_s64_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_s64_14:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_14, svint64x2_t, int64_t,
+ z0 = svld1_vnum_s64_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s64_16:
+** incb x0, all, mul #16
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_16, svint64x2_t, int64_t,
+ z0 = svld1_vnum_s64_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s64_m1:
+** decb x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_m1, svint64x2_t, int64_t,
+ z0 = svld1_vnum_s64_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_s64_m2:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_m2, svint64x2_t, int64_t,
+ z0 = svld1_vnum_s64_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_s64_m16:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_m16, svint64x2_t, int64_t,
+ z0 = svld1_vnum_s64_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_s64_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_m18, svint64x2_t, int64_t,
+ z0 = svld1_vnum_s64_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_s64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_x1, svint64x2_t, int64_t,
+ z0 = svld1_vnum_s64_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_s64_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_s64_x4.c
new file mode 100644
index 0000000..78b174d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_s64_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_s64_base:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_base, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s64_index:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_index, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s64_1:
+** incb x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_1, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 + svcntd ()),
+ z0 = svld1_x4 (pn8, x0 + svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s64_2:
+** incb x0, all, mul #2
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_2, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 + svcntd () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s64_3:
+** incb x0, all, mul #3
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_3, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 + svcntd () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 3))
+
+/*
+** ld1_s64_4:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_4, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 + svcntd () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 4))
+
+/*
+** ld1_s64_28:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_28, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 + svcntd () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 28))
+
+/*
+** ld1_s64_32:
+** [^{]*
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_32, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 + svcntd () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s64_m1:
+** decb x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_m1, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 - svcntd ()),
+ z0 = svld1_x4 (pn8, x0 - svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s64_m2:
+** decb x0, all, mul #2
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_m2, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 - svcntd () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s64_m3:
+** decb x0, all, mul #3
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_m3, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 - svcntd () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 3))
+
+/*
+** ld1_s64_m4:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_m4, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 - svcntd () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 4))
+
+/*
+** ld1_s64_m32:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_m32, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 - svcntd () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 32))
+
+/*
+** ld1_s64_m36:
+** [^{]*
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_m36, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn8, x0 - svcntd () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 36))
+
+/*
+** ld1_s64_z17:
+** ld1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_z17, svint64x4_t, int64_t,
+ z17 = svld1_s64_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s64_z22:
+** ld1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_z22, svint64x4_t, int64_t,
+ z22 = svld1_s64_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s64_z28:
+** ld1d {z28\.d(?: - |, )z31\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_z28, svint64x4_t, int64_t,
+ z28 = svld1_s64_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_pn0, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_s64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_pn7, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_s64_pn15:
+** ld1d {z0\.d(?: - |, )z3\.d}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s64_pn15, svint64x4_t, int64_t,
+ z0 = svld1_s64_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_s64_0:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_0, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s64_1:
+** incb x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_1, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s64_2:
+** incb x0, all, mul #2
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_2, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s64_3:
+** incb x0, all, mul #3
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_3, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_s64_4:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_4, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_s64_28:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_28, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_s64_32:
+** [^{]*
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_32, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s64_m1:
+** decb x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_m1, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s64_m2:
+** decb x0, all, mul #2
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_m2, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s64_m3:
+** decb x0, all, mul #3
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_m3, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_s64_m4:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_m4, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_s64_m32:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_m32, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_s64_m36:
+** [^{]*
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_m36, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_s64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s64_x1, svint64x4_t, int64_t,
+ z0 = svld1_vnum_s64_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_s8_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_s8_x2.c
new file mode 100644
index 0000000..61703f5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_s8_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_s8_base:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_base, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s8_index:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_index, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s8_1:
+** incb x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_1, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn8, x0 + svcntb ()),
+ z0 = svld1_x2 (pn8, x0 + svcntb ()))
+
+/*
+** ld1_s8_2:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_2, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn8, x0 + svcntb () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcntb () * 2))
+
+/*
+** ld1_s8_14:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_14, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn8, x0 + svcntb () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcntb () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s8_16:
+** incb x0, all, mul #16
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_16, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn8, x0 + svcntb () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcntb () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s8_m1:
+** decb x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_m1, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn8, x0 - svcntb ()),
+ z0 = svld1_x2 (pn8, x0 - svcntb ()))
+
+/*
+** ld1_s8_m2:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_m2, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn8, x0 - svcntb () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcntb () * 2))
+
+/*
+** ld1_s8_m16:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_m16, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn8, x0 - svcntb () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcntb () * 16))
+
+/*
+** ld1_s8_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_m18, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn8, x0 - svcntb () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcntb () * 18))
+
+/*
+** ld1_s8_z17:
+** ld1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_z17, svint8x2_t, int8_t,
+ z17 = svld1_s8_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s8_z22:
+** ld1b {z22\.b(?: - |, )z23\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_z22, svint8x2_t, int8_t,
+ z22 = svld1_s8_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s8_z28:
+** ld1b {z28\.b(?: - |, )z29\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_z28, svint8x2_t, int8_t,
+ z28 = svld1_s8_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_s8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_pn0, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_s8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_pn7, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_s8_pn15:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_pn15, svint8x2_t, int8_t,
+ z0 = svld1_s8_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_s8_0:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_0, svint8x2_t, int8_t,
+ z0 = svld1_vnum_s8_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s8_1:
+** incb x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_1, svint8x2_t, int8_t,
+ z0 = svld1_vnum_s8_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_s8_2:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_2, svint8x2_t, int8_t,
+ z0 = svld1_vnum_s8_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_s8_14:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_14, svint8x2_t, int8_t,
+ z0 = svld1_vnum_s8_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s8_16:
+** incb x0, all, mul #16
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_16, svint8x2_t, int8_t,
+ z0 = svld1_vnum_s8_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s8_m1:
+** decb x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_m1, svint8x2_t, int8_t,
+ z0 = svld1_vnum_s8_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_s8_m2:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_m2, svint8x2_t, int8_t,
+ z0 = svld1_vnum_s8_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_s8_m16:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_m16, svint8x2_t, int8_t,
+ z0 = svld1_vnum_s8_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_s8_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_m18, svint8x2_t, int8_t,
+ z0 = svld1_vnum_s8_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_s8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_x1, svint8x2_t, int8_t,
+ z0 = svld1_vnum_s8_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_s8_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_s8_x4.c
new file mode 100644
index 0000000..affed9f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_s8_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_s8_base:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_base, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s8_index:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_index, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s8_1:
+** incb x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_1, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 + svcntb ()),
+ z0 = svld1_x4 (pn8, x0 + svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s8_2:
+** incb x0, all, mul #2
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_2, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 + svcntb () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s8_3:
+** incb x0, all, mul #3
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_3, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 + svcntb () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 3))
+
+/*
+** ld1_s8_4:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_4, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 + svcntb () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 4))
+
+/*
+** ld1_s8_28:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_28, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 + svcntb () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 28))
+
+/*
+** ld1_s8_32:
+** [^{]*
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_32, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 + svcntb () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s8_m1:
+** decb x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_m1, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 - svcntb ()),
+ z0 = svld1_x4 (pn8, x0 - svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s8_m2:
+** decb x0, all, mul #2
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_m2, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 - svcntb () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_s8_m3:
+** decb x0, all, mul #3
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_m3, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 - svcntb () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 3))
+
+/*
+** ld1_s8_m4:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_m4, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 - svcntb () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 4))
+
+/*
+** ld1_s8_m32:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_m32, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 - svcntb () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 32))
+
+/*
+** ld1_s8_m36:
+** [^{]*
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_m36, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn8, x0 - svcntb () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 36))
+
+/*
+** ld1_s8_z17:
+** ld1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_z17, svint8x4_t, int8_t,
+ z17 = svld1_s8_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s8_z22:
+** ld1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_z22, svint8x4_t, int8_t,
+ z22 = svld1_s8_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s8_z28:
+** ld1b {z28\.b(?: - |, )z31\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_z28, svint8x4_t, int8_t,
+ z28 = svld1_s8_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_s8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_pn0, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_s8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_pn7, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_s8_pn15:
+** ld1b {z0\.b(?: - |, )z3\.b}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_s8_pn15, svint8x4_t, int8_t,
+ z0 = svld1_s8_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_s8_0:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_0, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s8_1:
+** incb x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_1, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s8_2:
+** incb x0, all, mul #2
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_2, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s8_3:
+** incb x0, all, mul #3
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_3, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_s8_4:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_4, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_s8_28:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_28, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_s8_32:
+** [^{]*
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_32, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s8_m1:
+** decb x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_m1, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s8_m2:
+** decb x0, all, mul #2
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_m2, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_s8_m3:
+** decb x0, all, mul #3
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_m3, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_s8_m4:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_m4, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_s8_m32:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_m32, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_s8_m36:
+** [^{]*
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_m36, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_s8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_s8_x1, svint8x4_t, int8_t,
+ z0 = svld1_vnum_s8_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_u16_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_u16_x2.c
new file mode 100644
index 0000000..3cce993
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_u16_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_u16_base:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_base, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u16_index:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_index, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u16_1:
+** incb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_1, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn8, x0 + svcnth ()),
+ z0 = svld1_x2 (pn8, x0 + svcnth ()))
+
+/*
+** ld1_u16_2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_2, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn8, x0 + svcnth () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcnth () * 2))
+
+/*
+** ld1_u16_14:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_14, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn8, x0 + svcnth () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcnth () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u16_16:
+** incb x0, all, mul #16
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_16, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn8, x0 + svcnth () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcnth () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u16_m1:
+** decb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_m1, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn8, x0 - svcnth ()),
+ z0 = svld1_x2 (pn8, x0 - svcnth ()))
+
+/*
+** ld1_u16_m2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_m2, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn8, x0 - svcnth () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcnth () * 2))
+
+/*
+** ld1_u16_m16:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_m16, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn8, x0 - svcnth () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcnth () * 16))
+
+/*
+** ld1_u16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_m18, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn8, x0 - svcnth () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcnth () * 18))
+
+/*
+** ld1_u16_z17:
+** ld1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_z17, svuint16x2_t, uint16_t,
+ z17 = svld1_u16_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u16_z22:
+** ld1h {z22\.h(?: - |, )z23\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_z22, svuint16x2_t, uint16_t,
+ z22 = svld1_u16_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u16_z28:
+** ld1h {z28\.h(?: - |, )z29\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_z28, svuint16x2_t, uint16_t,
+ z28 = svld1_u16_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_pn0, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_u16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_pn7, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_u16_pn15:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_pn15, svuint16x2_t, uint16_t,
+ z0 = svld1_u16_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_u16_0:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_0, svuint16x2_t, uint16_t,
+ z0 = svld1_vnum_u16_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u16_1:
+** incb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_1, svuint16x2_t, uint16_t,
+ z0 = svld1_vnum_u16_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_u16_2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_2, svuint16x2_t, uint16_t,
+ z0 = svld1_vnum_u16_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_u16_14:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_14, svuint16x2_t, uint16_t,
+ z0 = svld1_vnum_u16_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u16_16:
+** incb x0, all, mul #16
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_16, svuint16x2_t, uint16_t,
+ z0 = svld1_vnum_u16_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u16_m1:
+** decb x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_m1, svuint16x2_t, uint16_t,
+ z0 = svld1_vnum_u16_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_u16_m2:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_m2, svuint16x2_t, uint16_t,
+ z0 = svld1_vnum_u16_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_u16_m16:
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_m16, svuint16x2_t, uint16_t,
+ z0 = svld1_vnum_u16_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_u16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_m18, svuint16x2_t, uint16_t,
+ z0 = svld1_vnum_u16_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_u16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_x1, svuint16x2_t, uint16_t,
+ z0 = svld1_vnum_u16_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_u16_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_u16_x4.c
new file mode 100644
index 0000000..1ca2360
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_u16_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_u16_base:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_base, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u16_index:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_index, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u16_1:
+** incb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_1, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 + svcnth ()),
+ z0 = svld1_x4 (pn8, x0 + svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u16_2:
+** incb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_2, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 + svcnth () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u16_3:
+** incb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_3, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 + svcnth () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 3))
+
+/*
+** ld1_u16_4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_4, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 + svcnth () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 4))
+
+/*
+** ld1_u16_28:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_28, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 + svcnth () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 28))
+
+/*
+** ld1_u16_32:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_32, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 + svcnth () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcnth () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u16_m1:
+** decb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_m1, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 - svcnth ()),
+ z0 = svld1_x4 (pn8, x0 - svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u16_m2:
+** decb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_m2, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 - svcnth () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u16_m3:
+** decb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_m3, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 - svcnth () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 3))
+
+/*
+** ld1_u16_m4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_m4, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 - svcnth () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 4))
+
+/*
+** ld1_u16_m32:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_m32, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 - svcnth () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 32))
+
+/*
+** ld1_u16_m36:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_m36, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn8, x0 - svcnth () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcnth () * 36))
+
+/*
+** ld1_u16_z17:
+** ld1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_z17, svuint16x4_t, uint16_t,
+ z17 = svld1_u16_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u16_z22:
+** ld1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_z22, svuint16x4_t, uint16_t,
+ z22 = svld1_u16_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u16_z28:
+** ld1h {z28\.h(?: - |, )z31\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_z28, svuint16x4_t, uint16_t,
+ z28 = svld1_u16_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_pn0, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_u16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_pn7, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_u16_pn15:
+** ld1h {z0\.h(?: - |, )z3\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u16_pn15, svuint16x4_t, uint16_t,
+ z0 = svld1_u16_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_u16_0:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_0, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u16_1:
+** incb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_1, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u16_2:
+** incb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_2, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u16_3:
+** incb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_3, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_u16_4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_4, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_u16_28:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_28, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_u16_32:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_32, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u16_m1:
+** decb x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_m1, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u16_m2:
+** decb x0, all, mul #2
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_m2, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u16_m3:
+** decb x0, all, mul #3
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_m3, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_u16_m4:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_m4, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_u16_m32:
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_m32, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_u16_m36:
+** [^{]*
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_m36, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_u16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1h {z0\.h - z3\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1h {z0\.h - z3\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u16_x1, svuint16x4_t, uint16_t,
+ z0 = svld1_vnum_u16_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_u32_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_u32_x2.c
new file mode 100644
index 0000000..87c6cbf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_u32_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_u32_base:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_base, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u32_index:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_index, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u32_1:
+** incb x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_1, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn8, x0 + svcntw ()),
+ z0 = svld1_x2 (pn8, x0 + svcntw ()))
+
+/*
+** ld1_u32_2:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_2, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn8, x0 + svcntw () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcntw () * 2))
+
+/*
+** ld1_u32_14:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_14, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn8, x0 + svcntw () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcntw () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u32_16:
+** incb x0, all, mul #16
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_16, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn8, x0 + svcntw () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcntw () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u32_m1:
+** decb x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_m1, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn8, x0 - svcntw ()),
+ z0 = svld1_x2 (pn8, x0 - svcntw ()))
+
+/*
+** ld1_u32_m2:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_m2, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn8, x0 - svcntw () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcntw () * 2))
+
+/*
+** ld1_u32_m16:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_m16, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn8, x0 - svcntw () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcntw () * 16))
+
+/*
+** ld1_u32_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_m18, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn8, x0 - svcntw () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcntw () * 18))
+
+/*
+** ld1_u32_z17:
+** ld1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_z17, svuint32x2_t, uint32_t,
+ z17 = svld1_u32_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u32_z22:
+** ld1w {z22\.s(?: - |, )z23\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_z22, svuint32x2_t, uint32_t,
+ z22 = svld1_u32_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u32_z28:
+** ld1w {z28\.s(?: - |, )z29\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_z28, svuint32x2_t, uint32_t,
+ z28 = svld1_u32_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_pn0, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_u32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_pn7, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_u32_pn15:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_pn15, svuint32x2_t, uint32_t,
+ z0 = svld1_u32_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_u32_0:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_0, svuint32x2_t, uint32_t,
+ z0 = svld1_vnum_u32_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u32_1:
+** incb x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_1, svuint32x2_t, uint32_t,
+ z0 = svld1_vnum_u32_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_u32_2:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_2, svuint32x2_t, uint32_t,
+ z0 = svld1_vnum_u32_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_u32_14:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_14, svuint32x2_t, uint32_t,
+ z0 = svld1_vnum_u32_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u32_16:
+** incb x0, all, mul #16
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_16, svuint32x2_t, uint32_t,
+ z0 = svld1_vnum_u32_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u32_m1:
+** decb x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_m1, svuint32x2_t, uint32_t,
+ z0 = svld1_vnum_u32_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_u32_m2:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_m2, svuint32x2_t, uint32_t,
+ z0 = svld1_vnum_u32_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_u32_m16:
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_m16, svuint32x2_t, uint32_t,
+ z0 = svld1_vnum_u32_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_u32_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_m18, svuint32x2_t, uint32_t,
+ z0 = svld1_vnum_u32_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_u32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_x1, svuint32x2_t, uint32_t,
+ z0 = svld1_vnum_u32_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_u32_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_u32_x4.c
new file mode 100644
index 0000000..1872afa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_u32_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_u32_base:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_base, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u32_index:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_index, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u32_1:
+** incb x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_1, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 + svcntw ()),
+ z0 = svld1_x4 (pn8, x0 + svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u32_2:
+** incb x0, all, mul #2
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_2, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 + svcntw () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u32_3:
+** incb x0, all, mul #3
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_3, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 + svcntw () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 3))
+
+/*
+** ld1_u32_4:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_4, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 + svcntw () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 4))
+
+/*
+** ld1_u32_28:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_28, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 + svcntw () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 28))
+
+/*
+** ld1_u32_32:
+** [^{]*
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_32, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 + svcntw () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcntw () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u32_m1:
+** decb x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_m1, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 - svcntw ()),
+ z0 = svld1_x4 (pn8, x0 - svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u32_m2:
+** decb x0, all, mul #2
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_m2, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 - svcntw () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u32_m3:
+** decb x0, all, mul #3
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_m3, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 - svcntw () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 3))
+
+/*
+** ld1_u32_m4:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_m4, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 - svcntw () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 4))
+
+/*
+** ld1_u32_m32:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_m32, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 - svcntw () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 32))
+
+/*
+** ld1_u32_m36:
+** [^{]*
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_m36, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn8, x0 - svcntw () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcntw () * 36))
+
+/*
+** ld1_u32_z17:
+** ld1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_z17, svuint32x4_t, uint32_t,
+ z17 = svld1_u32_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u32_z22:
+** ld1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_z22, svuint32x4_t, uint32_t,
+ z22 = svld1_u32_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u32_z28:
+** ld1w {z28\.s(?: - |, )z31\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_z28, svuint32x4_t, uint32_t,
+ z28 = svld1_u32_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_pn0, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_u32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_pn7, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_u32_pn15:
+** ld1w {z0\.s(?: - |, )z3\.s}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u32_pn15, svuint32x4_t, uint32_t,
+ z0 = svld1_u32_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_u32_0:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_0, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u32_1:
+** incb x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_1, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u32_2:
+** incb x0, all, mul #2
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_2, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u32_3:
+** incb x0, all, mul #3
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_3, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_u32_4:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_4, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_u32_28:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_28, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_u32_32:
+** [^{]*
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_32, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u32_m1:
+** decb x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_m1, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u32_m2:
+** decb x0, all, mul #2
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_m2, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u32_m3:
+** decb x0, all, mul #3
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_m3, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_u32_m4:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_m4, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_u32_m32:
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_m32, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_u32_m36:
+** [^{]*
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_m36, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_u32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1w {z0\.s - z3\.s}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1w {z0\.s - z3\.s}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u32_x1, svuint32x4_t, uint32_t,
+ z0 = svld1_vnum_u32_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_u64_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_u64_x2.c
new file mode 100644
index 0000000..3318adb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_u64_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_u64_base:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_base, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u64_index:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_index, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u64_1:
+** incb x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_1, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn8, x0 + svcntd ()),
+ z0 = svld1_x2 (pn8, x0 + svcntd ()))
+
+/*
+** ld1_u64_2:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_2, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn8, x0 + svcntd () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcntd () * 2))
+
+/*
+** ld1_u64_14:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_14, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn8, x0 + svcntd () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcntd () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u64_16:
+** incb x0, all, mul #16
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_16, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn8, x0 + svcntd () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcntd () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u64_m1:
+** decb x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_m1, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn8, x0 - svcntd ()),
+ z0 = svld1_x2 (pn8, x0 - svcntd ()))
+
+/*
+** ld1_u64_m2:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_m2, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn8, x0 - svcntd () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcntd () * 2))
+
+/*
+** ld1_u64_m16:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_m16, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn8, x0 - svcntd () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcntd () * 16))
+
+/*
+** ld1_u64_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_m18, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn8, x0 - svcntd () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcntd () * 18))
+
+/*
+** ld1_u64_z17:
+** ld1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_z17, svuint64x2_t, uint64_t,
+ z17 = svld1_u64_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u64_z22:
+** ld1d {z22\.d(?: - |, )z23\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_z22, svuint64x2_t, uint64_t,
+ z22 = svld1_u64_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u64_z28:
+** ld1d {z28\.d(?: - |, )z29\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_z28, svuint64x2_t, uint64_t,
+ z28 = svld1_u64_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_pn0, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_u64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_pn7, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_u64_pn15:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_pn15, svuint64x2_t, uint64_t,
+ z0 = svld1_u64_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_u64_0:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_0, svuint64x2_t, uint64_t,
+ z0 = svld1_vnum_u64_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u64_1:
+** incb x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_1, svuint64x2_t, uint64_t,
+ z0 = svld1_vnum_u64_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_u64_2:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_2, svuint64x2_t, uint64_t,
+ z0 = svld1_vnum_u64_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_u64_14:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_14, svuint64x2_t, uint64_t,
+ z0 = svld1_vnum_u64_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u64_16:
+** incb x0, all, mul #16
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_16, svuint64x2_t, uint64_t,
+ z0 = svld1_vnum_u64_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u64_m1:
+** decb x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_m1, svuint64x2_t, uint64_t,
+ z0 = svld1_vnum_u64_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_u64_m2:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_m2, svuint64x2_t, uint64_t,
+ z0 = svld1_vnum_u64_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_u64_m16:
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_m16, svuint64x2_t, uint64_t,
+ z0 = svld1_vnum_u64_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_u64_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_m18, svuint64x2_t, uint64_t,
+ z0 = svld1_vnum_u64_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_u64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_x1, svuint64x2_t, uint64_t,
+ z0 = svld1_vnum_u64_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_u64_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_u64_x4.c
new file mode 100644
index 0000000..9b88d3f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_u64_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_u64_base:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_base, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u64_index:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_index, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u64_1:
+** incb x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_1, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 + svcntd ()),
+ z0 = svld1_x4 (pn8, x0 + svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u64_2:
+** incb x0, all, mul #2
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_2, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 + svcntd () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u64_3:
+** incb x0, all, mul #3
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_3, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 + svcntd () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 3))
+
+/*
+** ld1_u64_4:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_4, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 + svcntd () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 4))
+
+/*
+** ld1_u64_28:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_28, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 + svcntd () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 28))
+
+/*
+** ld1_u64_32:
+** [^{]*
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_32, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 + svcntd () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcntd () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u64_m1:
+** decb x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_m1, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 - svcntd ()),
+ z0 = svld1_x4 (pn8, x0 - svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u64_m2:
+** decb x0, all, mul #2
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_m2, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 - svcntd () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u64_m3:
+** decb x0, all, mul #3
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_m3, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 - svcntd () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 3))
+
+/*
+** ld1_u64_m4:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_m4, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 - svcntd () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 4))
+
+/*
+** ld1_u64_m32:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_m32, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 - svcntd () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 32))
+
+/*
+** ld1_u64_m36:
+** [^{]*
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_m36, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn8, x0 - svcntd () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcntd () * 36))
+
+/*
+** ld1_u64_z17:
+** ld1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_z17, svuint64x4_t, uint64_t,
+ z17 = svld1_u64_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u64_z22:
+** ld1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_z22, svuint64x4_t, uint64_t,
+ z22 = svld1_u64_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u64_z28:
+** ld1d {z28\.d(?: - |, )z31\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_z28, svuint64x4_t, uint64_t,
+ z28 = svld1_u64_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_pn0, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_u64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_pn7, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_u64_pn15:
+** ld1d {z0\.d(?: - |, )z3\.d}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u64_pn15, svuint64x4_t, uint64_t,
+ z0 = svld1_u64_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_u64_0:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_0, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u64_1:
+** incb x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_1, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u64_2:
+** incb x0, all, mul #2
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_2, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u64_3:
+** incb x0, all, mul #3
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_3, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_u64_4:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_4, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_u64_28:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_28, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_u64_32:
+** [^{]*
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_32, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u64_m1:
+** decb x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_m1, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u64_m2:
+** decb x0, all, mul #2
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_m2, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u64_m3:
+** decb x0, all, mul #3
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_m3, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_u64_m4:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_m4, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_u64_m32:
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_m32, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_u64_m36:
+** [^{]*
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_m36, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_u64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1d {z0\.d - z3\.d}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1d {z0\.d - z3\.d}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u64_x1, svuint64x4_t, uint64_t,
+ z0 = svld1_vnum_u64_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_u8_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_u8_x2.c
new file mode 100644
index 0000000..7b35e3c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_u8_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_u8_base:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_base, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn8, x0),
+ z0 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u8_index:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_index, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn8, x0 + x1),
+ z0 = svld1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u8_1:
+** incb x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_1, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn8, x0 + svcntb ()),
+ z0 = svld1_x2 (pn8, x0 + svcntb ()))
+
+/*
+** ld1_u8_2:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_2, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn8, x0 + svcntb () * 2),
+ z0 = svld1_x2 (pn8, x0 + svcntb () * 2))
+
+/*
+** ld1_u8_14:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_14, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn8, x0 + svcntb () * 14),
+ z0 = svld1_x2 (pn8, x0 + svcntb () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u8_16:
+** incb x0, all, mul #16
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_16, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn8, x0 + svcntb () * 16),
+ z0 = svld1_x2 (pn8, x0 + svcntb () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u8_m1:
+** decb x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_m1, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn8, x0 - svcntb ()),
+ z0 = svld1_x2 (pn8, x0 - svcntb ()))
+
+/*
+** ld1_u8_m2:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_m2, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn8, x0 - svcntb () * 2),
+ z0 = svld1_x2 (pn8, x0 - svcntb () * 2))
+
+/*
+** ld1_u8_m16:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_m16, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn8, x0 - svcntb () * 16),
+ z0 = svld1_x2 (pn8, x0 - svcntb () * 16))
+
+/*
+** ld1_u8_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_m18, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn8, x0 - svcntb () * 18),
+ z0 = svld1_x2 (pn8, x0 - svcntb () * 18))
+
+/*
+** ld1_u8_z17:
+** ld1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_z17, svuint8x2_t, uint8_t,
+ z17 = svld1_u8_x2 (pn8, x0),
+ z17 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u8_z22:
+** ld1b {z22\.b(?: - |, )z23\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_z22, svuint8x2_t, uint8_t,
+ z22 = svld1_u8_x2 (pn8, x0),
+ z22 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u8_z28:
+** ld1b {z28\.b(?: - |, )z29\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_z28, svuint8x2_t, uint8_t,
+ z28 = svld1_u8_x2 (pn8, x0),
+ z28 = svld1_x2 (pn8, x0))
+
+/*
+** ld1_u8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_pn0, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn0, x0),
+ z0 = svld1_x2 (pn0, x0))
+
+/*
+** ld1_u8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_pn7, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn7, x0),
+ z0 = svld1_x2 (pn7, x0))
+
+/*
+** ld1_u8_pn15:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_pn15, svuint8x2_t, uint8_t,
+ z0 = svld1_u8_x2 (pn15, x0),
+ z0 = svld1_x2 (pn15, x0))
+
+/*
+** ld1_vnum_u8_0:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_0, svuint8x2_t, uint8_t,
+ z0 = svld1_vnum_u8_x2 (pn8, x0, 0),
+ z0 = svld1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u8_1:
+** incb x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_1, svuint8x2_t, uint8_t,
+ z0 = svld1_vnum_u8_x2 (pn8, x0, 1),
+ z0 = svld1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ld1_vnum_u8_2:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_2, svuint8x2_t, uint8_t,
+ z0 = svld1_vnum_u8_x2 (pn8, x0, 2),
+ z0 = svld1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ld1_vnum_u8_14:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_14, svuint8x2_t, uint8_t,
+ z0 = svld1_vnum_u8_x2 (pn8, x0, 14),
+ z0 = svld1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u8_16:
+** incb x0, all, mul #16
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_16, svuint8x2_t, uint8_t,
+ z0 = svld1_vnum_u8_x2 (pn8, x0, 16),
+ z0 = svld1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u8_m1:
+** decb x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_m1, svuint8x2_t, uint8_t,
+ z0 = svld1_vnum_u8_x2 (pn8, x0, -1),
+ z0 = svld1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ld1_vnum_u8_m2:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_m2, svuint8x2_t, uint8_t,
+ z0 = svld1_vnum_u8_x2 (pn8, x0, -2),
+ z0 = svld1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ld1_vnum_u8_m16:
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_m16, svuint8x2_t, uint8_t,
+ z0 = svld1_vnum_u8_x2 (pn8, x0, -16),
+ z0 = svld1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ld1_vnum_u8_m18:
+** addvl (x[0-9]+), x0, #-18
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_m18, svuint8x2_t, uint8_t,
+ z0 = svld1_vnum_u8_x2 (pn8, x0, -18),
+ z0 = svld1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ld1_vnum_u8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_x1, svuint8x2_t, uint8_t,
+ z0 = svld1_vnum_u8_x2 (pn8, x0, x1),
+ z0 = svld1_vnum_x2 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_u8_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_u8_x4.c
new file mode 100644
index 0000000..3441432
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ld1_u8_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ld1_u8_base:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_base, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0),
+ z0 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u8_index:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_index, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 + x1),
+ z0 = svld1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u8_1:
+** incb x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_1, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 + svcntb ()),
+ z0 = svld1_x4 (pn8, x0 + svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u8_2:
+** incb x0, all, mul #2
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_2, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 + svcntb () * 2),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u8_3:
+** incb x0, all, mul #3
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_3, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 + svcntb () * 3),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 3))
+
+/*
+** ld1_u8_4:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_4, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 + svcntb () * 4),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 4))
+
+/*
+** ld1_u8_28:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_28, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 + svcntb () * 28),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 28))
+
+/*
+** ld1_u8_32:
+** [^{]*
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_32, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 + svcntb () * 32),
+ z0 = svld1_x4 (pn8, x0 + svcntb () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u8_m1:
+** decb x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_m1, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 - svcntb ()),
+ z0 = svld1_x4 (pn8, x0 - svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u8_m2:
+** decb x0, all, mul #2
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_m2, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 - svcntb () * 2),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_u8_m3:
+** decb x0, all, mul #3
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_m3, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 - svcntb () * 3),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 3))
+
+/*
+** ld1_u8_m4:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_m4, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 - svcntb () * 4),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 4))
+
+/*
+** ld1_u8_m32:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_m32, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 - svcntb () * 32),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 32))
+
+/*
+** ld1_u8_m36:
+** [^{]*
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_m36, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn8, x0 - svcntb () * 36),
+ z0 = svld1_x4 (pn8, x0 - svcntb () * 36))
+
+/*
+** ld1_u8_z17:
+** ld1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_z17, svuint8x4_t, uint8_t,
+ z17 = svld1_u8_x4 (pn8, x0),
+ z17 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u8_z22:
+** ld1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_z22, svuint8x4_t, uint8_t,
+ z22 = svld1_u8_x4 (pn8, x0),
+ z22 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u8_z28:
+** ld1b {z28\.b(?: - |, )z31\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_z28, svuint8x4_t, uint8_t,
+ z28 = svld1_u8_x4 (pn8, x0),
+ z28 = svld1_x4 (pn8, x0))
+
+/*
+** ld1_u8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ld1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_pn0, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn0, x0),
+ z0 = svld1_x4 (pn0, x0))
+
+/*
+** ld1_u8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ld1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_pn7, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn7, x0),
+ z0 = svld1_x4 (pn7, x0))
+
+/*
+** ld1_u8_pn15:
+** ld1b {z0\.b(?: - |, )z3\.b}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_u8_pn15, svuint8x4_t, uint8_t,
+ z0 = svld1_u8_x4 (pn15, x0),
+ z0 = svld1_x4 (pn15, x0))
+
+/*
+** ld1_vnum_u8_0:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_0, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, 0),
+ z0 = svld1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u8_1:
+** incb x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_1, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, 1),
+ z0 = svld1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u8_2:
+** incb x0, all, mul #2
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_2, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, 2),
+ z0 = svld1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u8_3:
+** incb x0, all, mul #3
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_3, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, 3),
+ z0 = svld1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ld1_vnum_u8_4:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_4, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, 4),
+ z0 = svld1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ld1_vnum_u8_28:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_28, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, 28),
+ z0 = svld1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ld1_vnum_u8_32:
+** [^{]*
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_32, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, 32),
+ z0 = svld1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u8_m1:
+** decb x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_m1, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, -1),
+ z0 = svld1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u8_m2:
+** decb x0, all, mul #2
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_m2, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, -2),
+ z0 = svld1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ld1_vnum_u8_m3:
+** decb x0, all, mul #3
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_m3, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, -3),
+ z0 = svld1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ld1_vnum_u8_m4:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_m4, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, -4),
+ z0 = svld1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ld1_vnum_u8_m32:
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_m32, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, -32),
+ z0 = svld1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ld1_vnum_u8_m36:
+** [^{]*
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_m36, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, -36),
+ z0 = svld1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ld1_vnum_u8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ld1b {z0\.b - z3\.b}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ld1b {z0\.b - z3\.b}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ld1_vnum_u8_x1, svuint8x4_t, uint8_t,
+ z0 = svld1_vnum_u8_x4 (pn8, x0, x1),
+ z0 = svld1_vnum_x4 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_bf16_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_bf16_x2.c
new file mode 100644
index 0000000..2a7c0cc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_bf16_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_bf16_base:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_base, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_bf16_index:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_index, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_bf16_1:
+** incb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_1, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn8, x0 + svcnth ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth ()))
+
+/*
+** ldnt1_bf16_2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_2, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn8, x0 + svcnth () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth () * 2))
+
+/*
+** ldnt1_bf16_14:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_14, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn8, x0 + svcnth () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_bf16_16:
+** incb x0, all, mul #16
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_16, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn8, x0 + svcnth () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_bf16_m1:
+** decb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_m1, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn8, x0 - svcnth ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth ()))
+
+/*
+** ldnt1_bf16_m2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_m2, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn8, x0 - svcnth () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth () * 2))
+
+/*
+** ldnt1_bf16_m16:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_m16, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn8, x0 - svcnth () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth () * 16))
+
+/*
+** ldnt1_bf16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_m18, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn8, x0 - svcnth () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth () * 18))
+
+/*
+** ldnt1_bf16_z17:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_z17, svbfloat16x2_t, bfloat16_t,
+ z17 = svldnt1_bf16_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_bf16_z22:
+** ldnt1h {z22\.h(?: - |, )z23\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_z22, svbfloat16x2_t, bfloat16_t,
+ z22 = svldnt1_bf16_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_bf16_z28:
+** ldnt1h {z28\.h(?: - |, )z29\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_z28, svbfloat16x2_t, bfloat16_t,
+ z28 = svldnt1_bf16_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_bf16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_pn0, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_bf16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_pn7, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_bf16_pn15:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_pn15, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_bf16_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_bf16_0:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_0, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_bf16_1:
+** incb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_1, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_bf16_2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_2, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_bf16_14:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_14, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_bf16_16:
+** incb x0, all, mul #16
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_16, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_bf16_m1:
+** decb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_m1, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_bf16_m2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_m2, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_bf16_m16:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_m16, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_bf16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_m18, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_bf16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_x1, svbfloat16x2_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_bf16_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_bf16_x4.c
new file mode 100644
index 0000000..5f0e5e4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_bf16_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_bf16_base:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_base, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_bf16_index:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_index, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_bf16_1:
+** incb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_1, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 + svcnth ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_bf16_2:
+** incb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_2, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 + svcnth () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_bf16_3:
+** incb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_3, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 + svcnth () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 3))
+
+/*
+** ldnt1_bf16_4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_4, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 + svcnth () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 4))
+
+/*
+** ldnt1_bf16_28:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_28, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 + svcnth () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 28))
+
+/*
+** ldnt1_bf16_32:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_32, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 + svcnth () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_bf16_m1:
+** decb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_m1, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 - svcnth ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_bf16_m2:
+** decb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_m2, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 - svcnth () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_bf16_m3:
+** decb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_m3, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 - svcnth () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 3))
+
+/*
+** ldnt1_bf16_m4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_m4, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 - svcnth () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 4))
+
+/*
+** ldnt1_bf16_m32:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_m32, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 - svcnth () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 32))
+
+/*
+** ldnt1_bf16_m36:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_m36, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn8, x0 - svcnth () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 36))
+
+/*
+** ldnt1_bf16_z17:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_z17, svbfloat16x4_t, bfloat16_t,
+ z17 = svldnt1_bf16_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_bf16_z22:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_z22, svbfloat16x4_t, bfloat16_t,
+ z22 = svldnt1_bf16_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_bf16_z28:
+** ldnt1h {z28\.h(?: - |, )z31\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_z28, svbfloat16x4_t, bfloat16_t,
+ z28 = svldnt1_bf16_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_bf16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_pn0, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_bf16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_pn7, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_bf16_pn15:
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_bf16_pn15, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_bf16_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_bf16_0:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_0, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_bf16_1:
+** incb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_1, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_bf16_2:
+** incb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_2, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_bf16_3:
+** incb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_3, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_bf16_4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_4, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_bf16_28:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_28, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_bf16_32:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_32, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_bf16_m1:
+** decb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_m1, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_bf16_m2:
+** decb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_m2, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_bf16_m3:
+** decb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_m3, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_bf16_m4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_m4, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_bf16_m32:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_m32, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_bf16_m36:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_m36, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_bf16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_bf16_x1, svbfloat16x4_t, bfloat16_t,
+ z0 = svldnt1_vnum_bf16_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_f16_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_f16_x2.c
new file mode 100644
index 0000000..11fb6ed
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_f16_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_f16_base:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_base, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_f16_index:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_index, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f16_1:
+** incb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_1, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn8, x0 + svcnth ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth ()))
+
+/*
+** ldnt1_f16_2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_2, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn8, x0 + svcnth () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth () * 2))
+
+/*
+** ldnt1_f16_14:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_14, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn8, x0 + svcnth () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f16_16:
+** incb x0, all, mul #16
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_16, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn8, x0 + svcnth () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f16_m1:
+** decb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_m1, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn8, x0 - svcnth ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth ()))
+
+/*
+** ldnt1_f16_m2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_m2, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn8, x0 - svcnth () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth () * 2))
+
+/*
+** ldnt1_f16_m16:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_m16, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn8, x0 - svcnth () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth () * 16))
+
+/*
+** ldnt1_f16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_m18, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn8, x0 - svcnth () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth () * 18))
+
+/*
+** ldnt1_f16_z17:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_z17, svfloat16x2_t, float16_t,
+ z17 = svldnt1_f16_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_f16_z22:
+** ldnt1h {z22\.h(?: - |, )z23\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_z22, svfloat16x2_t, float16_t,
+ z22 = svldnt1_f16_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_f16_z28:
+** ldnt1h {z28\.h(?: - |, )z29\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_z28, svfloat16x2_t, float16_t,
+ z28 = svldnt1_f16_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_f16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_pn0, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_f16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_pn7, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_f16_pn15:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_pn15, svfloat16x2_t, float16_t,
+ z0 = svldnt1_f16_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_f16_0:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_0, svfloat16x2_t, float16_t,
+ z0 = svldnt1_vnum_f16_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f16_1:
+** incb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_1, svfloat16x2_t, float16_t,
+ z0 = svldnt1_vnum_f16_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_f16_2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_2, svfloat16x2_t, float16_t,
+ z0 = svldnt1_vnum_f16_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_f16_14:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_14, svfloat16x2_t, float16_t,
+ z0 = svldnt1_vnum_f16_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f16_16:
+** incb x0, all, mul #16
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_16, svfloat16x2_t, float16_t,
+ z0 = svldnt1_vnum_f16_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f16_m1:
+** decb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_m1, svfloat16x2_t, float16_t,
+ z0 = svldnt1_vnum_f16_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_f16_m2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_m2, svfloat16x2_t, float16_t,
+ z0 = svldnt1_vnum_f16_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_f16_m16:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_m16, svfloat16x2_t, float16_t,
+ z0 = svldnt1_vnum_f16_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_f16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_m18, svfloat16x2_t, float16_t,
+ z0 = svldnt1_vnum_f16_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_f16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_x1, svfloat16x2_t, float16_t,
+ z0 = svldnt1_vnum_f16_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_f16_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_f16_x4.c
new file mode 100644
index 0000000..bf7113d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_f16_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_f16_base:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_base, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f16_index:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_index, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f16_1:
+** incb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_1, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 + svcnth ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f16_2:
+** incb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_2, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 + svcnth () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f16_3:
+** incb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_3, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 + svcnth () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 3))
+
+/*
+** ldnt1_f16_4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_4, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 + svcnth () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 4))
+
+/*
+** ldnt1_f16_28:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_28, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 + svcnth () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 28))
+
+/*
+** ldnt1_f16_32:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_32, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 + svcnth () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f16_m1:
+** decb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_m1, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 - svcnth ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f16_m2:
+** decb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_m2, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 - svcnth () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f16_m3:
+** decb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_m3, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 - svcnth () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 3))
+
+/*
+** ldnt1_f16_m4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_m4, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 - svcnth () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 4))
+
+/*
+** ldnt1_f16_m32:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_m32, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 - svcnth () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 32))
+
+/*
+** ldnt1_f16_m36:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_m36, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn8, x0 - svcnth () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 36))
+
+/*
+** ldnt1_f16_z17:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_z17, svfloat16x4_t, float16_t,
+ z17 = svldnt1_f16_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f16_z22:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_z22, svfloat16x4_t, float16_t,
+ z22 = svldnt1_f16_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f16_z28:
+** ldnt1h {z28\.h(?: - |, )z31\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_z28, svfloat16x4_t, float16_t,
+ z28 = svldnt1_f16_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_pn0, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_f16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_pn7, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_f16_pn15:
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f16_pn15, svfloat16x4_t, float16_t,
+ z0 = svldnt1_f16_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_f16_0:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_0, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f16_1:
+** incb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_1, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f16_2:
+** incb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_2, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f16_3:
+** incb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_3, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_f16_4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_4, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_f16_28:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_28, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_f16_32:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_32, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f16_m1:
+** decb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_m1, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f16_m2:
+** decb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_m2, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f16_m3:
+** decb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_m3, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_f16_m4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_m4, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_f16_m32:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_m32, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_f16_m36:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_m36, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_f16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f16_x1, svfloat16x4_t, float16_t,
+ z0 = svldnt1_vnum_f16_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_f32_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_f32_x2.c
new file mode 100644
index 0000000..9aee977
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_f32_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_f32_base:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_base, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_f32_index:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_index, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f32_1:
+** incb x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_1, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn8, x0 + svcntw ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcntw ()))
+
+/*
+** ldnt1_f32_2:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_2, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn8, x0 + svcntw () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcntw () * 2))
+
+/*
+** ldnt1_f32_14:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_14, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn8, x0 + svcntw () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcntw () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f32_16:
+** incb x0, all, mul #16
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_16, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn8, x0 + svcntw () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcntw () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f32_m1:
+** decb x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_m1, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn8, x0 - svcntw ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcntw ()))
+
+/*
+** ldnt1_f32_m2:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_m2, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn8, x0 - svcntw () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcntw () * 2))
+
+/*
+** ldnt1_f32_m16:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_m16, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn8, x0 - svcntw () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcntw () * 16))
+
+/*
+** ldnt1_f32_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_m18, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn8, x0 - svcntw () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcntw () * 18))
+
+/*
+** ldnt1_f32_z17:
+** ldnt1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_z17, svfloat32x2_t, float32_t,
+ z17 = svldnt1_f32_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_f32_z22:
+** ldnt1w {z22\.s(?: - |, )z23\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_z22, svfloat32x2_t, float32_t,
+ z22 = svldnt1_f32_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_f32_z28:
+** ldnt1w {z28\.s(?: - |, )z29\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_z28, svfloat32x2_t, float32_t,
+ z28 = svldnt1_f32_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_f32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_pn0, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_f32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_pn7, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_f32_pn15:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_pn15, svfloat32x2_t, float32_t,
+ z0 = svldnt1_f32_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_f32_0:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_0, svfloat32x2_t, float32_t,
+ z0 = svldnt1_vnum_f32_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f32_1:
+** incb x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_1, svfloat32x2_t, float32_t,
+ z0 = svldnt1_vnum_f32_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_f32_2:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_2, svfloat32x2_t, float32_t,
+ z0 = svldnt1_vnum_f32_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_f32_14:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_14, svfloat32x2_t, float32_t,
+ z0 = svldnt1_vnum_f32_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f32_16:
+** incb x0, all, mul #16
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_16, svfloat32x2_t, float32_t,
+ z0 = svldnt1_vnum_f32_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f32_m1:
+** decb x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_m1, svfloat32x2_t, float32_t,
+ z0 = svldnt1_vnum_f32_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_f32_m2:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_m2, svfloat32x2_t, float32_t,
+ z0 = svldnt1_vnum_f32_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_f32_m16:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_m16, svfloat32x2_t, float32_t,
+ z0 = svldnt1_vnum_f32_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_f32_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_m18, svfloat32x2_t, float32_t,
+ z0 = svldnt1_vnum_f32_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_f32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_x1, svfloat32x2_t, float32_t,
+ z0 = svldnt1_vnum_f32_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_f32_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_f32_x4.c
new file mode 100644
index 0000000..c1b1ad7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_f32_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_f32_base:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_base, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f32_index:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_index, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f32_1:
+** incb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_1, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 + svcntw ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f32_2:
+** incb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_2, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 + svcntw () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f32_3:
+** incb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_3, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 + svcntw () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 3))
+
+/*
+** ldnt1_f32_4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_4, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 + svcntw () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 4))
+
+/*
+** ldnt1_f32_28:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_28, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 + svcntw () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 28))
+
+/*
+** ldnt1_f32_32:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_32, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 + svcntw () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f32_m1:
+** decb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_m1, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 - svcntw ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f32_m2:
+** decb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_m2, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 - svcntw () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f32_m3:
+** decb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_m3, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 - svcntw () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 3))
+
+/*
+** ldnt1_f32_m4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_m4, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 - svcntw () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 4))
+
+/*
+** ldnt1_f32_m32:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_m32, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 - svcntw () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 32))
+
+/*
+** ldnt1_f32_m36:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_m36, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn8, x0 - svcntw () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 36))
+
+/*
+** ldnt1_f32_z17:
+** ldnt1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_z17, svfloat32x4_t, float32_t,
+ z17 = svldnt1_f32_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f32_z22:
+** ldnt1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_z22, svfloat32x4_t, float32_t,
+ z22 = svldnt1_f32_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f32_z28:
+** ldnt1w {z28\.s(?: - |, )z31\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_z28, svfloat32x4_t, float32_t,
+ z28 = svldnt1_f32_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_pn0, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_f32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_pn7, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_f32_pn15:
+** ldnt1w {z0\.s(?: - |, )z3\.s}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f32_pn15, svfloat32x4_t, float32_t,
+ z0 = svldnt1_f32_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_f32_0:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_0, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f32_1:
+** incb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_1, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f32_2:
+** incb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_2, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f32_3:
+** incb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_3, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_f32_4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_4, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_f32_28:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_28, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_f32_32:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_32, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f32_m1:
+** decb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_m1, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f32_m2:
+** decb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_m2, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f32_m3:
+** decb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_m3, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_f32_m4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_m4, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_f32_m32:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_m32, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_f32_m36:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_m36, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_f32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f32_x1, svfloat32x4_t, float32_t,
+ z0 = svldnt1_vnum_f32_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_f64_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_f64_x2.c
new file mode 100644
index 0000000..da36818
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_f64_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_f64_base:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_base, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_f64_index:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_index, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f64_1:
+** incb x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_1, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn8, x0 + svcntd ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcntd ()))
+
+/*
+** ldnt1_f64_2:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_2, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn8, x0 + svcntd () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcntd () * 2))
+
+/*
+** ldnt1_f64_14:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_14, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn8, x0 + svcntd () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcntd () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f64_16:
+** incb x0, all, mul #16
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_16, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn8, x0 + svcntd () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcntd () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f64_m1:
+** decb x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_m1, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn8, x0 - svcntd ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcntd ()))
+
+/*
+** ldnt1_f64_m2:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_m2, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn8, x0 - svcntd () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcntd () * 2))
+
+/*
+** ldnt1_f64_m16:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_m16, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn8, x0 - svcntd () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcntd () * 16))
+
+/*
+** ldnt1_f64_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_m18, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn8, x0 - svcntd () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcntd () * 18))
+
+/*
+** ldnt1_f64_z17:
+** ldnt1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_z17, svfloat64x2_t, float64_t,
+ z17 = svldnt1_f64_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_f64_z22:
+** ldnt1d {z22\.d(?: - |, )z23\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_z22, svfloat64x2_t, float64_t,
+ z22 = svldnt1_f64_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_f64_z28:
+** ldnt1d {z28\.d(?: - |, )z29\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_z28, svfloat64x2_t, float64_t,
+ z28 = svldnt1_f64_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_f64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_pn0, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_f64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_pn7, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_f64_pn15:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_pn15, svfloat64x2_t, float64_t,
+ z0 = svldnt1_f64_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_f64_0:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_0, svfloat64x2_t, float64_t,
+ z0 = svldnt1_vnum_f64_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f64_1:
+** incb x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_1, svfloat64x2_t, float64_t,
+ z0 = svldnt1_vnum_f64_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_f64_2:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_2, svfloat64x2_t, float64_t,
+ z0 = svldnt1_vnum_f64_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_f64_14:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_14, svfloat64x2_t, float64_t,
+ z0 = svldnt1_vnum_f64_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f64_16:
+** incb x0, all, mul #16
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_16, svfloat64x2_t, float64_t,
+ z0 = svldnt1_vnum_f64_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f64_m1:
+** decb x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_m1, svfloat64x2_t, float64_t,
+ z0 = svldnt1_vnum_f64_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_f64_m2:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_m2, svfloat64x2_t, float64_t,
+ z0 = svldnt1_vnum_f64_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_f64_m16:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_m16, svfloat64x2_t, float64_t,
+ z0 = svldnt1_vnum_f64_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_f64_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_m18, svfloat64x2_t, float64_t,
+ z0 = svldnt1_vnum_f64_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_f64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_x1, svfloat64x2_t, float64_t,
+ z0 = svldnt1_vnum_f64_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_f64_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_f64_x4.c
new file mode 100644
index 0000000..5eae02d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_f64_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_f64_base:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_base, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f64_index:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_index, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f64_1:
+** incb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_1, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 + svcntd ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f64_2:
+** incb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_2, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 + svcntd () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f64_3:
+** incb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_3, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 + svcntd () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 3))
+
+/*
+** ldnt1_f64_4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_4, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 + svcntd () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 4))
+
+/*
+** ldnt1_f64_28:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_28, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 + svcntd () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 28))
+
+/*
+** ldnt1_f64_32:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_32, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 + svcntd () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f64_m1:
+** decb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_m1, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 - svcntd ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f64_m2:
+** decb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_m2, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 - svcntd () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_f64_m3:
+** decb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_m3, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 - svcntd () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 3))
+
+/*
+** ldnt1_f64_m4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_m4, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 - svcntd () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 4))
+
+/*
+** ldnt1_f64_m32:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_m32, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 - svcntd () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 32))
+
+/*
+** ldnt1_f64_m36:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_m36, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn8, x0 - svcntd () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 36))
+
+/*
+** ldnt1_f64_z17:
+** ldnt1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_z17, svfloat64x4_t, float64_t,
+ z17 = svldnt1_f64_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f64_z22:
+** ldnt1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_z22, svfloat64x4_t, float64_t,
+ z22 = svldnt1_f64_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f64_z28:
+** ldnt1d {z28\.d(?: - |, )z31\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_z28, svfloat64x4_t, float64_t,
+ z28 = svldnt1_f64_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_f64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_pn0, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_f64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_pn7, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_f64_pn15:
+** ldnt1d {z0\.d(?: - |, )z3\.d}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_f64_pn15, svfloat64x4_t, float64_t,
+ z0 = svldnt1_f64_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_f64_0:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_0, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f64_1:
+** incb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_1, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f64_2:
+** incb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_2, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f64_3:
+** incb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_3, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_f64_4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_4, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_f64_28:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_28, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_f64_32:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_32, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f64_m1:
+** decb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_m1, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f64_m2:
+** decb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_m2, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_f64_m3:
+** decb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_m3, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_f64_m4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_m4, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_f64_m32:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_m32, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_f64_m36:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_m36, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_f64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_f64_x1, svfloat64x4_t, float64_t,
+ z0 = svldnt1_vnum_f64_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_s16_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_s16_x2.c
new file mode 100644
index 0000000..4e2a918
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_s16_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_s16_base:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_base, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s16_index:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_index, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s16_1:
+** incb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_1, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn8, x0 + svcnth ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth ()))
+
+/*
+** ldnt1_s16_2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_2, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn8, x0 + svcnth () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth () * 2))
+
+/*
+** ldnt1_s16_14:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_14, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn8, x0 + svcnth () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s16_16:
+** incb x0, all, mul #16
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_16, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn8, x0 + svcnth () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s16_m1:
+** decb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_m1, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn8, x0 - svcnth ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth ()))
+
+/*
+** ldnt1_s16_m2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_m2, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn8, x0 - svcnth () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth () * 2))
+
+/*
+** ldnt1_s16_m16:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_m16, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn8, x0 - svcnth () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth () * 16))
+
+/*
+** ldnt1_s16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_m18, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn8, x0 - svcnth () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth () * 18))
+
+/*
+** ldnt1_s16_z17:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_z17, svint16x2_t, int16_t,
+ z17 = svldnt1_s16_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s16_z22:
+** ldnt1h {z22\.h(?: - |, )z23\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_z22, svint16x2_t, int16_t,
+ z22 = svldnt1_s16_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s16_z28:
+** ldnt1h {z28\.h(?: - |, )z29\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_z28, svint16x2_t, int16_t,
+ z28 = svldnt1_s16_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_pn0, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_s16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_pn7, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_s16_pn15:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_pn15, svint16x2_t, int16_t,
+ z0 = svldnt1_s16_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_s16_0:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_0, svint16x2_t, int16_t,
+ z0 = svldnt1_vnum_s16_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s16_1:
+** incb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_1, svint16x2_t, int16_t,
+ z0 = svldnt1_vnum_s16_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_s16_2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_2, svint16x2_t, int16_t,
+ z0 = svldnt1_vnum_s16_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_s16_14:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_14, svint16x2_t, int16_t,
+ z0 = svldnt1_vnum_s16_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s16_16:
+** incb x0, all, mul #16
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_16, svint16x2_t, int16_t,
+ z0 = svldnt1_vnum_s16_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s16_m1:
+** decb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_m1, svint16x2_t, int16_t,
+ z0 = svldnt1_vnum_s16_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_s16_m2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_m2, svint16x2_t, int16_t,
+ z0 = svldnt1_vnum_s16_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_s16_m16:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_m16, svint16x2_t, int16_t,
+ z0 = svldnt1_vnum_s16_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_s16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_m18, svint16x2_t, int16_t,
+ z0 = svldnt1_vnum_s16_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_s16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_x1, svint16x2_t, int16_t,
+ z0 = svldnt1_vnum_s16_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_s16_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_s16_x4.c
new file mode 100644
index 0000000..d7ef1d7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_s16_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_s16_base:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_base, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s16_index:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_index, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s16_1:
+** incb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_1, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 + svcnth ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s16_2:
+** incb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_2, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 + svcnth () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s16_3:
+** incb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_3, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 + svcnth () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 3))
+
+/*
+** ldnt1_s16_4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_4, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 + svcnth () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 4))
+
+/*
+** ldnt1_s16_28:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_28, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 + svcnth () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 28))
+
+/*
+** ldnt1_s16_32:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_32, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 + svcnth () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s16_m1:
+** decb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_m1, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 - svcnth ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s16_m2:
+** decb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_m2, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 - svcnth () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s16_m3:
+** decb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_m3, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 - svcnth () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 3))
+
+/*
+** ldnt1_s16_m4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_m4, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 - svcnth () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 4))
+
+/*
+** ldnt1_s16_m32:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_m32, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 - svcnth () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 32))
+
+/*
+** ldnt1_s16_m36:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_m36, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn8, x0 - svcnth () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 36))
+
+/*
+** ldnt1_s16_z17:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_z17, svint16x4_t, int16_t,
+ z17 = svldnt1_s16_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s16_z22:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_z22, svint16x4_t, int16_t,
+ z22 = svldnt1_s16_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s16_z28:
+** ldnt1h {z28\.h(?: - |, )z31\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_z28, svint16x4_t, int16_t,
+ z28 = svldnt1_s16_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_pn0, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_s16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_pn7, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_s16_pn15:
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s16_pn15, svint16x4_t, int16_t,
+ z0 = svldnt1_s16_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_s16_0:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_0, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s16_1:
+** incb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_1, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s16_2:
+** incb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_2, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s16_3:
+** incb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_3, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_s16_4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_4, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_s16_28:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_28, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_s16_32:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_32, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s16_m1:
+** decb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_m1, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s16_m2:
+** decb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_m2, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s16_m3:
+** decb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_m3, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_s16_m4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_m4, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_s16_m32:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_m32, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_s16_m36:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_m36, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_s16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s16_x1, svint16x4_t, int16_t,
+ z0 = svldnt1_vnum_s16_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_s32_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_s32_x2.c
new file mode 100644
index 0000000..5f8e3f9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_s32_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_s32_base:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_base, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s32_index:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_index, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s32_1:
+** incb x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_1, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn8, x0 + svcntw ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcntw ()))
+
+/*
+** ldnt1_s32_2:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_2, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn8, x0 + svcntw () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcntw () * 2))
+
+/*
+** ldnt1_s32_14:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_14, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn8, x0 + svcntw () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcntw () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s32_16:
+** incb x0, all, mul #16
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_16, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn8, x0 + svcntw () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcntw () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s32_m1:
+** decb x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_m1, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn8, x0 - svcntw ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcntw ()))
+
+/*
+** ldnt1_s32_m2:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_m2, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn8, x0 - svcntw () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcntw () * 2))
+
+/*
+** ldnt1_s32_m16:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_m16, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn8, x0 - svcntw () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcntw () * 16))
+
+/*
+** ldnt1_s32_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_m18, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn8, x0 - svcntw () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcntw () * 18))
+
+/*
+** ldnt1_s32_z17:
+** ldnt1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_z17, svint32x2_t, int32_t,
+ z17 = svldnt1_s32_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s32_z22:
+** ldnt1w {z22\.s(?: - |, )z23\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_z22, svint32x2_t, int32_t,
+ z22 = svldnt1_s32_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s32_z28:
+** ldnt1w {z28\.s(?: - |, )z29\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_z28, svint32x2_t, int32_t,
+ z28 = svldnt1_s32_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_pn0, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_s32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_pn7, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_s32_pn15:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_pn15, svint32x2_t, int32_t,
+ z0 = svldnt1_s32_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_s32_0:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_0, svint32x2_t, int32_t,
+ z0 = svldnt1_vnum_s32_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s32_1:
+** incb x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_1, svint32x2_t, int32_t,
+ z0 = svldnt1_vnum_s32_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_s32_2:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_2, svint32x2_t, int32_t,
+ z0 = svldnt1_vnum_s32_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_s32_14:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_14, svint32x2_t, int32_t,
+ z0 = svldnt1_vnum_s32_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s32_16:
+** incb x0, all, mul #16
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_16, svint32x2_t, int32_t,
+ z0 = svldnt1_vnum_s32_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s32_m1:
+** decb x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_m1, svint32x2_t, int32_t,
+ z0 = svldnt1_vnum_s32_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_s32_m2:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_m2, svint32x2_t, int32_t,
+ z0 = svldnt1_vnum_s32_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_s32_m16:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_m16, svint32x2_t, int32_t,
+ z0 = svldnt1_vnum_s32_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_s32_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_m18, svint32x2_t, int32_t,
+ z0 = svldnt1_vnum_s32_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_s32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_x1, svint32x2_t, int32_t,
+ z0 = svldnt1_vnum_s32_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_s32_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_s32_x4.c
new file mode 100644
index 0000000..137c4c2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_s32_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_s32_base:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_base, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s32_index:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_index, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s32_1:
+** incb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_1, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 + svcntw ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s32_2:
+** incb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_2, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 + svcntw () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s32_3:
+** incb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_3, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 + svcntw () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 3))
+
+/*
+** ldnt1_s32_4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_4, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 + svcntw () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 4))
+
+/*
+** ldnt1_s32_28:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_28, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 + svcntw () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 28))
+
+/*
+** ldnt1_s32_32:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_32, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 + svcntw () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s32_m1:
+** decb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_m1, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 - svcntw ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s32_m2:
+** decb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_m2, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 - svcntw () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s32_m3:
+** decb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_m3, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 - svcntw () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 3))
+
+/*
+** ldnt1_s32_m4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_m4, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 - svcntw () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 4))
+
+/*
+** ldnt1_s32_m32:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_m32, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 - svcntw () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 32))
+
+/*
+** ldnt1_s32_m36:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_m36, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn8, x0 - svcntw () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 36))
+
+/*
+** ldnt1_s32_z17:
+** ldnt1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_z17, svint32x4_t, int32_t,
+ z17 = svldnt1_s32_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s32_z22:
+** ldnt1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_z22, svint32x4_t, int32_t,
+ z22 = svldnt1_s32_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s32_z28:
+** ldnt1w {z28\.s(?: - |, )z31\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_z28, svint32x4_t, int32_t,
+ z28 = svldnt1_s32_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_pn0, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_s32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_pn7, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_s32_pn15:
+** ldnt1w {z0\.s(?: - |, )z3\.s}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s32_pn15, svint32x4_t, int32_t,
+ z0 = svldnt1_s32_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_s32_0:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_0, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s32_1:
+** incb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_1, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s32_2:
+** incb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_2, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s32_3:
+** incb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_3, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_s32_4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_4, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_s32_28:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_28, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_s32_32:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_32, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s32_m1:
+** decb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_m1, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s32_m2:
+** decb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_m2, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s32_m3:
+** decb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_m3, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_s32_m4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_m4, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_s32_m32:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_m32, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_s32_m36:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_m36, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_s32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s32_x1, svint32x4_t, int32_t,
+ z0 = svldnt1_vnum_s32_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_s64_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_s64_x2.c
new file mode 100644
index 0000000..f79423f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_s64_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_s64_base:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_base, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s64_index:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_index, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s64_1:
+** incb x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_1, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn8, x0 + svcntd ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcntd ()))
+
+/*
+** ldnt1_s64_2:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_2, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn8, x0 + svcntd () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcntd () * 2))
+
+/*
+** ldnt1_s64_14:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_14, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn8, x0 + svcntd () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcntd () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s64_16:
+** incb x0, all, mul #16
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_16, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn8, x0 + svcntd () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcntd () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s64_m1:
+** decb x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_m1, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn8, x0 - svcntd ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcntd ()))
+
+/*
+** ldnt1_s64_m2:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_m2, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn8, x0 - svcntd () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcntd () * 2))
+
+/*
+** ldnt1_s64_m16:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_m16, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn8, x0 - svcntd () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcntd () * 16))
+
+/*
+** ldnt1_s64_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_m18, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn8, x0 - svcntd () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcntd () * 18))
+
+/*
+** ldnt1_s64_z17:
+** ldnt1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_z17, svint64x2_t, int64_t,
+ z17 = svldnt1_s64_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s64_z22:
+** ldnt1d {z22\.d(?: - |, )z23\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_z22, svint64x2_t, int64_t,
+ z22 = svldnt1_s64_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s64_z28:
+** ldnt1d {z28\.d(?: - |, )z29\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_z28, svint64x2_t, int64_t,
+ z28 = svldnt1_s64_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_pn0, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_s64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_pn7, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_s64_pn15:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_pn15, svint64x2_t, int64_t,
+ z0 = svldnt1_s64_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_s64_0:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_0, svint64x2_t, int64_t,
+ z0 = svldnt1_vnum_s64_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s64_1:
+** incb x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_1, svint64x2_t, int64_t,
+ z0 = svldnt1_vnum_s64_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_s64_2:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_2, svint64x2_t, int64_t,
+ z0 = svldnt1_vnum_s64_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_s64_14:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_14, svint64x2_t, int64_t,
+ z0 = svldnt1_vnum_s64_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s64_16:
+** incb x0, all, mul #16
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_16, svint64x2_t, int64_t,
+ z0 = svldnt1_vnum_s64_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s64_m1:
+** decb x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_m1, svint64x2_t, int64_t,
+ z0 = svldnt1_vnum_s64_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_s64_m2:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_m2, svint64x2_t, int64_t,
+ z0 = svldnt1_vnum_s64_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_s64_m16:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_m16, svint64x2_t, int64_t,
+ z0 = svldnt1_vnum_s64_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_s64_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_m18, svint64x2_t, int64_t,
+ z0 = svldnt1_vnum_s64_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_s64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_x1, svint64x2_t, int64_t,
+ z0 = svldnt1_vnum_s64_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_s64_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_s64_x4.c
new file mode 100644
index 0000000..66a9b113
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_s64_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_s64_base:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_base, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s64_index:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_index, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s64_1:
+** incb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_1, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 + svcntd ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s64_2:
+** incb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_2, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 + svcntd () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s64_3:
+** incb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_3, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 + svcntd () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 3))
+
+/*
+** ldnt1_s64_4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_4, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 + svcntd () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 4))
+
+/*
+** ldnt1_s64_28:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_28, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 + svcntd () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 28))
+
+/*
+** ldnt1_s64_32:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_32, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 + svcntd () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s64_m1:
+** decb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_m1, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 - svcntd ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s64_m2:
+** decb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_m2, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 - svcntd () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s64_m3:
+** decb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_m3, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 - svcntd () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 3))
+
+/*
+** ldnt1_s64_m4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_m4, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 - svcntd () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 4))
+
+/*
+** ldnt1_s64_m32:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_m32, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 - svcntd () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 32))
+
+/*
+** ldnt1_s64_m36:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_m36, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn8, x0 - svcntd () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 36))
+
+/*
+** ldnt1_s64_z17:
+** ldnt1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_z17, svint64x4_t, int64_t,
+ z17 = svldnt1_s64_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s64_z22:
+** ldnt1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_z22, svint64x4_t, int64_t,
+ z22 = svldnt1_s64_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s64_z28:
+** ldnt1d {z28\.d(?: - |, )z31\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_z28, svint64x4_t, int64_t,
+ z28 = svldnt1_s64_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_pn0, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_s64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_pn7, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_s64_pn15:
+** ldnt1d {z0\.d(?: - |, )z3\.d}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s64_pn15, svint64x4_t, int64_t,
+ z0 = svldnt1_s64_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_s64_0:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_0, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s64_1:
+** incb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_1, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s64_2:
+** incb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_2, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s64_3:
+** incb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_3, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_s64_4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_4, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_s64_28:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_28, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_s64_32:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_32, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s64_m1:
+** decb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_m1, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s64_m2:
+** decb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_m2, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s64_m3:
+** decb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_m3, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_s64_m4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_m4, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_s64_m32:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_m32, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_s64_m36:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_m36, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_s64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s64_x1, svint64x4_t, int64_t,
+ z0 = svldnt1_vnum_s64_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_s8_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_s8_x2.c
new file mode 100644
index 0000000..c22f35f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_s8_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_s8_base:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_base, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s8_index:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_index, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s8_1:
+** incb x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_1, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn8, x0 + svcntb ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcntb ()))
+
+/*
+** ldnt1_s8_2:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_2, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn8, x0 + svcntb () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcntb () * 2))
+
+/*
+** ldnt1_s8_14:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_14, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn8, x0 + svcntb () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcntb () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s8_16:
+** incb x0, all, mul #16
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_16, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn8, x0 + svcntb () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcntb () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s8_m1:
+** decb x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_m1, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn8, x0 - svcntb ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcntb ()))
+
+/*
+** ldnt1_s8_m2:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_m2, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn8, x0 - svcntb () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcntb () * 2))
+
+/*
+** ldnt1_s8_m16:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_m16, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn8, x0 - svcntb () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcntb () * 16))
+
+/*
+** ldnt1_s8_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_m18, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn8, x0 - svcntb () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcntb () * 18))
+
+/*
+** ldnt1_s8_z17:
+** ldnt1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_z17, svint8x2_t, int8_t,
+ z17 = svldnt1_s8_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s8_z22:
+** ldnt1b {z22\.b(?: - |, )z23\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_z22, svint8x2_t, int8_t,
+ z22 = svldnt1_s8_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s8_z28:
+** ldnt1b {z28\.b(?: - |, )z29\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_z28, svint8x2_t, int8_t,
+ z28 = svldnt1_s8_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_s8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_pn0, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_s8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_pn7, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_s8_pn15:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_pn15, svint8x2_t, int8_t,
+ z0 = svldnt1_s8_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_s8_0:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_0, svint8x2_t, int8_t,
+ z0 = svldnt1_vnum_s8_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s8_1:
+** incb x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_1, svint8x2_t, int8_t,
+ z0 = svldnt1_vnum_s8_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_s8_2:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_2, svint8x2_t, int8_t,
+ z0 = svldnt1_vnum_s8_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_s8_14:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_14, svint8x2_t, int8_t,
+ z0 = svldnt1_vnum_s8_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s8_16:
+** incb x0, all, mul #16
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_16, svint8x2_t, int8_t,
+ z0 = svldnt1_vnum_s8_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s8_m1:
+** decb x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_m1, svint8x2_t, int8_t,
+ z0 = svldnt1_vnum_s8_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_s8_m2:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_m2, svint8x2_t, int8_t,
+ z0 = svldnt1_vnum_s8_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_s8_m16:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_m16, svint8x2_t, int8_t,
+ z0 = svldnt1_vnum_s8_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_s8_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_m18, svint8x2_t, int8_t,
+ z0 = svldnt1_vnum_s8_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_s8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_x1, svint8x2_t, int8_t,
+ z0 = svldnt1_vnum_s8_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_s8_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_s8_x4.c
new file mode 100644
index 0000000..a2b1aed
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_s8_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_s8_base:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_base, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s8_index:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_index, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s8_1:
+** incb x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_1, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 + svcntb ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s8_2:
+** incb x0, all, mul #2
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_2, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 + svcntb () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s8_3:
+** incb x0, all, mul #3
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_3, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 + svcntb () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 3))
+
+/*
+** ldnt1_s8_4:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_4, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 + svcntb () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 4))
+
+/*
+** ldnt1_s8_28:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_28, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 + svcntb () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 28))
+
+/*
+** ldnt1_s8_32:
+** [^{]*
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_32, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 + svcntb () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s8_m1:
+** decb x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_m1, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 - svcntb ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s8_m2:
+** decb x0, all, mul #2
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_m2, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 - svcntb () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_s8_m3:
+** decb x0, all, mul #3
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_m3, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 - svcntb () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 3))
+
+/*
+** ldnt1_s8_m4:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_m4, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 - svcntb () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 4))
+
+/*
+** ldnt1_s8_m32:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_m32, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 - svcntb () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 32))
+
+/*
+** ldnt1_s8_m36:
+** [^{]*
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_m36, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn8, x0 - svcntb () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 36))
+
+/*
+** ldnt1_s8_z17:
+** ldnt1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_z17, svint8x4_t, int8_t,
+ z17 = svldnt1_s8_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s8_z22:
+** ldnt1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_z22, svint8x4_t, int8_t,
+ z22 = svldnt1_s8_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s8_z28:
+** ldnt1b {z28\.b(?: - |, )z31\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_z28, svint8x4_t, int8_t,
+ z28 = svldnt1_s8_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_s8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_pn0, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_s8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_pn7, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_s8_pn15:
+** ldnt1b {z0\.b(?: - |, )z3\.b}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_s8_pn15, svint8x4_t, int8_t,
+ z0 = svldnt1_s8_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_s8_0:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_0, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s8_1:
+** incb x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_1, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s8_2:
+** incb x0, all, mul #2
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_2, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s8_3:
+** incb x0, all, mul #3
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_3, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_s8_4:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_4, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_s8_28:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_28, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_s8_32:
+** [^{]*
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_32, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s8_m1:
+** decb x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_m1, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s8_m2:
+** decb x0, all, mul #2
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_m2, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_s8_m3:
+** decb x0, all, mul #3
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_m3, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_s8_m4:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_m4, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_s8_m32:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_m32, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_s8_m36:
+** [^{]*
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_m36, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_s8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_s8_x1, svint8x4_t, int8_t,
+ z0 = svldnt1_vnum_s8_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_u16_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_u16_x2.c
new file mode 100644
index 0000000..9868fc2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_u16_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_u16_base:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_base, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u16_index:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_index, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u16_1:
+** incb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_1, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn8, x0 + svcnth ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth ()))
+
+/*
+** ldnt1_u16_2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_2, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn8, x0 + svcnth () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth () * 2))
+
+/*
+** ldnt1_u16_14:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_14, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn8, x0 + svcnth () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u16_16:
+** incb x0, all, mul #16
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_16, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn8, x0 + svcnth () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcnth () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u16_m1:
+** decb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_m1, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn8, x0 - svcnth ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth ()))
+
+/*
+** ldnt1_u16_m2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_m2, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn8, x0 - svcnth () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth () * 2))
+
+/*
+** ldnt1_u16_m16:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_m16, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn8, x0 - svcnth () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth () * 16))
+
+/*
+** ldnt1_u16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_m18, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn8, x0 - svcnth () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcnth () * 18))
+
+/*
+** ldnt1_u16_z17:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_z17, svuint16x2_t, uint16_t,
+ z17 = svldnt1_u16_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u16_z22:
+** ldnt1h {z22\.h(?: - |, )z23\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_z22, svuint16x2_t, uint16_t,
+ z22 = svldnt1_u16_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u16_z28:
+** ldnt1h {z28\.h(?: - |, )z29\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_z28, svuint16x2_t, uint16_t,
+ z28 = svldnt1_u16_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_pn0, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_u16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_pn7, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_u16_pn15:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_pn15, svuint16x2_t, uint16_t,
+ z0 = svldnt1_u16_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_u16_0:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_0, svuint16x2_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u16_1:
+** incb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_1, svuint16x2_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_u16_2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_2, svuint16x2_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_u16_14:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_14, svuint16x2_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u16_16:
+** incb x0, all, mul #16
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_16, svuint16x2_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u16_m1:
+** decb x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_m1, svuint16x2_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_u16_m2:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_m2, svuint16x2_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_u16_m16:
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_m16, svuint16x2_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_u16_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_m18, svuint16x2_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_u16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1h {z0\.h(?: - |, )z1\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_x1, svuint16x2_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_u16_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_u16_x4.c
new file mode 100644
index 0000000..f27f103
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_u16_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_u16_base:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_base, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u16_index:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_index, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u16_1:
+** incb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_1, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 + svcnth ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u16_2:
+** incb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_2, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 + svcnth () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u16_3:
+** incb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_3, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 + svcnth () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 3))
+
+/*
+** ldnt1_u16_4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_4, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 + svcnth () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 4))
+
+/*
+** ldnt1_u16_28:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_28, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 + svcnth () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 28))
+
+/*
+** ldnt1_u16_32:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_32, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 + svcnth () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcnth () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u16_m1:
+** decb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_m1, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 - svcnth ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u16_m2:
+** decb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_m2, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 - svcnth () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u16_m3:
+** decb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_m3, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 - svcnth () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 3))
+
+/*
+** ldnt1_u16_m4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_m4, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 - svcnth () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 4))
+
+/*
+** ldnt1_u16_m32:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_m32, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 - svcnth () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 32))
+
+/*
+** ldnt1_u16_m36:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_m36, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn8, x0 - svcnth () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcnth () * 36))
+
+/*
+** ldnt1_u16_z17:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_z17, svuint16x4_t, uint16_t,
+ z17 = svldnt1_u16_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u16_z22:
+** ldnt1h {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_z22, svuint16x4_t, uint16_t,
+ z22 = svldnt1_u16_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u16_z28:
+** ldnt1h {z28\.h(?: - |, )z31\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_z28, svuint16x4_t, uint16_t,
+ z28 = svldnt1_u16_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_pn0, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_u16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_pn7, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_u16_pn15:
+** ldnt1h {z0\.h(?: - |, )z3\.h}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u16_pn15, svuint16x4_t, uint16_t,
+ z0 = svldnt1_u16_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_u16_0:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_0, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u16_1:
+** incb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_1, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u16_2:
+** incb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_2, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u16_3:
+** incb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_3, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_u16_4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_4, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_u16_28:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_28, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_u16_32:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_32, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u16_m1:
+** decb x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_m1, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u16_m2:
+** decb x0, all, mul #2
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_m2, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u16_m3:
+** decb x0, all, mul #3
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_m3, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_u16_m4:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_m4, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_u16_m32:
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_m32, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_u16_m36:
+** [^{]*
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_m36, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_u16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1h {z0\.h - z3\.h}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u16_x1, svuint16x4_t, uint16_t,
+ z0 = svldnt1_vnum_u16_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_u32_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_u32_x2.c
new file mode 100644
index 0000000..363f85f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_u32_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_u32_base:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_base, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u32_index:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_index, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u32_1:
+** incb x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_1, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn8, x0 + svcntw ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcntw ()))
+
+/*
+** ldnt1_u32_2:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_2, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn8, x0 + svcntw () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcntw () * 2))
+
+/*
+** ldnt1_u32_14:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_14, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn8, x0 + svcntw () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcntw () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u32_16:
+** incb x0, all, mul #16
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_16, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn8, x0 + svcntw () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcntw () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u32_m1:
+** decb x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_m1, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn8, x0 - svcntw ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcntw ()))
+
+/*
+** ldnt1_u32_m2:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_m2, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn8, x0 - svcntw () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcntw () * 2))
+
+/*
+** ldnt1_u32_m16:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_m16, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn8, x0 - svcntw () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcntw () * 16))
+
+/*
+** ldnt1_u32_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_m18, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn8, x0 - svcntw () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcntw () * 18))
+
+/*
+** ldnt1_u32_z17:
+** ldnt1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_z17, svuint32x2_t, uint32_t,
+ z17 = svldnt1_u32_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u32_z22:
+** ldnt1w {z22\.s(?: - |, )z23\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_z22, svuint32x2_t, uint32_t,
+ z22 = svldnt1_u32_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u32_z28:
+** ldnt1w {z28\.s(?: - |, )z29\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_z28, svuint32x2_t, uint32_t,
+ z28 = svldnt1_u32_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_pn0, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_u32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_pn7, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_u32_pn15:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_pn15, svuint32x2_t, uint32_t,
+ z0 = svldnt1_u32_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_u32_0:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_0, svuint32x2_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u32_1:
+** incb x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_1, svuint32x2_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_u32_2:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_2, svuint32x2_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_u32_14:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_14, svuint32x2_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u32_16:
+** incb x0, all, mul #16
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_16, svuint32x2_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u32_m1:
+** decb x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_m1, svuint32x2_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_u32_m2:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_m2, svuint32x2_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_u32_m16:
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_m16, svuint32x2_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_u32_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_m18, svuint32x2_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_u32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1w {z0\.s(?: - |, )z1\.s}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_x1, svuint32x2_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_u32_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_u32_x4.c
new file mode 100644
index 0000000..ffa651e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_u32_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_u32_base:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_base, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u32_index:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_index, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u32_1:
+** incb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_1, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 + svcntw ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u32_2:
+** incb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_2, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 + svcntw () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u32_3:
+** incb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_3, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 + svcntw () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 3))
+
+/*
+** ldnt1_u32_4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_4, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 + svcntw () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 4))
+
+/*
+** ldnt1_u32_28:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_28, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 + svcntw () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 28))
+
+/*
+** ldnt1_u32_32:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_32, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 + svcntw () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcntw () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u32_m1:
+** decb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_m1, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 - svcntw ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u32_m2:
+** decb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_m2, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 - svcntw () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u32_m3:
+** decb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_m3, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 - svcntw () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 3))
+
+/*
+** ldnt1_u32_m4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_m4, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 - svcntw () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 4))
+
+/*
+** ldnt1_u32_m32:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_m32, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 - svcntw () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 32))
+
+/*
+** ldnt1_u32_m36:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_m36, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn8, x0 - svcntw () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcntw () * 36))
+
+/*
+** ldnt1_u32_z17:
+** ldnt1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_z17, svuint32x4_t, uint32_t,
+ z17 = svldnt1_u32_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u32_z22:
+** ldnt1w {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_z22, svuint32x4_t, uint32_t,
+ z22 = svldnt1_u32_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u32_z28:
+** ldnt1w {z28\.s(?: - |, )z31\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_z28, svuint32x4_t, uint32_t,
+ z28 = svldnt1_u32_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_pn0, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_u32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1w {z0\.s(?: - |, )z3\.s}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_pn7, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_u32_pn15:
+** ldnt1w {z0\.s(?: - |, )z3\.s}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u32_pn15, svuint32x4_t, uint32_t,
+ z0 = svldnt1_u32_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_u32_0:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_0, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u32_1:
+** incb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_1, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u32_2:
+** incb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_2, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u32_3:
+** incb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_3, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_u32_4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_4, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_u32_28:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_28, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_u32_32:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_32, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u32_m1:
+** decb x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_m1, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u32_m2:
+** decb x0, all, mul #2
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_m2, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u32_m3:
+** decb x0, all, mul #3
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_m3, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_u32_m4:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_m4, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_u32_m32:
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_m32, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_u32_m36:
+** [^{]*
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_m36, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_u32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1w {z0\.s - z3\.s}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u32_x1, svuint32x4_t, uint32_t,
+ z0 = svldnt1_vnum_u32_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_u64_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_u64_x2.c
new file mode 100644
index 0000000..4f3f22e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_u64_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_u64_base:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_base, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u64_index:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_index, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u64_1:
+** incb x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_1, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn8, x0 + svcntd ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcntd ()))
+
+/*
+** ldnt1_u64_2:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_2, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn8, x0 + svcntd () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcntd () * 2))
+
+/*
+** ldnt1_u64_14:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_14, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn8, x0 + svcntd () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcntd () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u64_16:
+** incb x0, all, mul #16
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_16, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn8, x0 + svcntd () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcntd () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u64_m1:
+** decb x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_m1, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn8, x0 - svcntd ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcntd ()))
+
+/*
+** ldnt1_u64_m2:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_m2, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn8, x0 - svcntd () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcntd () * 2))
+
+/*
+** ldnt1_u64_m16:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_m16, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn8, x0 - svcntd () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcntd () * 16))
+
+/*
+** ldnt1_u64_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_m18, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn8, x0 - svcntd () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcntd () * 18))
+
+/*
+** ldnt1_u64_z17:
+** ldnt1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_z17, svuint64x2_t, uint64_t,
+ z17 = svldnt1_u64_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u64_z22:
+** ldnt1d {z22\.d(?: - |, )z23\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_z22, svuint64x2_t, uint64_t,
+ z22 = svldnt1_u64_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u64_z28:
+** ldnt1d {z28\.d(?: - |, )z29\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_z28, svuint64x2_t, uint64_t,
+ z28 = svldnt1_u64_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_pn0, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_u64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_pn7, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_u64_pn15:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_pn15, svuint64x2_t, uint64_t,
+ z0 = svldnt1_u64_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_u64_0:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_0, svuint64x2_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u64_1:
+** incb x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_1, svuint64x2_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_u64_2:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_2, svuint64x2_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_u64_14:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_14, svuint64x2_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u64_16:
+** incb x0, all, mul #16
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_16, svuint64x2_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u64_m1:
+** decb x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_m1, svuint64x2_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_u64_m2:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_m2, svuint64x2_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_u64_m16:
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_m16, svuint64x2_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_u64_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_m18, svuint64x2_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_u64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1d {z0\.d(?: - |, )z1\.d}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_x1, svuint64x2_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_u64_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_u64_x4.c
new file mode 100644
index 0000000..c0c2c8a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_u64_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_u64_base:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_base, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u64_index:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_index, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u64_1:
+** incb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_1, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 + svcntd ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u64_2:
+** incb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_2, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 + svcntd () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u64_3:
+** incb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_3, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 + svcntd () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 3))
+
+/*
+** ldnt1_u64_4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_4, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 + svcntd () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 4))
+
+/*
+** ldnt1_u64_28:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_28, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 + svcntd () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 28))
+
+/*
+** ldnt1_u64_32:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_32, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 + svcntd () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcntd () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u64_m1:
+** decb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_m1, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 - svcntd ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u64_m2:
+** decb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_m2, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 - svcntd () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u64_m3:
+** decb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_m3, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 - svcntd () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 3))
+
+/*
+** ldnt1_u64_m4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_m4, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 - svcntd () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 4))
+
+/*
+** ldnt1_u64_m32:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_m32, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 - svcntd () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 32))
+
+/*
+** ldnt1_u64_m36:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_m36, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn8, x0 - svcntd () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcntd () * 36))
+
+/*
+** ldnt1_u64_z17:
+** ldnt1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_z17, svuint64x4_t, uint64_t,
+ z17 = svldnt1_u64_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u64_z22:
+** ldnt1d {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_z22, svuint64x4_t, uint64_t,
+ z22 = svldnt1_u64_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u64_z28:
+** ldnt1d {z28\.d(?: - |, )z31\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_z28, svuint64x4_t, uint64_t,
+ z28 = svldnt1_u64_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_pn0, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_u64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1d {z0\.d(?: - |, )z3\.d}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_pn7, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_u64_pn15:
+** ldnt1d {z0\.d(?: - |, )z3\.d}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u64_pn15, svuint64x4_t, uint64_t,
+ z0 = svldnt1_u64_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_u64_0:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_0, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u64_1:
+** incb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_1, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u64_2:
+** incb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_2, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u64_3:
+** incb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_3, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_u64_4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_4, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_u64_28:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_28, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_u64_32:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_32, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u64_m1:
+** decb x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_m1, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u64_m2:
+** decb x0, all, mul #2
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_m2, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u64_m3:
+** decb x0, all, mul #3
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_m3, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_u64_m4:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_m4, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_u64_m32:
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_m32, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_u64_m36:
+** [^{]*
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_m36, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_u64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1d {z0\.d - z3\.d}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u64_x1, svuint64x4_t, uint64_t,
+ z0 = svldnt1_vnum_u64_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_u8_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_u8_x2.c
new file mode 100644
index 0000000..702e56f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_u8_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_u8_base:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_base, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn8, x0),
+ z0 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u8_index:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_index, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn8, x0 + x1),
+ z0 = svldnt1_x2 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u8_1:
+** incb x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_1, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn8, x0 + svcntb ()),
+ z0 = svldnt1_x2 (pn8, x0 + svcntb ()))
+
+/*
+** ldnt1_u8_2:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_2, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn8, x0 + svcntb () * 2),
+ z0 = svldnt1_x2 (pn8, x0 + svcntb () * 2))
+
+/*
+** ldnt1_u8_14:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_14, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn8, x0 + svcntb () * 14),
+ z0 = svldnt1_x2 (pn8, x0 + svcntb () * 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u8_16:
+** incb x0, all, mul #16
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_16, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn8, x0 + svcntb () * 16),
+ z0 = svldnt1_x2 (pn8, x0 + svcntb () * 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u8_m1:
+** decb x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_m1, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn8, x0 - svcntb ()),
+ z0 = svldnt1_x2 (pn8, x0 - svcntb ()))
+
+/*
+** ldnt1_u8_m2:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_m2, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn8, x0 - svcntb () * 2),
+ z0 = svldnt1_x2 (pn8, x0 - svcntb () * 2))
+
+/*
+** ldnt1_u8_m16:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_m16, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn8, x0 - svcntb () * 16),
+ z0 = svldnt1_x2 (pn8, x0 - svcntb () * 16))
+
+/*
+** ldnt1_u8_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_m18, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn8, x0 - svcntb () * 18),
+ z0 = svldnt1_x2 (pn8, x0 - svcntb () * 18))
+
+/*
+** ldnt1_u8_z17:
+** ldnt1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_z17, svuint8x2_t, uint8_t,
+ z17 = svldnt1_u8_x2 (pn8, x0),
+ z17 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u8_z22:
+** ldnt1b {z22\.b(?: - |, )z23\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_z22, svuint8x2_t, uint8_t,
+ z22 = svldnt1_u8_x2 (pn8, x0),
+ z22 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u8_z28:
+** ldnt1b {z28\.b(?: - |, )z29\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_z28, svuint8x2_t, uint8_t,
+ z28 = svldnt1_u8_x2 (pn8, x0),
+ z28 = svldnt1_x2 (pn8, x0))
+
+/*
+** ldnt1_u8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_pn0, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn0, x0),
+ z0 = svldnt1_x2 (pn0, x0))
+
+/*
+** ldnt1_u8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_pn7, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn7, x0),
+ z0 = svldnt1_x2 (pn7, x0))
+
+/*
+** ldnt1_u8_pn15:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_pn15, svuint8x2_t, uint8_t,
+ z0 = svldnt1_u8_x2 (pn15, x0),
+ z0 = svldnt1_x2 (pn15, x0))
+
+/*
+** ldnt1_vnum_u8_0:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_0, svuint8x2_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x2 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u8_1:
+** incb x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_1, svuint8x2_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x2 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 1))
+
+/*
+** ldnt1_vnum_u8_2:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_2, svuint8x2_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x2 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 2))
+
+/*
+** ldnt1_vnum_u8_14:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_14, svuint8x2_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x2 (pn8, x0, 14),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 14))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u8_16:
+** incb x0, all, mul #16
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_16, svuint8x2_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x2 (pn8, x0, 16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, 16))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u8_m1:
+** decb x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_m1, svuint8x2_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x2 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -1))
+
+/*
+** ldnt1_vnum_u8_m2:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_m2, svuint8x2_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x2 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -2))
+
+/*
+** ldnt1_vnum_u8_m16:
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_m16, svuint8x2_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x2 (pn8, x0, -16),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -16))
+
+/*
+** ldnt1_vnum_u8_m18:
+** addvl (x[0-9]+), x0, #-18
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_m18, svuint8x2_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x2 (pn8, x0, -18),
+ z0 = svldnt1_vnum_x2 (pn8, x0, -18))
+
+/*
+** ldnt1_vnum_u8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1b {z0\.b(?: - |, )z1\.b}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_x1, svuint8x2_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x2 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x2 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_u8_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_u8_x4.c
new file mode 100644
index 0000000..baf3d14
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_u8_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ldnt1_u8_base:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_base, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0),
+ z0 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u8_index:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x1\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_index, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 + x1),
+ z0 = svldnt1_x4 (pn8, x0 + x1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u8_1:
+** incb x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_1, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 + svcntb ()),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u8_2:
+** incb x0, all, mul #2
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_2, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 + svcntb () * 2),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u8_3:
+** incb x0, all, mul #3
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_3, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 + svcntb () * 3),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 3))
+
+/*
+** ldnt1_u8_4:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_4, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 + svcntb () * 4),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 4))
+
+/*
+** ldnt1_u8_28:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_28, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 + svcntb () * 28),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 28))
+
+/*
+** ldnt1_u8_32:
+** [^{]*
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_32, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 + svcntb () * 32),
+ z0 = svldnt1_x4 (pn8, x0 + svcntb () * 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u8_m1:
+** decb x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_m1, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 - svcntb ()),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb ()))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u8_m2:
+** decb x0, all, mul #2
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_m2, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 - svcntb () * 2),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_u8_m3:
+** decb x0, all, mul #3
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_m3, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 - svcntb () * 3),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 3))
+
+/*
+** ldnt1_u8_m4:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_m4, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 - svcntb () * 4),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 4))
+
+/*
+** ldnt1_u8_m32:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_m32, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 - svcntb () * 32),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 32))
+
+/*
+** ldnt1_u8_m36:
+** [^{]*
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_m36, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn8, x0 - svcntb () * 36),
+ z0 = svldnt1_x4 (pn8, x0 - svcntb () * 36))
+
+/*
+** ldnt1_u8_z17:
+** ldnt1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_z17, svuint8x4_t, uint8_t,
+ z17 = svldnt1_u8_x4 (pn8, x0),
+ z17 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u8_z22:
+** ldnt1b {z[^\n]+}, pn8/z, \[x0\]
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_z22, svuint8x4_t, uint8_t,
+ z22 = svldnt1_u8_x4 (pn8, x0),
+ z22 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u8_z28:
+** ldnt1b {z28\.b(?: - |, )z31\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_z28, svuint8x4_t, uint8_t,
+ z28 = svldnt1_u8_x4 (pn8, x0),
+ z28 = svldnt1_x4 (pn8, x0))
+
+/*
+** ldnt1_u8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** ldnt1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_pn0, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn0, x0),
+ z0 = svldnt1_x4 (pn0, x0))
+
+/*
+** ldnt1_u8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** ldnt1b {z0\.b(?: - |, )z3\.b}, pn\1/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_pn7, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn7, x0),
+ z0 = svldnt1_x4 (pn7, x0))
+
+/*
+** ldnt1_u8_pn15:
+** ldnt1b {z0\.b(?: - |, )z3\.b}, pn15/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_u8_pn15, svuint8x4_t, uint8_t,
+ z0 = svldnt1_u8_x4 (pn15, x0),
+ z0 = svldnt1_x4 (pn15, x0))
+
+/*
+** ldnt1_vnum_u8_0:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_0, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, 0),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u8_1:
+** incb x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_1, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, 1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u8_2:
+** incb x0, all, mul #2
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_2, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, 2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u8_3:
+** incb x0, all, mul #3
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_3, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, 3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 3))
+
+/*
+** ldnt1_vnum_u8_4:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_4, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, 4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 4))
+
+/*
+** ldnt1_vnum_u8_28:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_28, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, 28),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 28))
+
+/*
+** ldnt1_vnum_u8_32:
+** [^{]*
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_32, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, 32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, 32))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u8_m1:
+** decb x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_m1, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, -1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -1))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u8_m2:
+** decb x0, all, mul #2
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_m2, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, -2),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -2))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** ldnt1_vnum_u8_m3:
+** decb x0, all, mul #3
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_m3, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, -3),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -3))
+
+/*
+** ldnt1_vnum_u8_m4:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_m4, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, -4),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -4))
+
+/*
+** ldnt1_vnum_u8_m32:
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_m32, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, -32),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -32))
+
+/*
+** ldnt1_vnum_u8_m36:
+** [^{]*
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_m36, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, -36),
+ z0 = svldnt1_vnum_x4 (pn8, x0, -36))
+
+/*
+** ldnt1_vnum_u8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** ldnt1b {z0\.b - z3\.b}, pn8/z, \[x0, \3\]
+** )
+** ret
+*/
+TEST_LOAD_COUNT (ldnt1_vnum_u8_x1, svuint8x4_t, uint8_t,
+ z0 = svldnt1_vnum_u8_x4 (pn8, x0, x1),
+ z0 = svldnt1_vnum_x4 (pn8, x0, x1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pext_lane_c16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pext_lane_c16.c
new file mode 100644
index 0000000..0fb4e3d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pext_lane_c16.c
@@ -0,0 +1,57 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** pext_lane_p2_pn0_0:
+** mov p([0-9]+)\.b, p0\.b
+** pext p2\.h, pn\1\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p2_pn0_0, svbool_t,
+ p2 = svpext_lane_c16 (pn0, 0),
+ p2 = svpext_lane_c16 (pn0, 0))
+
+/*
+** pext_lane_p5_pn7_1:
+** mov p([0-9]+)\.b, p7\.b
+** pext p5\.h, pn\1\[1\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p5_pn7_1, svbool_t,
+ p5 = svpext_lane_c16 (pn7, 1),
+ p5 = svpext_lane_c16 (pn7, 1))
+
+/*
+** pext_lane_p9_pn8_2:
+** pext p9\.h, pn8\[2\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p9_pn8_2, svbool_t,
+ p9 = svpext_lane_c16 (pn8, 2),
+ p9 = svpext_lane_c16 (pn8, 2))
+
+/*
+** pext_lane_p12_pn11_3:
+** pext p12\.h, pn11\[3\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p12_pn11_3, svbool_t,
+ p12 = svpext_lane_c16 (pn11, 3),
+ p12 = svpext_lane_c16 (pn11, 3))
+
+/*
+** pext_lane_p2_pn15_0:
+** pext p2\.h, pn15\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p2_pn15_0, svbool_t,
+ p2 = svpext_lane_c16 (pn15, 0),
+ p2 = svpext_lane_c16 (pn15, 0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pext_lane_c16_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pext_lane_c16_x2.c
new file mode 100644
index 0000000..bac920b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pext_lane_c16_x2.c
@@ -0,0 +1,61 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** pext_lane_p2_pn0_0:
+** mov p([0-9]+)\.b, p0\.b
+** pext {p2\.h, p3\.h}, pn\1\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p2_pn0_0, svboolx2_t,
+ p2 = svpext_lane_c16_x2 (pn0, 0),
+ p2 = svpext_lane_c16_x2 (pn0, 0))
+
+/*
+** pext_lane_p5_pn7_1:
+** mov p([0-9]+)\.b, p7\.b
+** pext {[^}]+}, pn\1\[1\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p5_pn7_1, svboolx2_t,
+ p5 = svpext_lane_c16_x2 (pn7, 1),
+ p5 = svpext_lane_c16_x2 (pn7, 1))
+
+/*
+** pext_lane_p9_pn8_0:
+** pext {[^}]+}, pn8\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p9_pn8_0, svboolx2_t,
+ p9 = svpext_lane_c16_x2 (pn8, 0),
+ p9 = svpext_lane_c16_x2 (pn8, 0))
+
+/*
+** pext_lane_p12_pn11_1:
+** pext {p12\.h, p13\.h}, pn11\[1\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p12_pn11_1, svboolx2_t,
+ p12 = svpext_lane_c16_x2 (pn11, 1),
+ p12 = svpext_lane_c16_x2 (pn11, 1))
+
+/*
+** pext_lane_p2_pn15_0:
+** pext {p2\.h, p3\.h}, pn15\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p2_pn15_0, svboolx2_t,
+ p2 = svpext_lane_c16_x2 (pn15, 0),
+ p2 = svpext_lane_c16_x2 (pn15, 0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pext_lane_c32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pext_lane_c32.c
new file mode 100644
index 0000000..6c6d9b8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pext_lane_c32.c
@@ -0,0 +1,57 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** pext_lane_p2_pn0_0:
+** mov p([0-9]+)\.b, p0\.b
+** pext p2\.s, pn\1\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p2_pn0_0, svbool_t,
+ p2 = svpext_lane_c32 (pn0, 0),
+ p2 = svpext_lane_c32 (pn0, 0))
+
+/*
+** pext_lane_p5_pn7_1:
+** mov p([0-9]+)\.b, p7\.b
+** pext p5\.s, pn\1\[1\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p5_pn7_1, svbool_t,
+ p5 = svpext_lane_c32 (pn7, 1),
+ p5 = svpext_lane_c32 (pn7, 1))
+
+/*
+** pext_lane_p9_pn8_2:
+** pext p9\.s, pn8\[2\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p9_pn8_2, svbool_t,
+ p9 = svpext_lane_c32 (pn8, 2),
+ p9 = svpext_lane_c32 (pn8, 2))
+
+/*
+** pext_lane_p12_pn11_3:
+** pext p12\.s, pn11\[3\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p12_pn11_3, svbool_t,
+ p12 = svpext_lane_c32 (pn11, 3),
+ p12 = svpext_lane_c32 (pn11, 3))
+
+/*
+** pext_lane_p2_pn15_0:
+** pext p2\.s, pn15\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p2_pn15_0, svbool_t,
+ p2 = svpext_lane_c32 (pn15, 0),
+ p2 = svpext_lane_c32 (pn15, 0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pext_lane_c32_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pext_lane_c32_x2.c
new file mode 100644
index 0000000..f0dd909
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pext_lane_c32_x2.c
@@ -0,0 +1,61 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** pext_lane_p2_pn0_0:
+** mov p([0-9]+)\.b, p0\.b
+** pext {p2\.s, p3\.s}, pn\1\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p2_pn0_0, svboolx2_t,
+ p2 = svpext_lane_c32_x2 (pn0, 0),
+ p2 = svpext_lane_c32_x2 (pn0, 0))
+
+/*
+** pext_lane_p5_pn7_1:
+** mov p([0-9]+)\.b, p7\.b
+** pext {[^}]+}, pn\1\[1\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p5_pn7_1, svboolx2_t,
+ p5 = svpext_lane_c32_x2 (pn7, 1),
+ p5 = svpext_lane_c32_x2 (pn7, 1))
+
+/*
+** pext_lane_p9_pn8_0:
+** pext {[^}]+}, pn8\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p9_pn8_0, svboolx2_t,
+ p9 = svpext_lane_c32_x2 (pn8, 0),
+ p9 = svpext_lane_c32_x2 (pn8, 0))
+
+/*
+** pext_lane_p12_pn11_1:
+** pext {p12\.s, p13\.s}, pn11\[1\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p12_pn11_1, svboolx2_t,
+ p12 = svpext_lane_c32_x2 (pn11, 1),
+ p12 = svpext_lane_c32_x2 (pn11, 1))
+
+/*
+** pext_lane_p2_pn15_0:
+** pext {p2\.s, p3\.s}, pn15\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p2_pn15_0, svboolx2_t,
+ p2 = svpext_lane_c32_x2 (pn15, 0),
+ p2 = svpext_lane_c32_x2 (pn15, 0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pext_lane_c64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pext_lane_c64.c
new file mode 100644
index 0000000..1117dce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pext_lane_c64.c
@@ -0,0 +1,57 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** pext_lane_p2_pn0_0:
+** mov p([0-9]+)\.b, p0\.b
+** pext p2\.d, pn\1\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p2_pn0_0, svbool_t,
+ p2 = svpext_lane_c64 (pn0, 0),
+ p2 = svpext_lane_c64 (pn0, 0))
+
+/*
+** pext_lane_p5_pn7_1:
+** mov p([0-9]+)\.b, p7\.b
+** pext p5\.d, pn\1\[1\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p5_pn7_1, svbool_t,
+ p5 = svpext_lane_c64 (pn7, 1),
+ p5 = svpext_lane_c64 (pn7, 1))
+
+/*
+** pext_lane_p9_pn8_2:
+** pext p9\.d, pn8\[2\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p9_pn8_2, svbool_t,
+ p9 = svpext_lane_c64 (pn8, 2),
+ p9 = svpext_lane_c64 (pn8, 2))
+
+/*
+** pext_lane_p12_pn11_3:
+** pext p12\.d, pn11\[3\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p12_pn11_3, svbool_t,
+ p12 = svpext_lane_c64 (pn11, 3),
+ p12 = svpext_lane_c64 (pn11, 3))
+
+/*
+** pext_lane_p2_pn15_0:
+** pext p2\.d, pn15\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p2_pn15_0, svbool_t,
+ p2 = svpext_lane_c64 (pn15, 0),
+ p2 = svpext_lane_c64 (pn15, 0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pext_lane_c64_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pext_lane_c64_x2.c
new file mode 100644
index 0000000..57c84d1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pext_lane_c64_x2.c
@@ -0,0 +1,61 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** pext_lane_p2_pn0_0:
+** mov p([0-9]+)\.b, p0\.b
+** pext {p2\.d, p3\.d}, pn\1\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p2_pn0_0, svboolx2_t,
+ p2 = svpext_lane_c64_x2 (pn0, 0),
+ p2 = svpext_lane_c64_x2 (pn0, 0))
+
+/*
+** pext_lane_p5_pn7_1:
+** mov p([0-9]+)\.b, p7\.b
+** pext {[^}]+}, pn\1\[1\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p5_pn7_1, svboolx2_t,
+ p5 = svpext_lane_c64_x2 (pn7, 1),
+ p5 = svpext_lane_c64_x2 (pn7, 1))
+
+/*
+** pext_lane_p9_pn8_0:
+** pext {[^}]+}, pn8\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p9_pn8_0, svboolx2_t,
+ p9 = svpext_lane_c64_x2 (pn8, 0),
+ p9 = svpext_lane_c64_x2 (pn8, 0))
+
+/*
+** pext_lane_p12_pn11_1:
+** pext {p12\.d, p13\.d}, pn11\[1\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p12_pn11_1, svboolx2_t,
+ p12 = svpext_lane_c64_x2 (pn11, 1),
+ p12 = svpext_lane_c64_x2 (pn11, 1))
+
+/*
+** pext_lane_p2_pn15_0:
+** pext {p2\.d, p3\.d}, pn15\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p2_pn15_0, svboolx2_t,
+ p2 = svpext_lane_c64_x2 (pn15, 0),
+ p2 = svpext_lane_c64_x2 (pn15, 0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pext_lane_c8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pext_lane_c8.c
new file mode 100644
index 0000000..c8c11bb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pext_lane_c8.c
@@ -0,0 +1,57 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** pext_lane_p2_pn0_0:
+** mov p([0-9]+)\.b, p0\.b
+** pext p2\.b, pn\1\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p2_pn0_0, svbool_t,
+ p2 = svpext_lane_c8 (pn0, 0),
+ p2 = svpext_lane_c8 (pn0, 0))
+
+/*
+** pext_lane_p5_pn7_1:
+** mov p([0-9]+)\.b, p7\.b
+** pext p5\.b, pn\1\[1\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p5_pn7_1, svbool_t,
+ p5 = svpext_lane_c8 (pn7, 1),
+ p5 = svpext_lane_c8 (pn7, 1))
+
+/*
+** pext_lane_p9_pn8_2:
+** pext p9\.b, pn8\[2\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p9_pn8_2, svbool_t,
+ p9 = svpext_lane_c8 (pn8, 2),
+ p9 = svpext_lane_c8 (pn8, 2))
+
+/*
+** pext_lane_p12_pn11_3:
+** pext p12\.b, pn11\[3\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p12_pn11_3, svbool_t,
+ p12 = svpext_lane_c8 (pn11, 3),
+ p12 = svpext_lane_c8 (pn11, 3))
+
+/*
+** pext_lane_p2_pn15_0:
+** pext p2\.b, pn15\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p2_pn15_0, svbool_t,
+ p2 = svpext_lane_c8 (pn15, 0),
+ p2 = svpext_lane_c8 (pn15, 0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pext_lane_c8_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pext_lane_c8_x2.c
new file mode 100644
index 0000000..4c6088f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pext_lane_c8_x2.c
@@ -0,0 +1,61 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** pext_lane_p2_pn0_0:
+** mov p([0-9]+)\.b, p0\.b
+** pext {p2\.b, p3\.b}, pn\1\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p2_pn0_0, svboolx2_t,
+ p2 = svpext_lane_c8_x2 (pn0, 0),
+ p2 = svpext_lane_c8_x2 (pn0, 0))
+
+/*
+** pext_lane_p5_pn7_1:
+** mov p([0-9]+)\.b, p7\.b
+** pext {[^}]+}, pn\1\[1\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p5_pn7_1, svboolx2_t,
+ p5 = svpext_lane_c8_x2 (pn7, 1),
+ p5 = svpext_lane_c8_x2 (pn7, 1))
+
+/*
+** pext_lane_p9_pn8_0:
+** pext {[^}]+}, pn8\[0\]
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p9_pn8_0, svboolx2_t,
+ p9 = svpext_lane_c8_x2 (pn8, 0),
+ p9 = svpext_lane_c8_x2 (pn8, 0))
+
+/*
+** pext_lane_p12_pn11_1:
+** pext {p12\.b, p13\.b}, pn11\[1\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p12_pn11_1, svboolx2_t,
+ p12 = svpext_lane_c8_x2 (pn11, 1),
+ p12 = svpext_lane_c8_x2 (pn11, 1))
+
+/*
+** pext_lane_p2_pn15_0:
+** pext {p2\.b, p3\.b}, pn15\[0\]
+** ret
+*/
+TEST_EXTRACT_PN (pext_lane_p2_pn15_0, svboolx2_t,
+ p2 = svpext_lane_c8_x2 (pn15, 0),
+ p2 = svpext_lane_c8_x2 (pn15, 0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ptrue_c16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ptrue_c16.c
new file mode 100644
index 0000000..e3270b1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ptrue_c16.c
@@ -0,0 +1,48 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ptrue_pn0:
+** ptrue pn([8-9]|1[0-5])\.h
+** mov p0\.b, p\1\.b
+** ret
+*/
+TEST_PN (ptrue_pn0,
+ pn0 = svptrue_c16 (),
+ pn0 = svptrue_c16 ())
+
+/*
+** ptrue_pn7:
+** ptrue pn([8-9]|1[0-5])\.h
+** mov p7\.b, p\1\.b
+** ret
+*/
+TEST_PN (ptrue_pn7,
+ pn7 = svptrue_c16 (),
+ pn7 = svptrue_c16 ())
+
+/*
+** ptrue_pn8:
+** ptrue pn8\.h
+** ret
+*/
+TEST_PN (ptrue_pn8,
+ pn8 = svptrue_c16 (),
+ pn8 = svptrue_c16 ())
+
+/*
+** ptrue_pn15:
+** ptrue pn15\.h
+** ret
+*/
+TEST_PN (ptrue_pn15,
+ pn15 = svptrue_c16 (),
+ pn15 = svptrue_c16 ())
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ptrue_c32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ptrue_c32.c
new file mode 100644
index 0000000..9d2c566
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ptrue_c32.c
@@ -0,0 +1,48 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ptrue_pn0:
+** ptrue pn([8-9]|1[0-5])\.s
+** mov p0\.b, p\1\.b
+** ret
+*/
+TEST_PN (ptrue_pn0,
+ pn0 = svptrue_c32 (),
+ pn0 = svptrue_c32 ())
+
+/*
+** ptrue_pn7:
+** ptrue pn([8-9]|1[0-5])\.s
+** mov p7\.b, p\1\.b
+** ret
+*/
+TEST_PN (ptrue_pn7,
+ pn7 = svptrue_c32 (),
+ pn7 = svptrue_c32 ())
+
+/*
+** ptrue_pn8:
+** ptrue pn8\.s
+** ret
+*/
+TEST_PN (ptrue_pn8,
+ pn8 = svptrue_c32 (),
+ pn8 = svptrue_c32 ())
+
+/*
+** ptrue_pn15:
+** ptrue pn15\.s
+** ret
+*/
+TEST_PN (ptrue_pn15,
+ pn15 = svptrue_c32 (),
+ pn15 = svptrue_c32 ())
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ptrue_c64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ptrue_c64.c
new file mode 100644
index 0000000..10587f5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ptrue_c64.c
@@ -0,0 +1,48 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ptrue_pn0:
+** ptrue pn([8-9]|1[0-5])\.d
+** mov p0\.b, p\1\.b
+** ret
+*/
+TEST_PN (ptrue_pn0,
+ pn0 = svptrue_c64 (),
+ pn0 = svptrue_c64 ())
+
+/*
+** ptrue_pn7:
+** ptrue pn([8-9]|1[0-5])\.d
+** mov p7\.b, p\1\.b
+** ret
+*/
+TEST_PN (ptrue_pn7,
+ pn7 = svptrue_c64 (),
+ pn7 = svptrue_c64 ())
+
+/*
+** ptrue_pn8:
+** ptrue pn8\.d
+** ret
+*/
+TEST_PN (ptrue_pn8,
+ pn8 = svptrue_c64 (),
+ pn8 = svptrue_c64 ())
+
+/*
+** ptrue_pn15:
+** ptrue pn15\.d
+** ret
+*/
+TEST_PN (ptrue_pn15,
+ pn15 = svptrue_c64 (),
+ pn15 = svptrue_c64 ())
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ptrue_c8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ptrue_c8.c
new file mode 100644
index 0000000..2d3573a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ptrue_c8.c
@@ -0,0 +1,48 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** ptrue_pn0:
+** ptrue pn([8-9]|1[0-5])\.b
+** mov p0\.b, p\1\.b
+** ret
+*/
+TEST_PN (ptrue_pn0,
+ pn0 = svptrue_c8 (),
+ pn0 = svptrue_c8 ())
+
+/*
+** ptrue_pn7:
+** ptrue pn([8-9]|1[0-5])\.b
+** mov p7\.b, p\1\.b
+** ret
+*/
+TEST_PN (ptrue_pn7,
+ pn7 = svptrue_c8 (),
+ pn7 = svptrue_c8 ())
+
+/*
+** ptrue_pn8:
+** ptrue pn8\.b
+** ret
+*/
+TEST_PN (ptrue_pn8,
+ pn8 = svptrue_c8 (),
+ pn8 = svptrue_c8 ())
+
+/*
+** ptrue_pn15:
+** ptrue pn15\.b
+** ret
+*/
+TEST_PN (ptrue_pn15,
+ pn15 = svptrue_c8 (),
+ pn15 = svptrue_c8 ())
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/qcvtn_s16_s32_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/qcvtn_s16_s32_x2.c
new file mode 100644
index 0000000..d1f42c8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/qcvtn_s16_s32_x2.c
@@ -0,0 +1,57 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** qcvtn_z0_z0:
+** sqcvtn z0\.h, {z0\.s - z1\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z0_z0, svint32x2_t, svint16_t,
+ z0_res = svqcvtn_s16_s32_x2 (z0),
+ z0_res = svqcvtn_s16 (z0))
+
+/*
+** qcvtn_z0_z6:
+** sqcvtn z0\.h, {z6\.s - z7\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z0_z6, svint32x2_t, svint16_t,
+ z0_res = svqcvtn_s16_s32_x2 (z6),
+ z0_res = svqcvtn_s16 (z6))
+
+/*
+** qcvtn_z0_z29:
+** mov [^\n]+
+** mov [^\n]+
+** sqcvtn z0\.h, [^\n]+
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z0_z29, svint32x2_t, svint16_t,
+ z0_res = svqcvtn_s16_s32_x2 (z29),
+ z0_res = svqcvtn_s16 (z29))
+
+/*
+** qcvtn_z5_z0:
+** sqcvtn z5\.h, {z0\.s - z1\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z5_z0, svint32x2_t, svint16_t,
+ z5 = svqcvtn_s16_s32_x2 (z0),
+ z5 = svqcvtn_s16 (z0))
+
+/*
+** qcvtn_z22_z16:
+** sqcvtn z22\.h, {z16\.s - z17\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z22_z16, svint32x2_t, svint16_t,
+ z22 = svqcvtn_s16_s32_x2 (z16),
+ z22 = svqcvtn_s16 (z16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/qcvtn_u16_s32_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/qcvtn_u16_s32_x2.c
new file mode 100644
index 0000000..c7b270e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/qcvtn_u16_s32_x2.c
@@ -0,0 +1,57 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** qcvtn_z0_z0:
+** sqcvtun z0\.h, {z0\.s - z1\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z0_z0, svint32x2_t, svuint16_t,
+ z0_res = svqcvtn_u16_s32_x2 (z0),
+ z0_res = svqcvtn_u16 (z0))
+
+/*
+** qcvtn_z0_z6:
+** sqcvtun z0\.h, {z6\.s - z7\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z0_z6, svint32x2_t, svuint16_t,
+ z0_res = svqcvtn_u16_s32_x2 (z6),
+ z0_res = svqcvtn_u16 (z6))
+
+/*
+** qcvtn_z0_z29:
+** mov [^\n]+
+** mov [^\n]+
+** sqcvtun z0\.h, [^\n]+
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z0_z29, svint32x2_t, svuint16_t,
+ z0_res = svqcvtn_u16_s32_x2 (z29),
+ z0_res = svqcvtn_u16 (z29))
+
+/*
+** qcvtn_z5_z0:
+** sqcvtun z5\.h, {z0\.s - z1\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z5_z0, svint32x2_t, svuint16_t,
+ z5 = svqcvtn_u16_s32_x2 (z0),
+ z5 = svqcvtn_u16 (z0))
+
+/*
+** qcvtn_z22_z16:
+** sqcvtun z22\.h, {z16\.s - z17\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z22_z16, svint32x2_t, svuint16_t,
+ z22 = svqcvtn_u16_s32_x2 (z16),
+ z22 = svqcvtn_u16 (z16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/qcvtn_u16_u32_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/qcvtn_u16_u32_x2.c
new file mode 100644
index 0000000..d157bf3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/qcvtn_u16_u32_x2.c
@@ -0,0 +1,57 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** qcvtn_z0_z0:
+** uqcvtn z0\.h, {z0\.s - z1\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z0_z0, svuint32x2_t, svuint16_t,
+ z0_res = svqcvtn_u16_u32_x2 (z0),
+ z0_res = svqcvtn_u16 (z0))
+
+/*
+** qcvtn_z0_z6:
+** uqcvtn z0\.h, {z6\.s - z7\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z0_z6, svuint32x2_t, svuint16_t,
+ z0_res = svqcvtn_u16_u32_x2 (z6),
+ z0_res = svqcvtn_u16 (z6))
+
+/*
+** qcvtn_z0_z29:
+** mov [^\n]+
+** mov [^\n]+
+** uqcvtn z0\.h, [^\n]+
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z0_z29, svuint32x2_t, svuint16_t,
+ z0_res = svqcvtn_u16_u32_x2 (z29),
+ z0_res = svqcvtn_u16 (z29))
+
+/*
+** qcvtn_z5_z0:
+** uqcvtn z5\.h, {z0\.s - z1\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z5_z0, svuint32x2_t, svuint16_t,
+ z5 = svqcvtn_u16_u32_x2 (z0),
+ z5 = svqcvtn_u16 (z0))
+
+/*
+** qcvtn_z22_z16:
+** uqcvtn z22\.h, {z16\.s - z17\.s}
+** ret
+*/
+TEST_X2_NARROW (qcvtn_z22_z16, svuint32x2_t, svuint16_t,
+ z22 = svqcvtn_u16_u32_x2 (z16),
+ z22 = svqcvtn_u16 (z16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/qrshrn_s16_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/qrshrn_s16_x2.c
new file mode 100644
index 0000000..462ad9f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/qrshrn_s16_x2.c
@@ -0,0 +1,57 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** qrshrn_z0_z0_1:
+** sqrshrn z0\.h, {z0\.s - z1\.s}, #1
+** ret
+*/
+TEST_X2_NARROW (qrshrn_z0_z0_1, svint32x2_t, svint16_t,
+ z0_res = svqrshrn_n_s16_s32_x2 (z0, 1),
+ z0_res = svqrshrn_s16 (z0, 1))
+
+/*
+** qrshrn_z0_z6_16:
+** sqrshrn z0\.h, {z6\.s - z7\.s}, #16
+** ret
+*/
+TEST_X2_NARROW (qrshrn_z0_z6_16, svint32x2_t, svint16_t,
+ z0_res = svqrshrn_n_s16_s32_x2 (z6, 16),
+ z0_res = svqrshrn_s16 (z6, 16))
+
+/*
+** qrshrn_z0_z29_13:
+** mov [^\n]+
+** mov [^\n]+
+** sqrshrn z0\.h, [^\n]+, #13
+** ret
+*/
+TEST_X2_NARROW (qrshrn_z0_z29_13, svint32x2_t, svint16_t,
+ z0_res = svqrshrn_n_s16_s32_x2 (z29, 13),
+ z0_res = svqrshrn_s16 (z29, 13))
+
+/*
+** qrshrn_z5_z0_11:
+** sqrshrn z5\.h, {z0\.s - z1\.s}, #11
+** ret
+*/
+TEST_X2_NARROW (qrshrn_z5_z0_11, svint32x2_t, svint16_t,
+ z5 = svqrshrn_n_s16_s32_x2 (z0, 11),
+ z5 = svqrshrn_s16 (z0, 11))
+
+/*
+** qrshrn_z22_z16_15:
+** sqrshrn z22\.h, {z16\.s - z17\.s}, #15
+** ret
+*/
+TEST_X2_NARROW (qrshrn_z22_z16_15, svint32x2_t, svint16_t,
+ z22 = svqrshrn_n_s16_s32_x2 (z16, 15),
+ z22 = svqrshrn_s16 (z16, 15))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/qrshrn_u16_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/qrshrn_u16_x2.c
new file mode 100644
index 0000000..48d0d41
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/qrshrn_u16_x2.c
@@ -0,0 +1,57 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** qrshrn_z0_z0_1:
+** uqrshrn z0\.h, {z0\.s - z1\.s}, #1
+** ret
+*/
+TEST_X2_NARROW (qrshrn_z0_z0_1, svuint32x2_t, svuint16_t,
+ z0_res = svqrshrn_n_u16_u32_x2 (z0, 1),
+ z0_res = svqrshrn_u16 (z0, 1))
+
+/*
+** qrshrn_z0_z6_16:
+** uqrshrn z0\.h, {z6\.s - z7\.s}, #16
+** ret
+*/
+TEST_X2_NARROW (qrshrn_z0_z6_16, svuint32x2_t, svuint16_t,
+ z0_res = svqrshrn_n_u16_u32_x2 (z6, 16),
+ z0_res = svqrshrn_u16 (z6, 16))
+
+/*
+** qrshrn_z0_z29_13:
+** mov [^\n]+
+** mov [^\n]+
+** uqrshrn z0\.h, [^\n]+, #13
+** ret
+*/
+TEST_X2_NARROW (qrshrn_z0_z29_13, svuint32x2_t, svuint16_t,
+ z0_res = svqrshrn_n_u16_u32_x2 (z29, 13),
+ z0_res = svqrshrn_u16 (z29, 13))
+
+/*
+** qrshrn_z5_z0_11:
+** uqrshrn z5\.h, {z0\.s - z1\.s}, #11
+** ret
+*/
+TEST_X2_NARROW (qrshrn_z5_z0_11, svuint32x2_t, svuint16_t,
+ z5 = svqrshrn_n_u16_u32_x2 (z0, 11),
+ z5 = svqrshrn_u16 (z0, 11))
+
+/*
+** qrshrn_z22_z16_15:
+** uqrshrn z22\.h, {z16\.s - z17\.s}, #15
+** ret
+*/
+TEST_X2_NARROW (qrshrn_z22_z16_15, svuint32x2_t, svuint16_t,
+ z22 = svqrshrn_n_u16_u32_x2 (z16, 15),
+ z22 = svqrshrn_u16 (z16, 15))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/qrshrun_u16_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/qrshrun_u16_x2.c
new file mode 100644
index 0000000..0061c5fc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/qrshrun_u16_x2.c
@@ -0,0 +1,57 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** qrshrun_z0_z0_1:
+** sqrshrun z0\.h, {z0\.s - z1\.s}, #1
+** ret
+*/
+TEST_X2_NARROW (qrshrun_z0_z0_1, svint32x2_t, svuint16_t,
+ z0_res = svqrshrun_n_u16_s32_x2 (z0, 1),
+ z0_res = svqrshrun_u16 (z0, 1))
+
+/*
+** qrshrun_z0_z6_16:
+** sqrshrun z0\.h, {z6\.s - z7\.s}, #16
+** ret
+*/
+TEST_X2_NARROW (qrshrun_z0_z6_16, svint32x2_t, svuint16_t,
+ z0_res = svqrshrun_n_u16_s32_x2 (z6, 16),
+ z0_res = svqrshrun_u16 (z6, 16))
+
+/*
+** qrshrun_z0_z29_13:
+** mov [^\n]+
+** mov [^\n]+
+** sqrshrun z0\.h, [^\n]+, #13
+** ret
+*/
+TEST_X2_NARROW (qrshrun_z0_z29_13, svint32x2_t, svuint16_t,
+ z0_res = svqrshrun_n_u16_s32_x2 (z29, 13),
+ z0_res = svqrshrun_u16 (z29, 13))
+
+/*
+** qrshrun_z5_z0_11:
+** sqrshrun z5\.h, {z0\.s - z1\.s}, #11
+** ret
+*/
+TEST_X2_NARROW (qrshrun_z5_z0_11, svint32x2_t, svuint16_t,
+ z5 = svqrshrun_n_u16_s32_x2 (z0, 11),
+ z5 = svqrshrun_u16 (z0, 11))
+
+/*
+** qrshrun_z22_z16_15:
+** sqrshrun z22\.h, {z16\.s - z17\.s}, #15
+** ret
+*/
+TEST_X2_NARROW (qrshrun_z22_z16_15, svint32x2_t, svuint16_t,
+ z22 = svqrshrun_n_u16_s32_x2 (z16, 15),
+ z22 = svqrshrun_u16 (z16, 15))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_bf16_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_bf16_x2.c
new file mode 100644
index 0000000..b72245e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_bf16_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_bf16_base:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_base, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_bf16_index:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_index, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_bf16_1:
+** incb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_1, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0 + svcnth (), z0),
+ svst1 (pn8, x0 + svcnth (), z0))
+
+/*
+** st1_bf16_2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_2, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0 + svcnth () * 2, z0),
+ svst1 (pn8, x0 + svcnth () * 2, z0))
+
+/*
+** st1_bf16_14:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_14, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0 + svcnth () * 14, z0),
+ svst1 (pn8, x0 + svcnth () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_bf16_16:
+** incb x0, all, mul #16
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_16, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0 + svcnth () * 16, z0),
+ svst1 (pn8, x0 + svcnth () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_bf16_m1:
+** decb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_m1, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0 - svcnth (), z0),
+ svst1 (pn8, x0 - svcnth (), z0))
+
+/*
+** st1_bf16_m2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_m2, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0 - svcnth () * 2, z0),
+ svst1 (pn8, x0 - svcnth () * 2, z0))
+
+/*
+** st1_bf16_m16:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_m16, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0 - svcnth () * 16, z0),
+ svst1 (pn8, x0 - svcnth () * 16, z0))
+
+/*
+** st1_bf16_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_m18, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0 - svcnth () * 18, z0),
+ svst1 (pn8, x0 - svcnth () * 18, z0))
+
+/*
+** st1_bf16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_z17, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_bf16_z22:
+** st1h {z22\.h(?: - |, )z23\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_z22, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_bf16_z28:
+** st1h {z28\.h(?: - |, )z29\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_z28, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_bf16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_pn0, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_bf16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_pn7, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_bf16_pn15:
+** st1h {z0\.h(?: - |, )z1\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_pn15, svbfloat16x2_t, bfloat16_t,
+ svst1_bf16_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_bf16_0:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_0, svbfloat16x2_t, bfloat16_t,
+ svst1_vnum_bf16_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_bf16_1:
+** incb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_1, svbfloat16x2_t, bfloat16_t,
+ svst1_vnum_bf16_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_bf16_2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_2, svbfloat16x2_t, bfloat16_t,
+ svst1_vnum_bf16_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_bf16_14:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_14, svbfloat16x2_t, bfloat16_t,
+ svst1_vnum_bf16_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_bf16_16:
+** incb x0, all, mul #16
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_16, svbfloat16x2_t, bfloat16_t,
+ svst1_vnum_bf16_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_bf16_m1:
+** decb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_m1, svbfloat16x2_t, bfloat16_t,
+ svst1_vnum_bf16_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_bf16_m2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_m2, svbfloat16x2_t, bfloat16_t,
+ svst1_vnum_bf16_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_bf16_m16:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_m16, svbfloat16x2_t, bfloat16_t,
+ svst1_vnum_bf16_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_bf16_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_m18, svbfloat16x2_t, bfloat16_t,
+ svst1_vnum_bf16_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_bf16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_x1, svbfloat16x2_t, bfloat16_t,
+ svst1_vnum_bf16_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_bf16_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_bf16_x4.c
new file mode 100644
index 0000000..19df2c8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_bf16_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_bf16_base:
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_base, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_bf16_index:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_index, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_bf16_1:
+** incb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_1, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 + svcnth (), z0),
+ svst1 (pn8, x0 + svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_bf16_2:
+** incb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_2, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 + svcnth () * 2, z0),
+ svst1 (pn8, x0 + svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_bf16_3:
+** incb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_3, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 + svcnth () * 3, z0),
+ svst1 (pn8, x0 + svcnth () * 3, z0))
+
+/*
+** st1_bf16_4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_4, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 + svcnth () * 4, z0),
+ svst1 (pn8, x0 + svcnth () * 4, z0))
+
+/*
+** st1_bf16_28:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_28, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 + svcnth () * 28, z0),
+ svst1 (pn8, x0 + svcnth () * 28, z0))
+
+/*
+** st1_bf16_32:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_32, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 + svcnth () * 32, z0),
+ svst1 (pn8, x0 + svcnth () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_bf16_m1:
+** decb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_m1, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 - svcnth (), z0),
+ svst1 (pn8, x0 - svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_bf16_m2:
+** decb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_m2, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 - svcnth () * 2, z0),
+ svst1 (pn8, x0 - svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_bf16_m3:
+** decb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_m3, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 - svcnth () * 3, z0),
+ svst1 (pn8, x0 - svcnth () * 3, z0))
+
+/*
+** st1_bf16_m4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_m4, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 - svcnth () * 4, z0),
+ svst1 (pn8, x0 - svcnth () * 4, z0))
+
+/*
+** st1_bf16_m32:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_m32, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 - svcnth () * 32, z0),
+ svst1 (pn8, x0 - svcnth () * 32, z0))
+
+/*
+** st1_bf16_m36:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_m36, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0 - svcnth () * 36, z0),
+ svst1 (pn8, x0 - svcnth () * 36, z0))
+
+/*
+** st1_bf16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_z17, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_bf16_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_z22, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_bf16_z28:
+** st1h {z28\.h - z31\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_z28, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_bf16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_pn0, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_bf16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_pn7, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_bf16_pn15:
+** st1h {z0\.h - z3\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_bf16_pn15, svbfloat16x4_t, bfloat16_t,
+ svst1_bf16_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_bf16_0:
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_0, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_bf16_1:
+** incb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_1, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_bf16_2:
+** incb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_2, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_bf16_3:
+** incb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_3, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_bf16_4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_4, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_bf16_28:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_28, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_bf16_32:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_32, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_bf16_m1:
+** decb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_m1, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_bf16_m2:
+** decb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_m2, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_bf16_m3:
+** decb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_m3, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_bf16_m4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_m4, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_bf16_m32:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_m32, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_bf16_m36:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_m36, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
+/*
+** st1_vnum_bf16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1h {z0\.h - z3\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1h {z0\.h - z3\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_bf16_x1, svbfloat16x4_t, bfloat16_t,
+ svst1_vnum_bf16_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_f16_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_f16_x2.c
new file mode 100644
index 0000000..3757e9d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_f16_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_f16_base:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_base, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_f16_index:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_index, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f16_1:
+** incb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_1, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0 + svcnth (), z0),
+ svst1 (pn8, x0 + svcnth (), z0))
+
+/*
+** st1_f16_2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_2, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0 + svcnth () * 2, z0),
+ svst1 (pn8, x0 + svcnth () * 2, z0))
+
+/*
+** st1_f16_14:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_14, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0 + svcnth () * 14, z0),
+ svst1 (pn8, x0 + svcnth () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f16_16:
+** incb x0, all, mul #16
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_16, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0 + svcnth () * 16, z0),
+ svst1 (pn8, x0 + svcnth () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f16_m1:
+** decb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_m1, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0 - svcnth (), z0),
+ svst1 (pn8, x0 - svcnth (), z0))
+
+/*
+** st1_f16_m2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_m2, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0 - svcnth () * 2, z0),
+ svst1 (pn8, x0 - svcnth () * 2, z0))
+
+/*
+** st1_f16_m16:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_m16, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0 - svcnth () * 16, z0),
+ svst1 (pn8, x0 - svcnth () * 16, z0))
+
+/*
+** st1_f16_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_m18, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0 - svcnth () * 18, z0),
+ svst1 (pn8, x0 - svcnth () * 18, z0))
+
+/*
+** st1_f16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_z17, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_f16_z22:
+** st1h {z22\.h(?: - |, )z23\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_z22, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_f16_z28:
+** st1h {z28\.h(?: - |, )z29\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_z28, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_f16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_pn0, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_f16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_pn7, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_f16_pn15:
+** st1h {z0\.h(?: - |, )z1\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_pn15, svfloat16x2_t, float16_t,
+ svst1_f16_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_f16_0:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_0, svfloat16x2_t, float16_t,
+ svst1_vnum_f16_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f16_1:
+** incb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_1, svfloat16x2_t, float16_t,
+ svst1_vnum_f16_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_f16_2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_2, svfloat16x2_t, float16_t,
+ svst1_vnum_f16_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_f16_14:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_14, svfloat16x2_t, float16_t,
+ svst1_vnum_f16_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f16_16:
+** incb x0, all, mul #16
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_16, svfloat16x2_t, float16_t,
+ svst1_vnum_f16_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f16_m1:
+** decb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_m1, svfloat16x2_t, float16_t,
+ svst1_vnum_f16_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_f16_m2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_m2, svfloat16x2_t, float16_t,
+ svst1_vnum_f16_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_f16_m16:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_m16, svfloat16x2_t, float16_t,
+ svst1_vnum_f16_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_f16_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_m18, svfloat16x2_t, float16_t,
+ svst1_vnum_f16_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_f16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_x1, svfloat16x2_t, float16_t,
+ svst1_vnum_f16_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_f16_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_f16_x4.c
new file mode 100644
index 0000000..0c70a67
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_f16_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_f16_base:
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_base, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_f16_index:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_index, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f16_1:
+** incb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_1, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 + svcnth (), z0),
+ svst1 (pn8, x0 + svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f16_2:
+** incb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_2, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 + svcnth () * 2, z0),
+ svst1 (pn8, x0 + svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f16_3:
+** incb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_3, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 + svcnth () * 3, z0),
+ svst1 (pn8, x0 + svcnth () * 3, z0))
+
+/*
+** st1_f16_4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_4, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 + svcnth () * 4, z0),
+ svst1 (pn8, x0 + svcnth () * 4, z0))
+
+/*
+** st1_f16_28:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_28, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 + svcnth () * 28, z0),
+ svst1 (pn8, x0 + svcnth () * 28, z0))
+
+/*
+** st1_f16_32:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_32, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 + svcnth () * 32, z0),
+ svst1 (pn8, x0 + svcnth () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f16_m1:
+** decb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_m1, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 - svcnth (), z0),
+ svst1 (pn8, x0 - svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f16_m2:
+** decb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_m2, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 - svcnth () * 2, z0),
+ svst1 (pn8, x0 - svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f16_m3:
+** decb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_m3, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 - svcnth () * 3, z0),
+ svst1 (pn8, x0 - svcnth () * 3, z0))
+
+/*
+** st1_f16_m4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_m4, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 - svcnth () * 4, z0),
+ svst1 (pn8, x0 - svcnth () * 4, z0))
+
+/*
+** st1_f16_m32:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_m32, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 - svcnth () * 32, z0),
+ svst1 (pn8, x0 - svcnth () * 32, z0))
+
+/*
+** st1_f16_m36:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_m36, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0 - svcnth () * 36, z0),
+ svst1 (pn8, x0 - svcnth () * 36, z0))
+
+/*
+** st1_f16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_z17, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_f16_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_z22, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_f16_z28:
+** st1h {z28\.h - z31\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_z28, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_f16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_pn0, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_f16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_pn7, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_f16_pn15:
+** st1h {z0\.h - z3\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f16_pn15, svfloat16x4_t, float16_t,
+ svst1_f16_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_f16_0:
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_0, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f16_1:
+** incb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_1, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f16_2:
+** incb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_2, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f16_3:
+** incb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_3, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_f16_4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_4, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_f16_28:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_28, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_f16_32:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_32, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f16_m1:
+** decb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_m1, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f16_m2:
+** decb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_m2, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f16_m3:
+** decb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_m3, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_f16_m4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_m4, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_f16_m32:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_m32, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_f16_m36:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_m36, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
+/*
+** st1_vnum_f16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1h {z0\.h - z3\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1h {z0\.h - z3\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f16_x1, svfloat16x4_t, float16_t,
+ svst1_vnum_f16_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_f32_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_f32_x2.c
new file mode 100644
index 0000000..b07bff7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_f32_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_f32_base:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_base, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_f32_index:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_index, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f32_1:
+** incb x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_1, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0 + svcntw (), z0),
+ svst1 (pn8, x0 + svcntw (), z0))
+
+/*
+** st1_f32_2:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_2, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0 + svcntw () * 2, z0),
+ svst1 (pn8, x0 + svcntw () * 2, z0))
+
+/*
+** st1_f32_14:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_14, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0 + svcntw () * 14, z0),
+ svst1 (pn8, x0 + svcntw () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f32_16:
+** incb x0, all, mul #16
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_16, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0 + svcntw () * 16, z0),
+ svst1 (pn8, x0 + svcntw () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f32_m1:
+** decb x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_m1, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0 - svcntw (), z0),
+ svst1 (pn8, x0 - svcntw (), z0))
+
+/*
+** st1_f32_m2:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_m2, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0 - svcntw () * 2, z0),
+ svst1 (pn8, x0 - svcntw () * 2, z0))
+
+/*
+** st1_f32_m16:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_m16, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0 - svcntw () * 16, z0),
+ svst1 (pn8, x0 - svcntw () * 16, z0))
+
+/*
+** st1_f32_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_m18, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0 - svcntw () * 18, z0),
+ svst1 (pn8, x0 - svcntw () * 18, z0))
+
+/*
+** st1_f32_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_z17, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_f32_z22:
+** st1w {z22\.s(?: - |, )z23\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_z22, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_f32_z28:
+** st1w {z28\.s(?: - |, )z29\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_z28, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_f32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_pn0, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_f32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_pn7, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_f32_pn15:
+** st1w {z0\.s(?: - |, )z1\.s}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_pn15, svfloat32x2_t, float32_t,
+ svst1_f32_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_f32_0:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_0, svfloat32x2_t, float32_t,
+ svst1_vnum_f32_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f32_1:
+** incb x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_1, svfloat32x2_t, float32_t,
+ svst1_vnum_f32_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_f32_2:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_2, svfloat32x2_t, float32_t,
+ svst1_vnum_f32_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_f32_14:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_14, svfloat32x2_t, float32_t,
+ svst1_vnum_f32_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f32_16:
+** incb x0, all, mul #16
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_16, svfloat32x2_t, float32_t,
+ svst1_vnum_f32_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f32_m1:
+** decb x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_m1, svfloat32x2_t, float32_t,
+ svst1_vnum_f32_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_f32_m2:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_m2, svfloat32x2_t, float32_t,
+ svst1_vnum_f32_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_f32_m16:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_m16, svfloat32x2_t, float32_t,
+ svst1_vnum_f32_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_f32_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_m18, svfloat32x2_t, float32_t,
+ svst1_vnum_f32_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_f32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_x1, svfloat32x2_t, float32_t,
+ svst1_vnum_f32_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_f32_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_f32_x4.c
new file mode 100644
index 0000000..d31f95c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_f32_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_f32_base:
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_base, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_f32_index:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_index, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f32_1:
+** incb x0
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_1, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 + svcntw (), z0),
+ svst1 (pn8, x0 + svcntw (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f32_2:
+** incb x0, all, mul #2
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_2, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 + svcntw () * 2, z0),
+ svst1 (pn8, x0 + svcntw () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f32_3:
+** incb x0, all, mul #3
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_3, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 + svcntw () * 3, z0),
+ svst1 (pn8, x0 + svcntw () * 3, z0))
+
+/*
+** st1_f32_4:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_4, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 + svcntw () * 4, z0),
+ svst1 (pn8, x0 + svcntw () * 4, z0))
+
+/*
+** st1_f32_28:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_28, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 + svcntw () * 28, z0),
+ svst1 (pn8, x0 + svcntw () * 28, z0))
+
+/*
+** st1_f32_32:
+** [^{]*
+** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_32, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 + svcntw () * 32, z0),
+ svst1 (pn8, x0 + svcntw () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f32_m1:
+** decb x0
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_m1, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 - svcntw (), z0),
+ svst1 (pn8, x0 - svcntw (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f32_m2:
+** decb x0, all, mul #2
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_m2, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 - svcntw () * 2, z0),
+ svst1 (pn8, x0 - svcntw () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f32_m3:
+** decb x0, all, mul #3
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_m3, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 - svcntw () * 3, z0),
+ svst1 (pn8, x0 - svcntw () * 3, z0))
+
+/*
+** st1_f32_m4:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_m4, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 - svcntw () * 4, z0),
+ svst1 (pn8, x0 - svcntw () * 4, z0))
+
+/*
+** st1_f32_m32:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_m32, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 - svcntw () * 32, z0),
+ svst1 (pn8, x0 - svcntw () * 32, z0))
+
+/*
+** st1_f32_m36:
+** [^{]*
+** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_m36, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0 - svcntw () * 36, z0),
+ svst1 (pn8, x0 - svcntw () * 36, z0))
+
+/*
+** st1_f32_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_z17, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_f32_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_z22, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_f32_z28:
+** st1w {z28\.s - z31\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_z28, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_f32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1w {z0\.s - z3\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_pn0, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_f32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1w {z0\.s - z3\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_pn7, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_f32_pn15:
+** st1w {z0\.s - z3\.s}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f32_pn15, svfloat32x4_t, float32_t,
+ svst1_f32_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_f32_0:
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_0, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f32_1:
+** incb x0
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_1, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f32_2:
+** incb x0, all, mul #2
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_2, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f32_3:
+** incb x0, all, mul #3
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_3, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_f32_4:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_4, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_f32_28:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_28, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_f32_32:
+** [^{]*
+** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_32, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f32_m1:
+** decb x0
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_m1, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f32_m2:
+** decb x0, all, mul #2
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_m2, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f32_m3:
+** decb x0, all, mul #3
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_m3, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_f32_m4:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_m4, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_f32_m32:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_m32, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_f32_m36:
+** [^{]*
+** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_m36, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
+/*
+** st1_vnum_f32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1w {z0\.s - z3\.s}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1w {z0\.s - z3\.s}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f32_x1, svfloat32x4_t, float32_t,
+ svst1_vnum_f32_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_f64_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_f64_x2.c
new file mode 100644
index 0000000..b735a8f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_f64_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_f64_base:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_base, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_f64_index:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_index, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f64_1:
+** incb x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_1, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0 + svcntd (), z0),
+ svst1 (pn8, x0 + svcntd (), z0))
+
+/*
+** st1_f64_2:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_2, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0 + svcntd () * 2, z0),
+ svst1 (pn8, x0 + svcntd () * 2, z0))
+
+/*
+** st1_f64_14:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_14, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0 + svcntd () * 14, z0),
+ svst1 (pn8, x0 + svcntd () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f64_16:
+** incb x0, all, mul #16
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_16, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0 + svcntd () * 16, z0),
+ svst1 (pn8, x0 + svcntd () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f64_m1:
+** decb x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_m1, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0 - svcntd (), z0),
+ svst1 (pn8, x0 - svcntd (), z0))
+
+/*
+** st1_f64_m2:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_m2, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0 - svcntd () * 2, z0),
+ svst1 (pn8, x0 - svcntd () * 2, z0))
+
+/*
+** st1_f64_m16:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_m16, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0 - svcntd () * 16, z0),
+ svst1 (pn8, x0 - svcntd () * 16, z0))
+
+/*
+** st1_f64_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_m18, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0 - svcntd () * 18, z0),
+ svst1 (pn8, x0 - svcntd () * 18, z0))
+
+/*
+** st1_f64_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_z17, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_f64_z22:
+** st1d {z22\.d(?: - |, )z23\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_z22, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_f64_z28:
+** st1d {z28\.d(?: - |, )z29\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_z28, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_f64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_pn0, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_f64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_pn7, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_f64_pn15:
+** st1d {z0\.d(?: - |, )z1\.d}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_pn15, svfloat64x2_t, float64_t,
+ svst1_f64_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_f64_0:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_0, svfloat64x2_t, float64_t,
+ svst1_vnum_f64_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f64_1:
+** incb x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_1, svfloat64x2_t, float64_t,
+ svst1_vnum_f64_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_f64_2:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_2, svfloat64x2_t, float64_t,
+ svst1_vnum_f64_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_f64_14:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_14, svfloat64x2_t, float64_t,
+ svst1_vnum_f64_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f64_16:
+** incb x0, all, mul #16
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_16, svfloat64x2_t, float64_t,
+ svst1_vnum_f64_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f64_m1:
+** decb x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_m1, svfloat64x2_t, float64_t,
+ svst1_vnum_f64_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_f64_m2:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_m2, svfloat64x2_t, float64_t,
+ svst1_vnum_f64_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_f64_m16:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_m16, svfloat64x2_t, float64_t,
+ svst1_vnum_f64_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_f64_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_m18, svfloat64x2_t, float64_t,
+ svst1_vnum_f64_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_f64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_x1, svfloat64x2_t, float64_t,
+ svst1_vnum_f64_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_f64_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_f64_x4.c
new file mode 100644
index 0000000..c65da0f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_f64_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_f64_base:
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_base, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_f64_index:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_index, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f64_1:
+** incb x0
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_1, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 + svcntd (), z0),
+ svst1 (pn8, x0 + svcntd (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f64_2:
+** incb x0, all, mul #2
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_2, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 + svcntd () * 2, z0),
+ svst1 (pn8, x0 + svcntd () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f64_3:
+** incb x0, all, mul #3
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_3, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 + svcntd () * 3, z0),
+ svst1 (pn8, x0 + svcntd () * 3, z0))
+
+/*
+** st1_f64_4:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_4, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 + svcntd () * 4, z0),
+ svst1 (pn8, x0 + svcntd () * 4, z0))
+
+/*
+** st1_f64_28:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_28, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 + svcntd () * 28, z0),
+ svst1 (pn8, x0 + svcntd () * 28, z0))
+
+/*
+** st1_f64_32:
+** [^{]*
+** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_32, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 + svcntd () * 32, z0),
+ svst1 (pn8, x0 + svcntd () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f64_m1:
+** decb x0
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_m1, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 - svcntd (), z0),
+ svst1 (pn8, x0 - svcntd (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f64_m2:
+** decb x0, all, mul #2
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_m2, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 - svcntd () * 2, z0),
+ svst1 (pn8, x0 - svcntd () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_f64_m3:
+** decb x0, all, mul #3
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_m3, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 - svcntd () * 3, z0),
+ svst1 (pn8, x0 - svcntd () * 3, z0))
+
+/*
+** st1_f64_m4:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_m4, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 - svcntd () * 4, z0),
+ svst1 (pn8, x0 - svcntd () * 4, z0))
+
+/*
+** st1_f64_m32:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_m32, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 - svcntd () * 32, z0),
+ svst1 (pn8, x0 - svcntd () * 32, z0))
+
+/*
+** st1_f64_m36:
+** [^{]*
+** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_m36, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0 - svcntd () * 36, z0),
+ svst1 (pn8, x0 - svcntd () * 36, z0))
+
+/*
+** st1_f64_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_z17, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_f64_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_z22, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_f64_z28:
+** st1d {z28\.d - z31\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_z28, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_f64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1d {z0\.d - z3\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_pn0, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_f64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1d {z0\.d - z3\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_pn7, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_f64_pn15:
+** st1d {z0\.d - z3\.d}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_f64_pn15, svfloat64x4_t, float64_t,
+ svst1_f64_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_f64_0:
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_0, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f64_1:
+** incb x0
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_1, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f64_2:
+** incb x0, all, mul #2
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_2, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f64_3:
+** incb x0, all, mul #3
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_3, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_f64_4:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_4, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_f64_28:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_28, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_f64_32:
+** [^{]*
+** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_32, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f64_m1:
+** decb x0
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_m1, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f64_m2:
+** decb x0, all, mul #2
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_m2, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_f64_m3:
+** decb x0, all, mul #3
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_m3, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_f64_m4:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_m4, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_f64_m32:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_m32, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_f64_m36:
+** [^{]*
+** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_m36, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
+/*
+** st1_vnum_f64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1d {z0\.d - z3\.d}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1d {z0\.d - z3\.d}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_f64_x1, svfloat64x4_t, float64_t,
+ svst1_vnum_f64_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_s16_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_s16_x2.c
new file mode 100644
index 0000000..96ebe51
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_s16_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_s16_base:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_base, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_s16_index:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_index, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s16_1:
+** incb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_1, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0 + svcnth (), z0),
+ svst1 (pn8, x0 + svcnth (), z0))
+
+/*
+** st1_s16_2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_2, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0 + svcnth () * 2, z0),
+ svst1 (pn8, x0 + svcnth () * 2, z0))
+
+/*
+** st1_s16_14:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_14, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0 + svcnth () * 14, z0),
+ svst1 (pn8, x0 + svcnth () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s16_16:
+** incb x0, all, mul #16
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_16, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0 + svcnth () * 16, z0),
+ svst1 (pn8, x0 + svcnth () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s16_m1:
+** decb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_m1, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0 - svcnth (), z0),
+ svst1 (pn8, x0 - svcnth (), z0))
+
+/*
+** st1_s16_m2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_m2, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0 - svcnth () * 2, z0),
+ svst1 (pn8, x0 - svcnth () * 2, z0))
+
+/*
+** st1_s16_m16:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_m16, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0 - svcnth () * 16, z0),
+ svst1 (pn8, x0 - svcnth () * 16, z0))
+
+/*
+** st1_s16_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_m18, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0 - svcnth () * 18, z0),
+ svst1 (pn8, x0 - svcnth () * 18, z0))
+
+/*
+** st1_s16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_z17, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_s16_z22:
+** st1h {z22\.h(?: - |, )z23\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_z22, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_s16_z28:
+** st1h {z28\.h(?: - |, )z29\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_z28, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_s16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_pn0, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_s16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_pn7, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_s16_pn15:
+** st1h {z0\.h(?: - |, )z1\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_pn15, svint16x2_t, int16_t,
+ svst1_s16_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_s16_0:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_0, svint16x2_t, int16_t,
+ svst1_vnum_s16_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s16_1:
+** incb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_1, svint16x2_t, int16_t,
+ svst1_vnum_s16_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_s16_2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_2, svint16x2_t, int16_t,
+ svst1_vnum_s16_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_s16_14:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_14, svint16x2_t, int16_t,
+ svst1_vnum_s16_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s16_16:
+** incb x0, all, mul #16
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_16, svint16x2_t, int16_t,
+ svst1_vnum_s16_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s16_m1:
+** decb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_m1, svint16x2_t, int16_t,
+ svst1_vnum_s16_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_s16_m2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_m2, svint16x2_t, int16_t,
+ svst1_vnum_s16_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_s16_m16:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_m16, svint16x2_t, int16_t,
+ svst1_vnum_s16_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_s16_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_m18, svint16x2_t, int16_t,
+ svst1_vnum_s16_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_s16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_x1, svint16x2_t, int16_t,
+ svst1_vnum_s16_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_s16_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_s16_x4.c
new file mode 100644
index 0000000..d764522
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_s16_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_s16_base:
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_base, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_s16_index:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_index, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s16_1:
+** incb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_1, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 + svcnth (), z0),
+ svst1 (pn8, x0 + svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s16_2:
+** incb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_2, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 + svcnth () * 2, z0),
+ svst1 (pn8, x0 + svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s16_3:
+** incb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_3, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 + svcnth () * 3, z0),
+ svst1 (pn8, x0 + svcnth () * 3, z0))
+
+/*
+** st1_s16_4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_4, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 + svcnth () * 4, z0),
+ svst1 (pn8, x0 + svcnth () * 4, z0))
+
+/*
+** st1_s16_28:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_28, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 + svcnth () * 28, z0),
+ svst1 (pn8, x0 + svcnth () * 28, z0))
+
+/*
+** st1_s16_32:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_32, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 + svcnth () * 32, z0),
+ svst1 (pn8, x0 + svcnth () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s16_m1:
+** decb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_m1, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 - svcnth (), z0),
+ svst1 (pn8, x0 - svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s16_m2:
+** decb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_m2, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 - svcnth () * 2, z0),
+ svst1 (pn8, x0 - svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s16_m3:
+** decb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_m3, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 - svcnth () * 3, z0),
+ svst1 (pn8, x0 - svcnth () * 3, z0))
+
+/*
+** st1_s16_m4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_m4, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 - svcnth () * 4, z0),
+ svst1 (pn8, x0 - svcnth () * 4, z0))
+
+/*
+** st1_s16_m32:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_m32, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 - svcnth () * 32, z0),
+ svst1 (pn8, x0 - svcnth () * 32, z0))
+
+/*
+** st1_s16_m36:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_m36, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0 - svcnth () * 36, z0),
+ svst1 (pn8, x0 - svcnth () * 36, z0))
+
+/*
+** st1_s16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_z17, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_s16_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_z22, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_s16_z28:
+** st1h {z28\.h - z31\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_z28, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_s16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_pn0, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_s16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_pn7, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_s16_pn15:
+** st1h {z0\.h - z3\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s16_pn15, svint16x4_t, int16_t,
+ svst1_s16_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_s16_0:
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_0, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s16_1:
+** incb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_1, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s16_2:
+** incb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_2, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s16_3:
+** incb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_3, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_s16_4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_4, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_s16_28:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_28, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_s16_32:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_32, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s16_m1:
+** decb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_m1, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s16_m2:
+** decb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_m2, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s16_m3:
+** decb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_m3, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_s16_m4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_m4, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_s16_m32:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_m32, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_s16_m36:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_m36, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
+/*
+** st1_vnum_s16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1h {z0\.h - z3\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1h {z0\.h - z3\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s16_x1, svint16x4_t, int16_t,
+ svst1_vnum_s16_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_s32_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_s32_x2.c
new file mode 100644
index 0000000..0fe77d8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_s32_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_s32_base:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_base, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_s32_index:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_index, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s32_1:
+** incb x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_1, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0 + svcntw (), z0),
+ svst1 (pn8, x0 + svcntw (), z0))
+
+/*
+** st1_s32_2:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_2, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0 + svcntw () * 2, z0),
+ svst1 (pn8, x0 + svcntw () * 2, z0))
+
+/*
+** st1_s32_14:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_14, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0 + svcntw () * 14, z0),
+ svst1 (pn8, x0 + svcntw () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s32_16:
+** incb x0, all, mul #16
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_16, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0 + svcntw () * 16, z0),
+ svst1 (pn8, x0 + svcntw () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s32_m1:
+** decb x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_m1, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0 - svcntw (), z0),
+ svst1 (pn8, x0 - svcntw (), z0))
+
+/*
+** st1_s32_m2:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_m2, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0 - svcntw () * 2, z0),
+ svst1 (pn8, x0 - svcntw () * 2, z0))
+
+/*
+** st1_s32_m16:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_m16, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0 - svcntw () * 16, z0),
+ svst1 (pn8, x0 - svcntw () * 16, z0))
+
+/*
+** st1_s32_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_m18, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0 - svcntw () * 18, z0),
+ svst1 (pn8, x0 - svcntw () * 18, z0))
+
+/*
+** st1_s32_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_z17, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_s32_z22:
+** st1w {z22\.s(?: - |, )z23\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_z22, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_s32_z28:
+** st1w {z28\.s(?: - |, )z29\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_z28, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_s32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_pn0, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_s32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_pn7, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_s32_pn15:
+** st1w {z0\.s(?: - |, )z1\.s}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_pn15, svint32x2_t, int32_t,
+ svst1_s32_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_s32_0:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_0, svint32x2_t, int32_t,
+ svst1_vnum_s32_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s32_1:
+** incb x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_1, svint32x2_t, int32_t,
+ svst1_vnum_s32_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_s32_2:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_2, svint32x2_t, int32_t,
+ svst1_vnum_s32_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_s32_14:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_14, svint32x2_t, int32_t,
+ svst1_vnum_s32_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s32_16:
+** incb x0, all, mul #16
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_16, svint32x2_t, int32_t,
+ svst1_vnum_s32_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s32_m1:
+** decb x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_m1, svint32x2_t, int32_t,
+ svst1_vnum_s32_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_s32_m2:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_m2, svint32x2_t, int32_t,
+ svst1_vnum_s32_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_s32_m16:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_m16, svint32x2_t, int32_t,
+ svst1_vnum_s32_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_s32_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_m18, svint32x2_t, int32_t,
+ svst1_vnum_s32_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_s32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_x1, svint32x2_t, int32_t,
+ svst1_vnum_s32_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_s32_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_s32_x4.c
new file mode 100644
index 0000000..8a833e1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_s32_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_s32_base:
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_base, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_s32_index:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_index, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s32_1:
+** incb x0
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_1, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 + svcntw (), z0),
+ svst1 (pn8, x0 + svcntw (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s32_2:
+** incb x0, all, mul #2
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_2, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 + svcntw () * 2, z0),
+ svst1 (pn8, x0 + svcntw () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s32_3:
+** incb x0, all, mul #3
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_3, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 + svcntw () * 3, z0),
+ svst1 (pn8, x0 + svcntw () * 3, z0))
+
+/*
+** st1_s32_4:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_4, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 + svcntw () * 4, z0),
+ svst1 (pn8, x0 + svcntw () * 4, z0))
+
+/*
+** st1_s32_28:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_28, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 + svcntw () * 28, z0),
+ svst1 (pn8, x0 + svcntw () * 28, z0))
+
+/*
+** st1_s32_32:
+** [^{]*
+** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_32, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 + svcntw () * 32, z0),
+ svst1 (pn8, x0 + svcntw () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s32_m1:
+** decb x0
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_m1, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 - svcntw (), z0),
+ svst1 (pn8, x0 - svcntw (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s32_m2:
+** decb x0, all, mul #2
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_m2, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 - svcntw () * 2, z0),
+ svst1 (pn8, x0 - svcntw () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s32_m3:
+** decb x0, all, mul #3
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_m3, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 - svcntw () * 3, z0),
+ svst1 (pn8, x0 - svcntw () * 3, z0))
+
+/*
+** st1_s32_m4:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_m4, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 - svcntw () * 4, z0),
+ svst1 (pn8, x0 - svcntw () * 4, z0))
+
+/*
+** st1_s32_m32:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_m32, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 - svcntw () * 32, z0),
+ svst1 (pn8, x0 - svcntw () * 32, z0))
+
+/*
+** st1_s32_m36:
+** [^{]*
+** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_m36, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0 - svcntw () * 36, z0),
+ svst1 (pn8, x0 - svcntw () * 36, z0))
+
+/*
+** st1_s32_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_z17, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_s32_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_z22, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_s32_z28:
+** st1w {z28\.s - z31\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_z28, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_s32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1w {z0\.s - z3\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_pn0, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_s32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1w {z0\.s - z3\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_pn7, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_s32_pn15:
+** st1w {z0\.s - z3\.s}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s32_pn15, svint32x4_t, int32_t,
+ svst1_s32_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_s32_0:
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_0, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s32_1:
+** incb x0
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_1, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s32_2:
+** incb x0, all, mul #2
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_2, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s32_3:
+** incb x0, all, mul #3
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_3, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_s32_4:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_4, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_s32_28:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_28, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_s32_32:
+** [^{]*
+** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_32, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s32_m1:
+** decb x0
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_m1, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s32_m2:
+** decb x0, all, mul #2
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_m2, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s32_m3:
+** decb x0, all, mul #3
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_m3, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_s32_m4:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_m4, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_s32_m32:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_m32, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_s32_m36:
+** [^{]*
+** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_m36, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
+/*
+** st1_vnum_s32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1w {z0\.s - z3\.s}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1w {z0\.s - z3\.s}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s32_x1, svint32x4_t, int32_t,
+ svst1_vnum_s32_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_s64_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_s64_x2.c
new file mode 100644
index 0000000..06484d1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_s64_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_s64_base:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_base, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_s64_index:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_index, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s64_1:
+** incb x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_1, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0 + svcntd (), z0),
+ svst1 (pn8, x0 + svcntd (), z0))
+
+/*
+** st1_s64_2:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_2, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0 + svcntd () * 2, z0),
+ svst1 (pn8, x0 + svcntd () * 2, z0))
+
+/*
+** st1_s64_14:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_14, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0 + svcntd () * 14, z0),
+ svst1 (pn8, x0 + svcntd () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s64_16:
+** incb x0, all, mul #16
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_16, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0 + svcntd () * 16, z0),
+ svst1 (pn8, x0 + svcntd () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s64_m1:
+** decb x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_m1, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0 - svcntd (), z0),
+ svst1 (pn8, x0 - svcntd (), z0))
+
+/*
+** st1_s64_m2:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_m2, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0 - svcntd () * 2, z0),
+ svst1 (pn8, x0 - svcntd () * 2, z0))
+
+/*
+** st1_s64_m16:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_m16, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0 - svcntd () * 16, z0),
+ svst1 (pn8, x0 - svcntd () * 16, z0))
+
+/*
+** st1_s64_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_m18, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0 - svcntd () * 18, z0),
+ svst1 (pn8, x0 - svcntd () * 18, z0))
+
+/*
+** st1_s64_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_z17, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_s64_z22:
+** st1d {z22\.d(?: - |, )z23\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_z22, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_s64_z28:
+** st1d {z28\.d(?: - |, )z29\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_z28, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_s64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_pn0, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_s64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_pn7, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_s64_pn15:
+** st1d {z0\.d(?: - |, )z1\.d}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_pn15, svint64x2_t, int64_t,
+ svst1_s64_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_s64_0:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_0, svint64x2_t, int64_t,
+ svst1_vnum_s64_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s64_1:
+** incb x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_1, svint64x2_t, int64_t,
+ svst1_vnum_s64_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_s64_2:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_2, svint64x2_t, int64_t,
+ svst1_vnum_s64_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_s64_14:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_14, svint64x2_t, int64_t,
+ svst1_vnum_s64_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s64_16:
+** incb x0, all, mul #16
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_16, svint64x2_t, int64_t,
+ svst1_vnum_s64_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s64_m1:
+** decb x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_m1, svint64x2_t, int64_t,
+ svst1_vnum_s64_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_s64_m2:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_m2, svint64x2_t, int64_t,
+ svst1_vnum_s64_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_s64_m16:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_m16, svint64x2_t, int64_t,
+ svst1_vnum_s64_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_s64_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_m18, svint64x2_t, int64_t,
+ svst1_vnum_s64_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_s64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_x1, svint64x2_t, int64_t,
+ svst1_vnum_s64_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_s64_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_s64_x4.c
new file mode 100644
index 0000000..e571906
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_s64_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_s64_base:
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_base, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_s64_index:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_index, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s64_1:
+** incb x0
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_1, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 + svcntd (), z0),
+ svst1 (pn8, x0 + svcntd (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s64_2:
+** incb x0, all, mul #2
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_2, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 + svcntd () * 2, z0),
+ svst1 (pn8, x0 + svcntd () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s64_3:
+** incb x0, all, mul #3
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_3, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 + svcntd () * 3, z0),
+ svst1 (pn8, x0 + svcntd () * 3, z0))
+
+/*
+** st1_s64_4:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_4, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 + svcntd () * 4, z0),
+ svst1 (pn8, x0 + svcntd () * 4, z0))
+
+/*
+** st1_s64_28:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_28, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 + svcntd () * 28, z0),
+ svst1 (pn8, x0 + svcntd () * 28, z0))
+
+/*
+** st1_s64_32:
+** [^{]*
+** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_32, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 + svcntd () * 32, z0),
+ svst1 (pn8, x0 + svcntd () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s64_m1:
+** decb x0
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_m1, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 - svcntd (), z0),
+ svst1 (pn8, x0 - svcntd (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s64_m2:
+** decb x0, all, mul #2
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_m2, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 - svcntd () * 2, z0),
+ svst1 (pn8, x0 - svcntd () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s64_m3:
+** decb x0, all, mul #3
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_m3, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 - svcntd () * 3, z0),
+ svst1 (pn8, x0 - svcntd () * 3, z0))
+
+/*
+** st1_s64_m4:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_m4, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 - svcntd () * 4, z0),
+ svst1 (pn8, x0 - svcntd () * 4, z0))
+
+/*
+** st1_s64_m32:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_m32, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 - svcntd () * 32, z0),
+ svst1 (pn8, x0 - svcntd () * 32, z0))
+
+/*
+** st1_s64_m36:
+** [^{]*
+** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_m36, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0 - svcntd () * 36, z0),
+ svst1 (pn8, x0 - svcntd () * 36, z0))
+
+/*
+** st1_s64_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_z17, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_s64_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_z22, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_s64_z28:
+** st1d {z28\.d - z31\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_z28, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_s64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1d {z0\.d - z3\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_pn0, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_s64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1d {z0\.d - z3\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_pn7, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_s64_pn15:
+** st1d {z0\.d - z3\.d}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s64_pn15, svint64x4_t, int64_t,
+ svst1_s64_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_s64_0:
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_0, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s64_1:
+** incb x0
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_1, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s64_2:
+** incb x0, all, mul #2
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_2, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s64_3:
+** incb x0, all, mul #3
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_3, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_s64_4:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_4, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_s64_28:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_28, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_s64_32:
+** [^{]*
+** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_32, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s64_m1:
+** decb x0
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_m1, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s64_m2:
+** decb x0, all, mul #2
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_m2, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s64_m3:
+** decb x0, all, mul #3
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_m3, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_s64_m4:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_m4, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_s64_m32:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_m32, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_s64_m36:
+** [^{]*
+** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_m36, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
+/*
+** st1_vnum_s64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1d {z0\.d - z3\.d}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1d {z0\.d - z3\.d}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s64_x1, svint64x4_t, int64_t,
+ svst1_vnum_s64_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_s8_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_s8_x2.c
new file mode 100644
index 0000000..766d862
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_s8_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_s8_base:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_base, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_s8_index:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, x1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_index, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s8_1:
+** incb x0
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_1, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0 + svcntb (), z0),
+ svst1 (pn8, x0 + svcntb (), z0))
+
+/*
+** st1_s8_2:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_2, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0 + svcntb () * 2, z0),
+ svst1 (pn8, x0 + svcntb () * 2, z0))
+
+/*
+** st1_s8_14:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_14, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0 + svcntb () * 14, z0),
+ svst1 (pn8, x0 + svcntb () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s8_16:
+** incb x0, all, mul #16
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_16, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0 + svcntb () * 16, z0),
+ svst1 (pn8, x0 + svcntb () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s8_m1:
+** decb x0
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_m1, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0 - svcntb (), z0),
+ svst1 (pn8, x0 - svcntb (), z0))
+
+/*
+** st1_s8_m2:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_m2, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0 - svcntb () * 2, z0),
+ svst1 (pn8, x0 - svcntb () * 2, z0))
+
+/*
+** st1_s8_m16:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_m16, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0 - svcntb () * 16, z0),
+ svst1 (pn8, x0 - svcntb () * 16, z0))
+
+/*
+** st1_s8_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_m18, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0 - svcntb () * 18, z0),
+ svst1 (pn8, x0 - svcntb () * 18, z0))
+
+/*
+** st1_s8_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_z17, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_s8_z22:
+** st1b {z22\.b(?: - |, )z23\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_z22, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_s8_z28:
+** st1b {z28\.b(?: - |, )z29\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_z28, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_s8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_pn0, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_s8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_pn7, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_s8_pn15:
+** st1b {z0\.b(?: - |, )z1\.b}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_pn15, svint8x2_t, int8_t,
+ svst1_s8_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_s8_0:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_0, svint8x2_t, int8_t,
+ svst1_vnum_s8_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s8_1:
+** incb x0
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_1, svint8x2_t, int8_t,
+ svst1_vnum_s8_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_s8_2:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_2, svint8x2_t, int8_t,
+ svst1_vnum_s8_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_s8_14:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_14, svint8x2_t, int8_t,
+ svst1_vnum_s8_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s8_16:
+** incb x0, all, mul #16
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_16, svint8x2_t, int8_t,
+ svst1_vnum_s8_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s8_m1:
+** decb x0
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_m1, svint8x2_t, int8_t,
+ svst1_vnum_s8_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_s8_m2:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_m2, svint8x2_t, int8_t,
+ svst1_vnum_s8_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_s8_m16:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_m16, svint8x2_t, int8_t,
+ svst1_vnum_s8_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_s8_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_m18, svint8x2_t, int8_t,
+ svst1_vnum_s8_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_s8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_x1, svint8x2_t, int8_t,
+ svst1_vnum_s8_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_s8_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_s8_x4.c
new file mode 100644
index 0000000..024ce72
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_s8_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_s8_base:
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_base, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_s8_index:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, x1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_index, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s8_1:
+** incb x0
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_1, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 + svcntb (), z0),
+ svst1 (pn8, x0 + svcntb (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s8_2:
+** incb x0, all, mul #2
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_2, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 + svcntb () * 2, z0),
+ svst1 (pn8, x0 + svcntb () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s8_3:
+** incb x0, all, mul #3
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_3, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 + svcntb () * 3, z0),
+ svst1 (pn8, x0 + svcntb () * 3, z0))
+
+/*
+** st1_s8_4:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_4, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 + svcntb () * 4, z0),
+ svst1 (pn8, x0 + svcntb () * 4, z0))
+
+/*
+** st1_s8_28:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_28, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 + svcntb () * 28, z0),
+ svst1 (pn8, x0 + svcntb () * 28, z0))
+
+/*
+** st1_s8_32:
+** [^{]*
+** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_32, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 + svcntb () * 32, z0),
+ svst1 (pn8, x0 + svcntb () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s8_m1:
+** decb x0
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_m1, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 - svcntb (), z0),
+ svst1 (pn8, x0 - svcntb (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s8_m2:
+** decb x0, all, mul #2
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_m2, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 - svcntb () * 2, z0),
+ svst1 (pn8, x0 - svcntb () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_s8_m3:
+** decb x0, all, mul #3
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_m3, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 - svcntb () * 3, z0),
+ svst1 (pn8, x0 - svcntb () * 3, z0))
+
+/*
+** st1_s8_m4:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_m4, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 - svcntb () * 4, z0),
+ svst1 (pn8, x0 - svcntb () * 4, z0))
+
+/*
+** st1_s8_m32:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_m32, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 - svcntb () * 32, z0),
+ svst1 (pn8, x0 - svcntb () * 32, z0))
+
+/*
+** st1_s8_m36:
+** [^{]*
+** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_m36, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0 - svcntb () * 36, z0),
+ svst1 (pn8, x0 - svcntb () * 36, z0))
+
+/*
+** st1_s8_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_z17, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_s8_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_z22, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_s8_z28:
+** st1b {z28\.b - z31\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_z28, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_s8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1b {z0\.b - z3\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_pn0, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_s8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1b {z0\.b - z3\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_pn7, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_s8_pn15:
+** st1b {z0\.b - z3\.b}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_s8_pn15, svint8x4_t, int8_t,
+ svst1_s8_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_s8_0:
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_0, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s8_1:
+** incb x0
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_1, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s8_2:
+** incb x0, all, mul #2
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_2, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s8_3:
+** incb x0, all, mul #3
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_3, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_s8_4:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_4, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_s8_28:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_28, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_s8_32:
+** [^{]*
+** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_32, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s8_m1:
+** decb x0
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_m1, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s8_m2:
+** decb x0, all, mul #2
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_m2, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_s8_m3:
+** decb x0, all, mul #3
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_m3, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_s8_m4:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_m4, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_s8_m32:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_m32, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_s8_m36:
+** [^{]*
+** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_m36, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
+/*
+** st1_vnum_s8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1b {z0\.b - z3\.b}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1b {z0\.b - z3\.b}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_s8_x1, svint8x4_t, int8_t,
+ svst1_vnum_s8_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_u16_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_u16_x2.c
new file mode 100644
index 0000000..9955c4f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_u16_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_u16_base:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_base, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_u16_index:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_index, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u16_1:
+** incb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_1, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0 + svcnth (), z0),
+ svst1 (pn8, x0 + svcnth (), z0))
+
+/*
+** st1_u16_2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_2, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0 + svcnth () * 2, z0),
+ svst1 (pn8, x0 + svcnth () * 2, z0))
+
+/*
+** st1_u16_14:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_14, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0 + svcnth () * 14, z0),
+ svst1 (pn8, x0 + svcnth () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u16_16:
+** incb x0, all, mul #16
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_16, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0 + svcnth () * 16, z0),
+ svst1 (pn8, x0 + svcnth () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u16_m1:
+** decb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_m1, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0 - svcnth (), z0),
+ svst1 (pn8, x0 - svcnth (), z0))
+
+/*
+** st1_u16_m2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_m2, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0 - svcnth () * 2, z0),
+ svst1 (pn8, x0 - svcnth () * 2, z0))
+
+/*
+** st1_u16_m16:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_m16, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0 - svcnth () * 16, z0),
+ svst1 (pn8, x0 - svcnth () * 16, z0))
+
+/*
+** st1_u16_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_m18, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0 - svcnth () * 18, z0),
+ svst1 (pn8, x0 - svcnth () * 18, z0))
+
+/*
+** st1_u16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_z17, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_u16_z22:
+** st1h {z22\.h(?: - |, )z23\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_z22, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_u16_z28:
+** st1h {z28\.h(?: - |, )z29\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_z28, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_u16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_pn0, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_u16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_pn7, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_u16_pn15:
+** st1h {z0\.h(?: - |, )z1\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_pn15, svuint16x2_t, uint16_t,
+ svst1_u16_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_u16_0:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_0, svuint16x2_t, uint16_t,
+ svst1_vnum_u16_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u16_1:
+** incb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_1, svuint16x2_t, uint16_t,
+ svst1_vnum_u16_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_u16_2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_2, svuint16x2_t, uint16_t,
+ svst1_vnum_u16_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_u16_14:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_14, svuint16x2_t, uint16_t,
+ svst1_vnum_u16_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u16_16:
+** incb x0, all, mul #16
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_16, svuint16x2_t, uint16_t,
+ svst1_vnum_u16_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u16_m1:
+** decb x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_m1, svuint16x2_t, uint16_t,
+ svst1_vnum_u16_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_u16_m2:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_m2, svuint16x2_t, uint16_t,
+ svst1_vnum_u16_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_u16_m16:
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_m16, svuint16x2_t, uint16_t,
+ svst1_vnum_u16_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_u16_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_m18, svuint16x2_t, uint16_t,
+ svst1_vnum_u16_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_u16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_x1, svuint16x2_t, uint16_t,
+ svst1_vnum_u16_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_u16_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_u16_x4.c
new file mode 100644
index 0000000..cbf2249
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_u16_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_u16_base:
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_base, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_u16_index:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_index, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u16_1:
+** incb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_1, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 + svcnth (), z0),
+ svst1 (pn8, x0 + svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u16_2:
+** incb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_2, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 + svcnth () * 2, z0),
+ svst1 (pn8, x0 + svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u16_3:
+** incb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_3, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 + svcnth () * 3, z0),
+ svst1 (pn8, x0 + svcnth () * 3, z0))
+
+/*
+** st1_u16_4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_4, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 + svcnth () * 4, z0),
+ svst1 (pn8, x0 + svcnth () * 4, z0))
+
+/*
+** st1_u16_28:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_28, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 + svcnth () * 28, z0),
+ svst1 (pn8, x0 + svcnth () * 28, z0))
+
+/*
+** st1_u16_32:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_32, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 + svcnth () * 32, z0),
+ svst1 (pn8, x0 + svcnth () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u16_m1:
+** decb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_m1, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 - svcnth (), z0),
+ svst1 (pn8, x0 - svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u16_m2:
+** decb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_m2, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 - svcnth () * 2, z0),
+ svst1 (pn8, x0 - svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u16_m3:
+** decb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_m3, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 - svcnth () * 3, z0),
+ svst1 (pn8, x0 - svcnth () * 3, z0))
+
+/*
+** st1_u16_m4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_m4, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 - svcnth () * 4, z0),
+ svst1 (pn8, x0 - svcnth () * 4, z0))
+
+/*
+** st1_u16_m32:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_m32, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 - svcnth () * 32, z0),
+ svst1 (pn8, x0 - svcnth () * 32, z0))
+
+/*
+** st1_u16_m36:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_m36, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0 - svcnth () * 36, z0),
+ svst1 (pn8, x0 - svcnth () * 36, z0))
+
+/*
+** st1_u16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_z17, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_u16_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_z22, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_u16_z28:
+** st1h {z28\.h - z31\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_z28, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_u16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_pn0, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_u16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_pn7, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_u16_pn15:
+** st1h {z0\.h - z3\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u16_pn15, svuint16x4_t, uint16_t,
+ svst1_u16_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_u16_0:
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_0, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u16_1:
+** incb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_1, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u16_2:
+** incb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_2, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u16_3:
+** incb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_3, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_u16_4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_4, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_u16_28:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_28, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_u16_32:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_32, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u16_m1:
+** decb x0
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_m1, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u16_m2:
+** decb x0, all, mul #2
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_m2, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u16_m3:
+** decb x0, all, mul #3
+** st1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_m3, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_u16_m4:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_m4, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_u16_m32:
+** st1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_m32, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_u16_m36:
+** [^{]*
+** st1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_m36, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
+/*
+** st1_vnum_u16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1h {z0\.h - z3\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1h {z0\.h - z3\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u16_x1, svuint16x4_t, uint16_t,
+ svst1_vnum_u16_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_u32_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_u32_x2.c
new file mode 100644
index 0000000..fe1763b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_u32_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_u32_base:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_base, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_u32_index:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_index, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u32_1:
+** incb x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_1, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0 + svcntw (), z0),
+ svst1 (pn8, x0 + svcntw (), z0))
+
+/*
+** st1_u32_2:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_2, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0 + svcntw () * 2, z0),
+ svst1 (pn8, x0 + svcntw () * 2, z0))
+
+/*
+** st1_u32_14:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_14, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0 + svcntw () * 14, z0),
+ svst1 (pn8, x0 + svcntw () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u32_16:
+** incb x0, all, mul #16
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_16, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0 + svcntw () * 16, z0),
+ svst1 (pn8, x0 + svcntw () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u32_m1:
+** decb x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_m1, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0 - svcntw (), z0),
+ svst1 (pn8, x0 - svcntw (), z0))
+
+/*
+** st1_u32_m2:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_m2, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0 - svcntw () * 2, z0),
+ svst1 (pn8, x0 - svcntw () * 2, z0))
+
+/*
+** st1_u32_m16:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_m16, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0 - svcntw () * 16, z0),
+ svst1 (pn8, x0 - svcntw () * 16, z0))
+
+/*
+** st1_u32_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_m18, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0 - svcntw () * 18, z0),
+ svst1 (pn8, x0 - svcntw () * 18, z0))
+
+/*
+** st1_u32_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_z17, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_u32_z22:
+** st1w {z22\.s(?: - |, )z23\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_z22, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_u32_z28:
+** st1w {z28\.s(?: - |, )z29\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_z28, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_u32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_pn0, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_u32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_pn7, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_u32_pn15:
+** st1w {z0\.s(?: - |, )z1\.s}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_pn15, svuint32x2_t, uint32_t,
+ svst1_u32_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_u32_0:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_0, svuint32x2_t, uint32_t,
+ svst1_vnum_u32_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u32_1:
+** incb x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_1, svuint32x2_t, uint32_t,
+ svst1_vnum_u32_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_u32_2:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_2, svuint32x2_t, uint32_t,
+ svst1_vnum_u32_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_u32_14:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_14, svuint32x2_t, uint32_t,
+ svst1_vnum_u32_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u32_16:
+** incb x0, all, mul #16
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_16, svuint32x2_t, uint32_t,
+ svst1_vnum_u32_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u32_m1:
+** decb x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_m1, svuint32x2_t, uint32_t,
+ svst1_vnum_u32_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_u32_m2:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_m2, svuint32x2_t, uint32_t,
+ svst1_vnum_u32_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_u32_m16:
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_m16, svuint32x2_t, uint32_t,
+ svst1_vnum_u32_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_u32_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_m18, svuint32x2_t, uint32_t,
+ svst1_vnum_u32_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_u32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_x1, svuint32x2_t, uint32_t,
+ svst1_vnum_u32_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_u32_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_u32_x4.c
new file mode 100644
index 0000000..9ebe9a7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_u32_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_u32_base:
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_base, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_u32_index:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_index, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u32_1:
+** incb x0
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_1, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 + svcntw (), z0),
+ svst1 (pn8, x0 + svcntw (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u32_2:
+** incb x0, all, mul #2
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_2, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 + svcntw () * 2, z0),
+ svst1 (pn8, x0 + svcntw () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u32_3:
+** incb x0, all, mul #3
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_3, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 + svcntw () * 3, z0),
+ svst1 (pn8, x0 + svcntw () * 3, z0))
+
+/*
+** st1_u32_4:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_4, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 + svcntw () * 4, z0),
+ svst1 (pn8, x0 + svcntw () * 4, z0))
+
+/*
+** st1_u32_28:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_28, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 + svcntw () * 28, z0),
+ svst1 (pn8, x0 + svcntw () * 28, z0))
+
+/*
+** st1_u32_32:
+** [^{]*
+** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_32, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 + svcntw () * 32, z0),
+ svst1 (pn8, x0 + svcntw () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u32_m1:
+** decb x0
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_m1, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 - svcntw (), z0),
+ svst1 (pn8, x0 - svcntw (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u32_m2:
+** decb x0, all, mul #2
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_m2, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 - svcntw () * 2, z0),
+ svst1 (pn8, x0 - svcntw () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u32_m3:
+** decb x0, all, mul #3
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_m3, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 - svcntw () * 3, z0),
+ svst1 (pn8, x0 - svcntw () * 3, z0))
+
+/*
+** st1_u32_m4:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_m4, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 - svcntw () * 4, z0),
+ svst1 (pn8, x0 - svcntw () * 4, z0))
+
+/*
+** st1_u32_m32:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_m32, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 - svcntw () * 32, z0),
+ svst1 (pn8, x0 - svcntw () * 32, z0))
+
+/*
+** st1_u32_m36:
+** [^{]*
+** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_m36, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0 - svcntw () * 36, z0),
+ svst1 (pn8, x0 - svcntw () * 36, z0))
+
+/*
+** st1_u32_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_z17, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_u32_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_z22, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_u32_z28:
+** st1w {z28\.s - z31\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_z28, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_u32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1w {z0\.s - z3\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_pn0, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_u32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1w {z0\.s - z3\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_pn7, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_u32_pn15:
+** st1w {z0\.s - z3\.s}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u32_pn15, svuint32x4_t, uint32_t,
+ svst1_u32_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_u32_0:
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_0, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u32_1:
+** incb x0
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_1, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u32_2:
+** incb x0, all, mul #2
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_2, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u32_3:
+** incb x0, all, mul #3
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_3, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_u32_4:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_4, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_u32_28:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_28, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_u32_32:
+** [^{]*
+** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_32, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u32_m1:
+** decb x0
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_m1, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u32_m2:
+** decb x0, all, mul #2
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_m2, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u32_m3:
+** decb x0, all, mul #3
+** st1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_m3, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_u32_m4:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_m4, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_u32_m32:
+** st1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_m32, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_u32_m36:
+** [^{]*
+** st1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_m36, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
+/*
+** st1_vnum_u32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1w {z0\.s - z3\.s}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1w {z0\.s - z3\.s}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u32_x1, svuint32x4_t, uint32_t,
+ svst1_vnum_u32_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_u64_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_u64_x2.c
new file mode 100644
index 0000000..9e439d7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_u64_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_u64_base:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_base, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_u64_index:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_index, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u64_1:
+** incb x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_1, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0 + svcntd (), z0),
+ svst1 (pn8, x0 + svcntd (), z0))
+
+/*
+** st1_u64_2:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_2, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0 + svcntd () * 2, z0),
+ svst1 (pn8, x0 + svcntd () * 2, z0))
+
+/*
+** st1_u64_14:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_14, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0 + svcntd () * 14, z0),
+ svst1 (pn8, x0 + svcntd () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u64_16:
+** incb x0, all, mul #16
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_16, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0 + svcntd () * 16, z0),
+ svst1 (pn8, x0 + svcntd () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u64_m1:
+** decb x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_m1, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0 - svcntd (), z0),
+ svst1 (pn8, x0 - svcntd (), z0))
+
+/*
+** st1_u64_m2:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_m2, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0 - svcntd () * 2, z0),
+ svst1 (pn8, x0 - svcntd () * 2, z0))
+
+/*
+** st1_u64_m16:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_m16, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0 - svcntd () * 16, z0),
+ svst1 (pn8, x0 - svcntd () * 16, z0))
+
+/*
+** st1_u64_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_m18, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0 - svcntd () * 18, z0),
+ svst1 (pn8, x0 - svcntd () * 18, z0))
+
+/*
+** st1_u64_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_z17, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_u64_z22:
+** st1d {z22\.d(?: - |, )z23\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_z22, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_u64_z28:
+** st1d {z28\.d(?: - |, )z29\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_z28, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_u64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_pn0, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_u64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_pn7, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_u64_pn15:
+** st1d {z0\.d(?: - |, )z1\.d}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_pn15, svuint64x2_t, uint64_t,
+ svst1_u64_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_u64_0:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_0, svuint64x2_t, uint64_t,
+ svst1_vnum_u64_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u64_1:
+** incb x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_1, svuint64x2_t, uint64_t,
+ svst1_vnum_u64_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_u64_2:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_2, svuint64x2_t, uint64_t,
+ svst1_vnum_u64_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_u64_14:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_14, svuint64x2_t, uint64_t,
+ svst1_vnum_u64_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u64_16:
+** incb x0, all, mul #16
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_16, svuint64x2_t, uint64_t,
+ svst1_vnum_u64_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u64_m1:
+** decb x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_m1, svuint64x2_t, uint64_t,
+ svst1_vnum_u64_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_u64_m2:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_m2, svuint64x2_t, uint64_t,
+ svst1_vnum_u64_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_u64_m16:
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_m16, svuint64x2_t, uint64_t,
+ svst1_vnum_u64_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_u64_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_m18, svuint64x2_t, uint64_t,
+ svst1_vnum_u64_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_u64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_x1, svuint64x2_t, uint64_t,
+ svst1_vnum_u64_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_u64_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_u64_x4.c
new file mode 100644
index 0000000..7d7e649
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_u64_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_u64_base:
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_base, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_u64_index:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_index, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u64_1:
+** incb x0
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_1, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 + svcntd (), z0),
+ svst1 (pn8, x0 + svcntd (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u64_2:
+** incb x0, all, mul #2
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_2, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 + svcntd () * 2, z0),
+ svst1 (pn8, x0 + svcntd () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u64_3:
+** incb x0, all, mul #3
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_3, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 + svcntd () * 3, z0),
+ svst1 (pn8, x0 + svcntd () * 3, z0))
+
+/*
+** st1_u64_4:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_4, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 + svcntd () * 4, z0),
+ svst1 (pn8, x0 + svcntd () * 4, z0))
+
+/*
+** st1_u64_28:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_28, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 + svcntd () * 28, z0),
+ svst1 (pn8, x0 + svcntd () * 28, z0))
+
+/*
+** st1_u64_32:
+** [^{]*
+** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_32, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 + svcntd () * 32, z0),
+ svst1 (pn8, x0 + svcntd () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u64_m1:
+** decb x0
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_m1, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 - svcntd (), z0),
+ svst1 (pn8, x0 - svcntd (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u64_m2:
+** decb x0, all, mul #2
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_m2, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 - svcntd () * 2, z0),
+ svst1 (pn8, x0 - svcntd () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u64_m3:
+** decb x0, all, mul #3
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_m3, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 - svcntd () * 3, z0),
+ svst1 (pn8, x0 - svcntd () * 3, z0))
+
+/*
+** st1_u64_m4:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_m4, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 - svcntd () * 4, z0),
+ svst1 (pn8, x0 - svcntd () * 4, z0))
+
+/*
+** st1_u64_m32:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_m32, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 - svcntd () * 32, z0),
+ svst1 (pn8, x0 - svcntd () * 32, z0))
+
+/*
+** st1_u64_m36:
+** [^{]*
+** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_m36, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0 - svcntd () * 36, z0),
+ svst1 (pn8, x0 - svcntd () * 36, z0))
+
+/*
+** st1_u64_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_z17, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_u64_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_z22, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_u64_z28:
+** st1d {z28\.d - z31\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_z28, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_u64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1d {z0\.d - z3\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_pn0, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_u64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1d {z0\.d - z3\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_pn7, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_u64_pn15:
+** st1d {z0\.d - z3\.d}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u64_pn15, svuint64x4_t, uint64_t,
+ svst1_u64_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_u64_0:
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_0, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u64_1:
+** incb x0
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_1, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u64_2:
+** incb x0, all, mul #2
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_2, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u64_3:
+** incb x0, all, mul #3
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_3, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_u64_4:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_4, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_u64_28:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_28, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_u64_32:
+** [^{]*
+** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_32, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u64_m1:
+** decb x0
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_m1, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u64_m2:
+** decb x0, all, mul #2
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_m2, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u64_m3:
+** decb x0, all, mul #3
+** st1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_m3, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_u64_m4:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_m4, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_u64_m32:
+** st1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_m32, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_u64_m36:
+** [^{]*
+** st1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_m36, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
+/*
+** st1_vnum_u64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1d {z0\.d - z3\.d}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1d {z0\.d - z3\.d}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u64_x1, svuint64x4_t, uint64_t,
+ svst1_vnum_u64_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_u8_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_u8_x2.c
new file mode 100644
index 0000000..6440916
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_u8_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_u8_base:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_base, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_u8_index:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, x1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_index, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u8_1:
+** incb x0
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_1, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0 + svcntb (), z0),
+ svst1 (pn8, x0 + svcntb (), z0))
+
+/*
+** st1_u8_2:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_2, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0 + svcntb () * 2, z0),
+ svst1 (pn8, x0 + svcntb () * 2, z0))
+
+/*
+** st1_u8_14:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_14, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0 + svcntb () * 14, z0),
+ svst1 (pn8, x0 + svcntb () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u8_16:
+** incb x0, all, mul #16
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_16, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0 + svcntb () * 16, z0),
+ svst1 (pn8, x0 + svcntb () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u8_m1:
+** decb x0
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_m1, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0 - svcntb (), z0),
+ svst1 (pn8, x0 - svcntb (), z0))
+
+/*
+** st1_u8_m2:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_m2, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0 - svcntb () * 2, z0),
+ svst1 (pn8, x0 - svcntb () * 2, z0))
+
+/*
+** st1_u8_m16:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_m16, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0 - svcntb () * 16, z0),
+ svst1 (pn8, x0 - svcntb () * 16, z0))
+
+/*
+** st1_u8_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_m18, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0 - svcntb () * 18, z0),
+ svst1 (pn8, x0 - svcntb () * 18, z0))
+
+/*
+** st1_u8_z17:
+** mov [^\n]+
+** mov [^\n]+
+** st1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_z17, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_u8_z22:
+** st1b {z22\.b(?: - |, )z23\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_z22, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_u8_z28:
+** st1b {z28\.b(?: - |, )z29\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_z28, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_u8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_pn0, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_u8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_pn7, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_u8_pn15:
+** st1b {z0\.b(?: - |, )z1\.b}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_pn15, svuint8x2_t, uint8_t,
+ svst1_u8_x2 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_u8_0:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_0, svuint8x2_t, uint8_t,
+ svst1_vnum_u8_x2 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u8_1:
+** incb x0
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_1, svuint8x2_t, uint8_t,
+ svst1_vnum_u8_x2 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/*
+** st1_vnum_u8_2:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_2, svuint8x2_t, uint8_t,
+ svst1_vnum_u8_x2 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/*
+** st1_vnum_u8_14:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_14, svuint8x2_t, uint8_t,
+ svst1_vnum_u8_x2 (pn8, x0, 14, z0),
+ svst1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u8_16:
+** incb x0, all, mul #16
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_16, svuint8x2_t, uint8_t,
+ svst1_vnum_u8_x2 (pn8, x0, 16, z0),
+ svst1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u8_m1:
+** decb x0
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_m1, svuint8x2_t, uint8_t,
+ svst1_vnum_u8_x2 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/*
+** st1_vnum_u8_m2:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_m2, svuint8x2_t, uint8_t,
+ svst1_vnum_u8_x2 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/*
+** st1_vnum_u8_m16:
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_m16, svuint8x2_t, uint8_t,
+ svst1_vnum_u8_x2 (pn8, x0, -16, z0),
+ svst1_vnum (pn8, x0, -16, z0))
+
+/*
+** st1_vnum_u8_m18:
+** addvl (x[0-9]+), x0, #-18
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_m18, svuint8x2_t, uint8_t,
+ svst1_vnum_u8_x2 (pn8, x0, -18, z0),
+ svst1_vnum (pn8, x0, -18, z0))
+
+/*
+** st1_vnum_u8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_x1, svuint8x2_t, uint8_t,
+ svst1_vnum_u8_x2 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_u8_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_u8_x4.c
new file mode 100644
index 0000000..206467a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/st1_u8_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** st1_u8_base:
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_base, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0, z0),
+ svst1 (pn8, x0, z0))
+
+/*
+** st1_u8_index:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, x1\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_index, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 + x1, z0),
+ svst1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u8_1:
+** incb x0
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_1, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 + svcntb (), z0),
+ svst1 (pn8, x0 + svcntb (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u8_2:
+** incb x0, all, mul #2
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_2, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 + svcntb () * 2, z0),
+ svst1 (pn8, x0 + svcntb () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u8_3:
+** incb x0, all, mul #3
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_3, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 + svcntb () * 3, z0),
+ svst1 (pn8, x0 + svcntb () * 3, z0))
+
+/*
+** st1_u8_4:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_4, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 + svcntb () * 4, z0),
+ svst1 (pn8, x0 + svcntb () * 4, z0))
+
+/*
+** st1_u8_28:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_28, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 + svcntb () * 28, z0),
+ svst1 (pn8, x0 + svcntb () * 28, z0))
+
+/*
+** st1_u8_32:
+** [^{]*
+** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_32, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 + svcntb () * 32, z0),
+ svst1 (pn8, x0 + svcntb () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u8_m1:
+** decb x0
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_m1, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 - svcntb (), z0),
+ svst1 (pn8, x0 - svcntb (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u8_m2:
+** decb x0, all, mul #2
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_m2, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 - svcntb () * 2, z0),
+ svst1 (pn8, x0 - svcntb () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_u8_m3:
+** decb x0, all, mul #3
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_m3, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 - svcntb () * 3, z0),
+ svst1 (pn8, x0 - svcntb () * 3, z0))
+
+/*
+** st1_u8_m4:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_m4, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 - svcntb () * 4, z0),
+ svst1 (pn8, x0 - svcntb () * 4, z0))
+
+/*
+** st1_u8_m32:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_m32, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 - svcntb () * 32, z0),
+ svst1 (pn8, x0 - svcntb () * 32, z0))
+
+/*
+** st1_u8_m36:
+** [^{]*
+** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_m36, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0 - svcntb () * 36, z0),
+ svst1 (pn8, x0 - svcntb () * 36, z0))
+
+/*
+** st1_u8_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_z17, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0, z17),
+ svst1 (pn8, x0, z17))
+
+/*
+** st1_u8_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** st1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_z22, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0, z22),
+ svst1 (pn8, x0, z22))
+
+/*
+** st1_u8_z28:
+** st1b {z28\.b - z31\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_z28, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn8, x0, z28),
+ svst1 (pn8, x0, z28))
+
+/*
+** st1_u8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** st1b {z0\.b - z3\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_pn0, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn0, x0, z0),
+ svst1 (pn0, x0, z0))
+
+/*
+** st1_u8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** st1b {z0\.b - z3\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_pn7, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn7, x0, z0),
+ svst1 (pn7, x0, z0))
+
+/*
+** st1_u8_pn15:
+** st1b {z0\.b - z3\.b}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_u8_pn15, svuint8x4_t, uint8_t,
+ svst1_u8_x4 (pn15, x0, z0),
+ svst1 (pn15, x0, z0))
+
+/*
+** st1_vnum_u8_0:
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_0, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, 0, z0),
+ svst1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u8_1:
+** incb x0
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_1, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, 1, z0),
+ svst1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u8_2:
+** incb x0, all, mul #2
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_2, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, 2, z0),
+ svst1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u8_3:
+** incb x0, all, mul #3
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_3, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, 3, z0),
+ svst1_vnum (pn8, x0, 3, z0))
+
+/*
+** st1_vnum_u8_4:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_4, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, 4, z0),
+ svst1_vnum (pn8, x0, 4, z0))
+
+/*
+** st1_vnum_u8_28:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_28, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, 28, z0),
+ svst1_vnum (pn8, x0, 28, z0))
+
+/*
+** st1_vnum_u8_32:
+** [^{]*
+** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_32, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, 32, z0),
+ svst1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u8_m1:
+** decb x0
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_m1, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, -1, z0),
+ svst1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u8_m2:
+** decb x0, all, mul #2
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_m2, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, -2, z0),
+ svst1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** st1_vnum_u8_m3:
+** decb x0, all, mul #3
+** st1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_m3, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, -3, z0),
+ svst1_vnum (pn8, x0, -3, z0))
+
+/*
+** st1_vnum_u8_m4:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_m4, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, -4, z0),
+ svst1_vnum (pn8, x0, -4, z0))
+
+/*
+** st1_vnum_u8_m32:
+** st1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_m32, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, -32, z0),
+ svst1_vnum (pn8, x0, -32, z0))
+
+/*
+** st1_vnum_u8_m36:
+** [^{]*
+** st1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_m36, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, -36, z0),
+ svst1_vnum (pn8, x0, -36, z0))
+
+/*
+** st1_vnum_u8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** st1b {z0\.b - z3\.b}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** st1b {z0\.b - z3\.b}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (st1_vnum_u8_x1, svuint8x4_t, uint8_t,
+ svst1_vnum_u8_x4 (pn8, x0, x1, z0),
+ svst1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_bf16_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_bf16_x2.c
new file mode 100644
index 0000000..bd0ddcd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_bf16_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_bf16_base:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_base, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_bf16_index:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_index, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_bf16_1:
+** incb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_1, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0 + svcnth (), z0),
+ svstnt1 (pn8, x0 + svcnth (), z0))
+
+/*
+** stnt1_bf16_2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_2, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0 + svcnth () * 2, z0),
+ svstnt1 (pn8, x0 + svcnth () * 2, z0))
+
+/*
+** stnt1_bf16_14:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_14, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0 + svcnth () * 14, z0),
+ svstnt1 (pn8, x0 + svcnth () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_bf16_16:
+** incb x0, all, mul #16
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_16, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0 + svcnth () * 16, z0),
+ svstnt1 (pn8, x0 + svcnth () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_bf16_m1:
+** decb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_m1, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0 - svcnth (), z0),
+ svstnt1 (pn8, x0 - svcnth (), z0))
+
+/*
+** stnt1_bf16_m2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_m2, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0 - svcnth () * 2, z0),
+ svstnt1 (pn8, x0 - svcnth () * 2, z0))
+
+/*
+** stnt1_bf16_m16:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_m16, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0 - svcnth () * 16, z0),
+ svstnt1 (pn8, x0 - svcnth () * 16, z0))
+
+/*
+** stnt1_bf16_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_m18, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0 - svcnth () * 18, z0),
+ svstnt1 (pn8, x0 - svcnth () * 18, z0))
+
+/*
+** stnt1_bf16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_z17, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_bf16_z22:
+** stnt1h {z22\.h(?: - |, )z23\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_z22, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_bf16_z28:
+** stnt1h {z28\.h(?: - |, )z29\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_z28, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_bf16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_pn0, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_bf16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_pn7, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_bf16_pn15:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_pn15, svbfloat16x2_t, bfloat16_t,
+ svstnt1_bf16_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_bf16_0:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_0, svbfloat16x2_t, bfloat16_t,
+ svstnt1_vnum_bf16_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_bf16_1:
+** incb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_1, svbfloat16x2_t, bfloat16_t,
+ svstnt1_vnum_bf16_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_bf16_2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_2, svbfloat16x2_t, bfloat16_t,
+ svstnt1_vnum_bf16_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_bf16_14:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_14, svbfloat16x2_t, bfloat16_t,
+ svstnt1_vnum_bf16_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_bf16_16:
+** incb x0, all, mul #16
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_16, svbfloat16x2_t, bfloat16_t,
+ svstnt1_vnum_bf16_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_bf16_m1:
+** decb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_m1, svbfloat16x2_t, bfloat16_t,
+ svstnt1_vnum_bf16_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_bf16_m2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_m2, svbfloat16x2_t, bfloat16_t,
+ svstnt1_vnum_bf16_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_bf16_m16:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_m16, svbfloat16x2_t, bfloat16_t,
+ svstnt1_vnum_bf16_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_bf16_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_m18, svbfloat16x2_t, bfloat16_t,
+ svstnt1_vnum_bf16_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_bf16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_x1, svbfloat16x2_t, bfloat16_t,
+ svstnt1_vnum_bf16_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_bf16_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_bf16_x4.c
new file mode 100644
index 0000000..29cfd2a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_bf16_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_bf16_base:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_base, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_bf16_index:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_index, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_bf16_1:
+** incb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_1, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 + svcnth (), z0),
+ svstnt1 (pn8, x0 + svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_bf16_2:
+** incb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_2, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 + svcnth () * 2, z0),
+ svstnt1 (pn8, x0 + svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_bf16_3:
+** incb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_3, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 + svcnth () * 3, z0),
+ svstnt1 (pn8, x0 + svcnth () * 3, z0))
+
+/*
+** stnt1_bf16_4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_4, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 + svcnth () * 4, z0),
+ svstnt1 (pn8, x0 + svcnth () * 4, z0))
+
+/*
+** stnt1_bf16_28:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_28, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 + svcnth () * 28, z0),
+ svstnt1 (pn8, x0 + svcnth () * 28, z0))
+
+/*
+** stnt1_bf16_32:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_32, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 + svcnth () * 32, z0),
+ svstnt1 (pn8, x0 + svcnth () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_bf16_m1:
+** decb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_m1, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 - svcnth (), z0),
+ svstnt1 (pn8, x0 - svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_bf16_m2:
+** decb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_m2, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 - svcnth () * 2, z0),
+ svstnt1 (pn8, x0 - svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_bf16_m3:
+** decb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_m3, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 - svcnth () * 3, z0),
+ svstnt1 (pn8, x0 - svcnth () * 3, z0))
+
+/*
+** stnt1_bf16_m4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_m4, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 - svcnth () * 4, z0),
+ svstnt1 (pn8, x0 - svcnth () * 4, z0))
+
+/*
+** stnt1_bf16_m32:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_m32, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 - svcnth () * 32, z0),
+ svstnt1 (pn8, x0 - svcnth () * 32, z0))
+
+/*
+** stnt1_bf16_m36:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_m36, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0 - svcnth () * 36, z0),
+ svstnt1 (pn8, x0 - svcnth () * 36, z0))
+
+/*
+** stnt1_bf16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_z17, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_bf16_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_z22, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_bf16_z28:
+** stnt1h {z28\.h - z31\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_z28, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_bf16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_pn0, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_bf16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_pn7, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_bf16_pn15:
+** stnt1h {z0\.h - z3\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_bf16_pn15, svbfloat16x4_t, bfloat16_t,
+ svstnt1_bf16_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_bf16_0:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_0, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_bf16_1:
+** incb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_1, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_bf16_2:
+** incb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_2, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_bf16_3:
+** incb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_3, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_bf16_4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_4, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_bf16_28:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_28, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_bf16_32:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_32, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_bf16_m1:
+** decb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_m1, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_bf16_m2:
+** decb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_m2, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_bf16_m3:
+** decb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_m3, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_bf16_m4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_m4, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_bf16_m32:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_m32, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_bf16_m36:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_m36, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_bf16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_bf16_x1, svbfloat16x4_t, bfloat16_t,
+ svstnt1_vnum_bf16_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_f16_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_f16_x2.c
new file mode 100644
index 0000000..fa21158
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_f16_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_f16_base:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_base, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_f16_index:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_index, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f16_1:
+** incb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_1, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0 + svcnth (), z0),
+ svstnt1 (pn8, x0 + svcnth (), z0))
+
+/*
+** stnt1_f16_2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_2, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0 + svcnth () * 2, z0),
+ svstnt1 (pn8, x0 + svcnth () * 2, z0))
+
+/*
+** stnt1_f16_14:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_14, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0 + svcnth () * 14, z0),
+ svstnt1 (pn8, x0 + svcnth () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f16_16:
+** incb x0, all, mul #16
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_16, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0 + svcnth () * 16, z0),
+ svstnt1 (pn8, x0 + svcnth () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f16_m1:
+** decb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_m1, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0 - svcnth (), z0),
+ svstnt1 (pn8, x0 - svcnth (), z0))
+
+/*
+** stnt1_f16_m2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_m2, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0 - svcnth () * 2, z0),
+ svstnt1 (pn8, x0 - svcnth () * 2, z0))
+
+/*
+** stnt1_f16_m16:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_m16, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0 - svcnth () * 16, z0),
+ svstnt1 (pn8, x0 - svcnth () * 16, z0))
+
+/*
+** stnt1_f16_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_m18, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0 - svcnth () * 18, z0),
+ svstnt1 (pn8, x0 - svcnth () * 18, z0))
+
+/*
+** stnt1_f16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_z17, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_f16_z22:
+** stnt1h {z22\.h(?: - |, )z23\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_z22, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_f16_z28:
+** stnt1h {z28\.h(?: - |, )z29\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_z28, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_f16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_pn0, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_f16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_pn7, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_f16_pn15:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_pn15, svfloat16x2_t, float16_t,
+ svstnt1_f16_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_f16_0:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_0, svfloat16x2_t, float16_t,
+ svstnt1_vnum_f16_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f16_1:
+** incb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_1, svfloat16x2_t, float16_t,
+ svstnt1_vnum_f16_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_f16_2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_2, svfloat16x2_t, float16_t,
+ svstnt1_vnum_f16_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_f16_14:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_14, svfloat16x2_t, float16_t,
+ svstnt1_vnum_f16_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f16_16:
+** incb x0, all, mul #16
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_16, svfloat16x2_t, float16_t,
+ svstnt1_vnum_f16_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f16_m1:
+** decb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_m1, svfloat16x2_t, float16_t,
+ svstnt1_vnum_f16_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_f16_m2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_m2, svfloat16x2_t, float16_t,
+ svstnt1_vnum_f16_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_f16_m16:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_m16, svfloat16x2_t, float16_t,
+ svstnt1_vnum_f16_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_f16_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_m18, svfloat16x2_t, float16_t,
+ svstnt1_vnum_f16_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_f16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_x1, svfloat16x2_t, float16_t,
+ svstnt1_vnum_f16_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_f16_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_f16_x4.c
new file mode 100644
index 0000000..6ad4344
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_f16_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_f16_base:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_base, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_f16_index:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_index, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f16_1:
+** incb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_1, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 + svcnth (), z0),
+ svstnt1 (pn8, x0 + svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f16_2:
+** incb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_2, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 + svcnth () * 2, z0),
+ svstnt1 (pn8, x0 + svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f16_3:
+** incb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_3, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 + svcnth () * 3, z0),
+ svstnt1 (pn8, x0 + svcnth () * 3, z0))
+
+/*
+** stnt1_f16_4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_4, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 + svcnth () * 4, z0),
+ svstnt1 (pn8, x0 + svcnth () * 4, z0))
+
+/*
+** stnt1_f16_28:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_28, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 + svcnth () * 28, z0),
+ svstnt1 (pn8, x0 + svcnth () * 28, z0))
+
+/*
+** stnt1_f16_32:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_32, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 + svcnth () * 32, z0),
+ svstnt1 (pn8, x0 + svcnth () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f16_m1:
+** decb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_m1, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 - svcnth (), z0),
+ svstnt1 (pn8, x0 - svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f16_m2:
+** decb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_m2, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 - svcnth () * 2, z0),
+ svstnt1 (pn8, x0 - svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f16_m3:
+** decb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_m3, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 - svcnth () * 3, z0),
+ svstnt1 (pn8, x0 - svcnth () * 3, z0))
+
+/*
+** stnt1_f16_m4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_m4, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 - svcnth () * 4, z0),
+ svstnt1 (pn8, x0 - svcnth () * 4, z0))
+
+/*
+** stnt1_f16_m32:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_m32, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 - svcnth () * 32, z0),
+ svstnt1 (pn8, x0 - svcnth () * 32, z0))
+
+/*
+** stnt1_f16_m36:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_m36, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0 - svcnth () * 36, z0),
+ svstnt1 (pn8, x0 - svcnth () * 36, z0))
+
+/*
+** stnt1_f16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_z17, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_f16_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_z22, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_f16_z28:
+** stnt1h {z28\.h - z31\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_z28, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_f16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_pn0, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_f16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_pn7, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_f16_pn15:
+** stnt1h {z0\.h - z3\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f16_pn15, svfloat16x4_t, float16_t,
+ svstnt1_f16_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_f16_0:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_0, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f16_1:
+** incb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_1, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f16_2:
+** incb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_2, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f16_3:
+** incb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_3, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_f16_4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_4, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_f16_28:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_28, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_f16_32:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_32, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f16_m1:
+** decb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_m1, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f16_m2:
+** decb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_m2, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f16_m3:
+** decb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_m3, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_f16_m4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_m4, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_f16_m32:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_m32, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_f16_m36:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_m36, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_f16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f16_x1, svfloat16x4_t, float16_t,
+ svstnt1_vnum_f16_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_f32_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_f32_x2.c
new file mode 100644
index 0000000..3c9ce1e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_f32_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_f32_base:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_base, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_f32_index:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_index, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f32_1:
+** incb x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_1, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0 + svcntw (), z0),
+ svstnt1 (pn8, x0 + svcntw (), z0))
+
+/*
+** stnt1_f32_2:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_2, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0 + svcntw () * 2, z0),
+ svstnt1 (pn8, x0 + svcntw () * 2, z0))
+
+/*
+** stnt1_f32_14:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_14, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0 + svcntw () * 14, z0),
+ svstnt1 (pn8, x0 + svcntw () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f32_16:
+** incb x0, all, mul #16
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_16, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0 + svcntw () * 16, z0),
+ svstnt1 (pn8, x0 + svcntw () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f32_m1:
+** decb x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_m1, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0 - svcntw (), z0),
+ svstnt1 (pn8, x0 - svcntw (), z0))
+
+/*
+** stnt1_f32_m2:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_m2, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0 - svcntw () * 2, z0),
+ svstnt1 (pn8, x0 - svcntw () * 2, z0))
+
+/*
+** stnt1_f32_m16:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_m16, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0 - svcntw () * 16, z0),
+ svstnt1 (pn8, x0 - svcntw () * 16, z0))
+
+/*
+** stnt1_f32_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_m18, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0 - svcntw () * 18, z0),
+ svstnt1 (pn8, x0 - svcntw () * 18, z0))
+
+/*
+** stnt1_f32_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_z17, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_f32_z22:
+** stnt1w {z22\.s(?: - |, )z23\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_z22, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_f32_z28:
+** stnt1w {z28\.s(?: - |, )z29\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_z28, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_f32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_pn0, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_f32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_pn7, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_f32_pn15:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_pn15, svfloat32x2_t, float32_t,
+ svstnt1_f32_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_f32_0:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_0, svfloat32x2_t, float32_t,
+ svstnt1_vnum_f32_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f32_1:
+** incb x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_1, svfloat32x2_t, float32_t,
+ svstnt1_vnum_f32_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_f32_2:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_2, svfloat32x2_t, float32_t,
+ svstnt1_vnum_f32_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_f32_14:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_14, svfloat32x2_t, float32_t,
+ svstnt1_vnum_f32_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f32_16:
+** incb x0, all, mul #16
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_16, svfloat32x2_t, float32_t,
+ svstnt1_vnum_f32_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f32_m1:
+** decb x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_m1, svfloat32x2_t, float32_t,
+ svstnt1_vnum_f32_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_f32_m2:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_m2, svfloat32x2_t, float32_t,
+ svstnt1_vnum_f32_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_f32_m16:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_m16, svfloat32x2_t, float32_t,
+ svstnt1_vnum_f32_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_f32_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_m18, svfloat32x2_t, float32_t,
+ svstnt1_vnum_f32_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_f32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_x1, svfloat32x2_t, float32_t,
+ svstnt1_vnum_f32_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_f32_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_f32_x4.c
new file mode 100644
index 0000000..ccf64a6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_f32_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_f32_base:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_base, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_f32_index:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_index, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f32_1:
+** incb x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_1, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 + svcntw (), z0),
+ svstnt1 (pn8, x0 + svcntw (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f32_2:
+** incb x0, all, mul #2
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_2, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 + svcntw () * 2, z0),
+ svstnt1 (pn8, x0 + svcntw () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f32_3:
+** incb x0, all, mul #3
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_3, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 + svcntw () * 3, z0),
+ svstnt1 (pn8, x0 + svcntw () * 3, z0))
+
+/*
+** stnt1_f32_4:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_4, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 + svcntw () * 4, z0),
+ svstnt1 (pn8, x0 + svcntw () * 4, z0))
+
+/*
+** stnt1_f32_28:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_28, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 + svcntw () * 28, z0),
+ svstnt1 (pn8, x0 + svcntw () * 28, z0))
+
+/*
+** stnt1_f32_32:
+** [^{]*
+** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_32, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 + svcntw () * 32, z0),
+ svstnt1 (pn8, x0 + svcntw () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f32_m1:
+** decb x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_m1, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 - svcntw (), z0),
+ svstnt1 (pn8, x0 - svcntw (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f32_m2:
+** decb x0, all, mul #2
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_m2, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 - svcntw () * 2, z0),
+ svstnt1 (pn8, x0 - svcntw () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f32_m3:
+** decb x0, all, mul #3
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_m3, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 - svcntw () * 3, z0),
+ svstnt1 (pn8, x0 - svcntw () * 3, z0))
+
+/*
+** stnt1_f32_m4:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_m4, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 - svcntw () * 4, z0),
+ svstnt1 (pn8, x0 - svcntw () * 4, z0))
+
+/*
+** stnt1_f32_m32:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_m32, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 - svcntw () * 32, z0),
+ svstnt1 (pn8, x0 - svcntw () * 32, z0))
+
+/*
+** stnt1_f32_m36:
+** [^{]*
+** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_m36, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0 - svcntw () * 36, z0),
+ svstnt1 (pn8, x0 - svcntw () * 36, z0))
+
+/*
+** stnt1_f32_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_z17, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_f32_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_z22, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_f32_z28:
+** stnt1w {z28\.s - z31\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_z28, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_f32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1w {z0\.s - z3\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_pn0, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_f32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1w {z0\.s - z3\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_pn7, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_f32_pn15:
+** stnt1w {z0\.s - z3\.s}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f32_pn15, svfloat32x4_t, float32_t,
+ svstnt1_f32_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_f32_0:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_0, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f32_1:
+** incb x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_1, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f32_2:
+** incb x0, all, mul #2
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_2, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f32_3:
+** incb x0, all, mul #3
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_3, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_f32_4:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_4, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_f32_28:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_28, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_f32_32:
+** [^{]*
+** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_32, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f32_m1:
+** decb x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_m1, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f32_m2:
+** decb x0, all, mul #2
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_m2, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f32_m3:
+** decb x0, all, mul #3
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_m3, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_f32_m4:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_m4, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_f32_m32:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_m32, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_f32_m36:
+** [^{]*
+** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_m36, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_f32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f32_x1, svfloat32x4_t, float32_t,
+ svstnt1_vnum_f32_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_f64_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_f64_x2.c
new file mode 100644
index 0000000..1c98c11
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_f64_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_f64_base:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_base, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_f64_index:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_index, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f64_1:
+** incb x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_1, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0 + svcntd (), z0),
+ svstnt1 (pn8, x0 + svcntd (), z0))
+
+/*
+** stnt1_f64_2:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_2, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0 + svcntd () * 2, z0),
+ svstnt1 (pn8, x0 + svcntd () * 2, z0))
+
+/*
+** stnt1_f64_14:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_14, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0 + svcntd () * 14, z0),
+ svstnt1 (pn8, x0 + svcntd () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f64_16:
+** incb x0, all, mul #16
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_16, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0 + svcntd () * 16, z0),
+ svstnt1 (pn8, x0 + svcntd () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f64_m1:
+** decb x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_m1, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0 - svcntd (), z0),
+ svstnt1 (pn8, x0 - svcntd (), z0))
+
+/*
+** stnt1_f64_m2:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_m2, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0 - svcntd () * 2, z0),
+ svstnt1 (pn8, x0 - svcntd () * 2, z0))
+
+/*
+** stnt1_f64_m16:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_m16, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0 - svcntd () * 16, z0),
+ svstnt1 (pn8, x0 - svcntd () * 16, z0))
+
+/*
+** stnt1_f64_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_m18, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0 - svcntd () * 18, z0),
+ svstnt1 (pn8, x0 - svcntd () * 18, z0))
+
+/*
+** stnt1_f64_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_z17, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_f64_z22:
+** stnt1d {z22\.d(?: - |, )z23\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_z22, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_f64_z28:
+** stnt1d {z28\.d(?: - |, )z29\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_z28, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_f64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_pn0, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_f64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_pn7, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_f64_pn15:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_pn15, svfloat64x2_t, float64_t,
+ svstnt1_f64_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_f64_0:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_0, svfloat64x2_t, float64_t,
+ svstnt1_vnum_f64_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f64_1:
+** incb x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_1, svfloat64x2_t, float64_t,
+ svstnt1_vnum_f64_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_f64_2:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_2, svfloat64x2_t, float64_t,
+ svstnt1_vnum_f64_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_f64_14:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_14, svfloat64x2_t, float64_t,
+ svstnt1_vnum_f64_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f64_16:
+** incb x0, all, mul #16
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_16, svfloat64x2_t, float64_t,
+ svstnt1_vnum_f64_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f64_m1:
+** decb x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_m1, svfloat64x2_t, float64_t,
+ svstnt1_vnum_f64_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_f64_m2:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_m2, svfloat64x2_t, float64_t,
+ svstnt1_vnum_f64_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_f64_m16:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_m16, svfloat64x2_t, float64_t,
+ svstnt1_vnum_f64_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_f64_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_m18, svfloat64x2_t, float64_t,
+ svstnt1_vnum_f64_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_f64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_x1, svfloat64x2_t, float64_t,
+ svstnt1_vnum_f64_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_f64_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_f64_x4.c
new file mode 100644
index 0000000..8d6bc78
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_f64_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_f64_base:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_base, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_f64_index:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_index, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f64_1:
+** incb x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_1, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 + svcntd (), z0),
+ svstnt1 (pn8, x0 + svcntd (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f64_2:
+** incb x0, all, mul #2
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_2, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 + svcntd () * 2, z0),
+ svstnt1 (pn8, x0 + svcntd () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f64_3:
+** incb x0, all, mul #3
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_3, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 + svcntd () * 3, z0),
+ svstnt1 (pn8, x0 + svcntd () * 3, z0))
+
+/*
+** stnt1_f64_4:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_4, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 + svcntd () * 4, z0),
+ svstnt1 (pn8, x0 + svcntd () * 4, z0))
+
+/*
+** stnt1_f64_28:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_28, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 + svcntd () * 28, z0),
+ svstnt1 (pn8, x0 + svcntd () * 28, z0))
+
+/*
+** stnt1_f64_32:
+** [^{]*
+** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_32, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 + svcntd () * 32, z0),
+ svstnt1 (pn8, x0 + svcntd () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f64_m1:
+** decb x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_m1, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 - svcntd (), z0),
+ svstnt1 (pn8, x0 - svcntd (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f64_m2:
+** decb x0, all, mul #2
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_m2, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 - svcntd () * 2, z0),
+ svstnt1 (pn8, x0 - svcntd () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_f64_m3:
+** decb x0, all, mul #3
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_m3, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 - svcntd () * 3, z0),
+ svstnt1 (pn8, x0 - svcntd () * 3, z0))
+
+/*
+** stnt1_f64_m4:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_m4, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 - svcntd () * 4, z0),
+ svstnt1 (pn8, x0 - svcntd () * 4, z0))
+
+/*
+** stnt1_f64_m32:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_m32, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 - svcntd () * 32, z0),
+ svstnt1 (pn8, x0 - svcntd () * 32, z0))
+
+/*
+** stnt1_f64_m36:
+** [^{]*
+** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_m36, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0 - svcntd () * 36, z0),
+ svstnt1 (pn8, x0 - svcntd () * 36, z0))
+
+/*
+** stnt1_f64_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_z17, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_f64_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_z22, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_f64_z28:
+** stnt1d {z28\.d - z31\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_z28, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_f64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1d {z0\.d - z3\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_pn0, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_f64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1d {z0\.d - z3\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_pn7, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_f64_pn15:
+** stnt1d {z0\.d - z3\.d}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_f64_pn15, svfloat64x4_t, float64_t,
+ svstnt1_f64_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_f64_0:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_0, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f64_1:
+** incb x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_1, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f64_2:
+** incb x0, all, mul #2
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_2, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f64_3:
+** incb x0, all, mul #3
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_3, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_f64_4:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_4, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_f64_28:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_28, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_f64_32:
+** [^{]*
+** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_32, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f64_m1:
+** decb x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_m1, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f64_m2:
+** decb x0, all, mul #2
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_m2, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_f64_m3:
+** decb x0, all, mul #3
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_m3, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_f64_m4:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_m4, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_f64_m32:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_m32, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_f64_m36:
+** [^{]*
+** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_m36, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_f64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_f64_x1, svfloat64x4_t, float64_t,
+ svstnt1_vnum_f64_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_s16_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_s16_x2.c
new file mode 100644
index 0000000..3e2e7b8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_s16_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_s16_base:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_base, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_s16_index:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_index, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s16_1:
+** incb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_1, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0 + svcnth (), z0),
+ svstnt1 (pn8, x0 + svcnth (), z0))
+
+/*
+** stnt1_s16_2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_2, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0 + svcnth () * 2, z0),
+ svstnt1 (pn8, x0 + svcnth () * 2, z0))
+
+/*
+** stnt1_s16_14:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_14, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0 + svcnth () * 14, z0),
+ svstnt1 (pn8, x0 + svcnth () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s16_16:
+** incb x0, all, mul #16
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_16, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0 + svcnth () * 16, z0),
+ svstnt1 (pn8, x0 + svcnth () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s16_m1:
+** decb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_m1, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0 - svcnth (), z0),
+ svstnt1 (pn8, x0 - svcnth (), z0))
+
+/*
+** stnt1_s16_m2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_m2, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0 - svcnth () * 2, z0),
+ svstnt1 (pn8, x0 - svcnth () * 2, z0))
+
+/*
+** stnt1_s16_m16:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_m16, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0 - svcnth () * 16, z0),
+ svstnt1 (pn8, x0 - svcnth () * 16, z0))
+
+/*
+** stnt1_s16_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_m18, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0 - svcnth () * 18, z0),
+ svstnt1 (pn8, x0 - svcnth () * 18, z0))
+
+/*
+** stnt1_s16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_z17, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_s16_z22:
+** stnt1h {z22\.h(?: - |, )z23\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_z22, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_s16_z28:
+** stnt1h {z28\.h(?: - |, )z29\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_z28, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_s16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_pn0, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_s16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_pn7, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_s16_pn15:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_pn15, svint16x2_t, int16_t,
+ svstnt1_s16_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_s16_0:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_0, svint16x2_t, int16_t,
+ svstnt1_vnum_s16_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s16_1:
+** incb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_1, svint16x2_t, int16_t,
+ svstnt1_vnum_s16_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_s16_2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_2, svint16x2_t, int16_t,
+ svstnt1_vnum_s16_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_s16_14:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_14, svint16x2_t, int16_t,
+ svstnt1_vnum_s16_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s16_16:
+** incb x0, all, mul #16
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_16, svint16x2_t, int16_t,
+ svstnt1_vnum_s16_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s16_m1:
+** decb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_m1, svint16x2_t, int16_t,
+ svstnt1_vnum_s16_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_s16_m2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_m2, svint16x2_t, int16_t,
+ svstnt1_vnum_s16_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_s16_m16:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_m16, svint16x2_t, int16_t,
+ svstnt1_vnum_s16_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_s16_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_m18, svint16x2_t, int16_t,
+ svstnt1_vnum_s16_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_s16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_x1, svint16x2_t, int16_t,
+ svstnt1_vnum_s16_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_s16_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_s16_x4.c
new file mode 100644
index 0000000..4fa4d15
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_s16_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_s16_base:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_base, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_s16_index:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_index, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s16_1:
+** incb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_1, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 + svcnth (), z0),
+ svstnt1 (pn8, x0 + svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s16_2:
+** incb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_2, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 + svcnth () * 2, z0),
+ svstnt1 (pn8, x0 + svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s16_3:
+** incb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_3, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 + svcnth () * 3, z0),
+ svstnt1 (pn8, x0 + svcnth () * 3, z0))
+
+/*
+** stnt1_s16_4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_4, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 + svcnth () * 4, z0),
+ svstnt1 (pn8, x0 + svcnth () * 4, z0))
+
+/*
+** stnt1_s16_28:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_28, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 + svcnth () * 28, z0),
+ svstnt1 (pn8, x0 + svcnth () * 28, z0))
+
+/*
+** stnt1_s16_32:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_32, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 + svcnth () * 32, z0),
+ svstnt1 (pn8, x0 + svcnth () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s16_m1:
+** decb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_m1, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 - svcnth (), z0),
+ svstnt1 (pn8, x0 - svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s16_m2:
+** decb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_m2, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 - svcnth () * 2, z0),
+ svstnt1 (pn8, x0 - svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s16_m3:
+** decb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_m3, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 - svcnth () * 3, z0),
+ svstnt1 (pn8, x0 - svcnth () * 3, z0))
+
+/*
+** stnt1_s16_m4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_m4, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 - svcnth () * 4, z0),
+ svstnt1 (pn8, x0 - svcnth () * 4, z0))
+
+/*
+** stnt1_s16_m32:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_m32, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 - svcnth () * 32, z0),
+ svstnt1 (pn8, x0 - svcnth () * 32, z0))
+
+/*
+** stnt1_s16_m36:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_m36, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0 - svcnth () * 36, z0),
+ svstnt1 (pn8, x0 - svcnth () * 36, z0))
+
+/*
+** stnt1_s16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_z17, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_s16_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_z22, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_s16_z28:
+** stnt1h {z28\.h - z31\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_z28, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_s16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_pn0, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_s16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_pn7, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_s16_pn15:
+** stnt1h {z0\.h - z3\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s16_pn15, svint16x4_t, int16_t,
+ svstnt1_s16_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_s16_0:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_0, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s16_1:
+** incb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_1, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s16_2:
+** incb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_2, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s16_3:
+** incb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_3, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_s16_4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_4, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_s16_28:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_28, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_s16_32:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_32, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s16_m1:
+** decb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_m1, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s16_m2:
+** decb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_m2, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s16_m3:
+** decb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_m3, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_s16_m4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_m4, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_s16_m32:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_m32, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_s16_m36:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_m36, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_s16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s16_x1, svint16x4_t, int16_t,
+ svstnt1_vnum_s16_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_s32_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_s32_x2.c
new file mode 100644
index 0000000..47a19b1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_s32_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_s32_base:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_base, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_s32_index:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_index, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s32_1:
+** incb x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_1, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0 + svcntw (), z0),
+ svstnt1 (pn8, x0 + svcntw (), z0))
+
+/*
+** stnt1_s32_2:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_2, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0 + svcntw () * 2, z0),
+ svstnt1 (pn8, x0 + svcntw () * 2, z0))
+
+/*
+** stnt1_s32_14:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_14, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0 + svcntw () * 14, z0),
+ svstnt1 (pn8, x0 + svcntw () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s32_16:
+** incb x0, all, mul #16
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_16, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0 + svcntw () * 16, z0),
+ svstnt1 (pn8, x0 + svcntw () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s32_m1:
+** decb x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_m1, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0 - svcntw (), z0),
+ svstnt1 (pn8, x0 - svcntw (), z0))
+
+/*
+** stnt1_s32_m2:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_m2, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0 - svcntw () * 2, z0),
+ svstnt1 (pn8, x0 - svcntw () * 2, z0))
+
+/*
+** stnt1_s32_m16:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_m16, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0 - svcntw () * 16, z0),
+ svstnt1 (pn8, x0 - svcntw () * 16, z0))
+
+/*
+** stnt1_s32_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_m18, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0 - svcntw () * 18, z0),
+ svstnt1 (pn8, x0 - svcntw () * 18, z0))
+
+/*
+** stnt1_s32_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_z17, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_s32_z22:
+** stnt1w {z22\.s(?: - |, )z23\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_z22, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_s32_z28:
+** stnt1w {z28\.s(?: - |, )z29\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_z28, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_s32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_pn0, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_s32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_pn7, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_s32_pn15:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_pn15, svint32x2_t, int32_t,
+ svstnt1_s32_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_s32_0:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_0, svint32x2_t, int32_t,
+ svstnt1_vnum_s32_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s32_1:
+** incb x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_1, svint32x2_t, int32_t,
+ svstnt1_vnum_s32_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_s32_2:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_2, svint32x2_t, int32_t,
+ svstnt1_vnum_s32_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_s32_14:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_14, svint32x2_t, int32_t,
+ svstnt1_vnum_s32_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s32_16:
+** incb x0, all, mul #16
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_16, svint32x2_t, int32_t,
+ svstnt1_vnum_s32_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s32_m1:
+** decb x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_m1, svint32x2_t, int32_t,
+ svstnt1_vnum_s32_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_s32_m2:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_m2, svint32x2_t, int32_t,
+ svstnt1_vnum_s32_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_s32_m16:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_m16, svint32x2_t, int32_t,
+ svstnt1_vnum_s32_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_s32_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_m18, svint32x2_t, int32_t,
+ svstnt1_vnum_s32_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_s32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_x1, svint32x2_t, int32_t,
+ svstnt1_vnum_s32_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_s32_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_s32_x4.c
new file mode 100644
index 0000000..c8cfff1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_s32_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_s32_base:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_base, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_s32_index:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_index, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s32_1:
+** incb x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_1, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 + svcntw (), z0),
+ svstnt1 (pn8, x0 + svcntw (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s32_2:
+** incb x0, all, mul #2
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_2, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 + svcntw () * 2, z0),
+ svstnt1 (pn8, x0 + svcntw () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s32_3:
+** incb x0, all, mul #3
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_3, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 + svcntw () * 3, z0),
+ svstnt1 (pn8, x0 + svcntw () * 3, z0))
+
+/*
+** stnt1_s32_4:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_4, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 + svcntw () * 4, z0),
+ svstnt1 (pn8, x0 + svcntw () * 4, z0))
+
+/*
+** stnt1_s32_28:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_28, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 + svcntw () * 28, z0),
+ svstnt1 (pn8, x0 + svcntw () * 28, z0))
+
+/*
+** stnt1_s32_32:
+** [^{]*
+** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_32, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 + svcntw () * 32, z0),
+ svstnt1 (pn8, x0 + svcntw () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s32_m1:
+** decb x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_m1, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 - svcntw (), z0),
+ svstnt1 (pn8, x0 - svcntw (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s32_m2:
+** decb x0, all, mul #2
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_m2, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 - svcntw () * 2, z0),
+ svstnt1 (pn8, x0 - svcntw () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s32_m3:
+** decb x0, all, mul #3
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_m3, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 - svcntw () * 3, z0),
+ svstnt1 (pn8, x0 - svcntw () * 3, z0))
+
+/*
+** stnt1_s32_m4:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_m4, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 - svcntw () * 4, z0),
+ svstnt1 (pn8, x0 - svcntw () * 4, z0))
+
+/*
+** stnt1_s32_m32:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_m32, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 - svcntw () * 32, z0),
+ svstnt1 (pn8, x0 - svcntw () * 32, z0))
+
+/*
+** stnt1_s32_m36:
+** [^{]*
+** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_m36, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0 - svcntw () * 36, z0),
+ svstnt1 (pn8, x0 - svcntw () * 36, z0))
+
+/*
+** stnt1_s32_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_z17, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_s32_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_z22, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_s32_z28:
+** stnt1w {z28\.s - z31\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_z28, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_s32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1w {z0\.s - z3\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_pn0, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_s32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1w {z0\.s - z3\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_pn7, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_s32_pn15:
+** stnt1w {z0\.s - z3\.s}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s32_pn15, svint32x4_t, int32_t,
+ svstnt1_s32_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_s32_0:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_0, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s32_1:
+** incb x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_1, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s32_2:
+** incb x0, all, mul #2
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_2, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s32_3:
+** incb x0, all, mul #3
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_3, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_s32_4:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_4, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_s32_28:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_28, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_s32_32:
+** [^{]*
+** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_32, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s32_m1:
+** decb x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_m1, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s32_m2:
+** decb x0, all, mul #2
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_m2, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s32_m3:
+** decb x0, all, mul #3
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_m3, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_s32_m4:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_m4, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_s32_m32:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_m32, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_s32_m36:
+** [^{]*
+** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_m36, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_s32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s32_x1, svint32x4_t, int32_t,
+ svstnt1_vnum_s32_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_s64_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_s64_x2.c
new file mode 100644
index 0000000..6527734
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_s64_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_s64_base:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_base, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_s64_index:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_index, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s64_1:
+** incb x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_1, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0 + svcntd (), z0),
+ svstnt1 (pn8, x0 + svcntd (), z0))
+
+/*
+** stnt1_s64_2:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_2, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0 + svcntd () * 2, z0),
+ svstnt1 (pn8, x0 + svcntd () * 2, z0))
+
+/*
+** stnt1_s64_14:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_14, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0 + svcntd () * 14, z0),
+ svstnt1 (pn8, x0 + svcntd () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s64_16:
+** incb x0, all, mul #16
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_16, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0 + svcntd () * 16, z0),
+ svstnt1 (pn8, x0 + svcntd () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s64_m1:
+** decb x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_m1, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0 - svcntd (), z0),
+ svstnt1 (pn8, x0 - svcntd (), z0))
+
+/*
+** stnt1_s64_m2:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_m2, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0 - svcntd () * 2, z0),
+ svstnt1 (pn8, x0 - svcntd () * 2, z0))
+
+/*
+** stnt1_s64_m16:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_m16, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0 - svcntd () * 16, z0),
+ svstnt1 (pn8, x0 - svcntd () * 16, z0))
+
+/*
+** stnt1_s64_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_m18, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0 - svcntd () * 18, z0),
+ svstnt1 (pn8, x0 - svcntd () * 18, z0))
+
+/*
+** stnt1_s64_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_z17, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_s64_z22:
+** stnt1d {z22\.d(?: - |, )z23\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_z22, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_s64_z28:
+** stnt1d {z28\.d(?: - |, )z29\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_z28, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_s64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_pn0, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_s64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_pn7, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_s64_pn15:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_pn15, svint64x2_t, int64_t,
+ svstnt1_s64_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_s64_0:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_0, svint64x2_t, int64_t,
+ svstnt1_vnum_s64_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s64_1:
+** incb x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_1, svint64x2_t, int64_t,
+ svstnt1_vnum_s64_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_s64_2:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_2, svint64x2_t, int64_t,
+ svstnt1_vnum_s64_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_s64_14:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_14, svint64x2_t, int64_t,
+ svstnt1_vnum_s64_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s64_16:
+** incb x0, all, mul #16
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_16, svint64x2_t, int64_t,
+ svstnt1_vnum_s64_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s64_m1:
+** decb x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_m1, svint64x2_t, int64_t,
+ svstnt1_vnum_s64_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_s64_m2:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_m2, svint64x2_t, int64_t,
+ svstnt1_vnum_s64_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_s64_m16:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_m16, svint64x2_t, int64_t,
+ svstnt1_vnum_s64_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_s64_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_m18, svint64x2_t, int64_t,
+ svstnt1_vnum_s64_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_s64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_x1, svint64x2_t, int64_t,
+ svstnt1_vnum_s64_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_s64_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_s64_x4.c
new file mode 100644
index 0000000..84087c1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_s64_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_s64_base:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_base, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_s64_index:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_index, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s64_1:
+** incb x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_1, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 + svcntd (), z0),
+ svstnt1 (pn8, x0 + svcntd (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s64_2:
+** incb x0, all, mul #2
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_2, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 + svcntd () * 2, z0),
+ svstnt1 (pn8, x0 + svcntd () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s64_3:
+** incb x0, all, mul #3
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_3, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 + svcntd () * 3, z0),
+ svstnt1 (pn8, x0 + svcntd () * 3, z0))
+
+/*
+** stnt1_s64_4:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_4, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 + svcntd () * 4, z0),
+ svstnt1 (pn8, x0 + svcntd () * 4, z0))
+
+/*
+** stnt1_s64_28:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_28, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 + svcntd () * 28, z0),
+ svstnt1 (pn8, x0 + svcntd () * 28, z0))
+
+/*
+** stnt1_s64_32:
+** [^{]*
+** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_32, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 + svcntd () * 32, z0),
+ svstnt1 (pn8, x0 + svcntd () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s64_m1:
+** decb x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_m1, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 - svcntd (), z0),
+ svstnt1 (pn8, x0 - svcntd (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s64_m2:
+** decb x0, all, mul #2
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_m2, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 - svcntd () * 2, z0),
+ svstnt1 (pn8, x0 - svcntd () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s64_m3:
+** decb x0, all, mul #3
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_m3, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 - svcntd () * 3, z0),
+ svstnt1 (pn8, x0 - svcntd () * 3, z0))
+
+/*
+** stnt1_s64_m4:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_m4, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 - svcntd () * 4, z0),
+ svstnt1 (pn8, x0 - svcntd () * 4, z0))
+
+/*
+** stnt1_s64_m32:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_m32, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 - svcntd () * 32, z0),
+ svstnt1 (pn8, x0 - svcntd () * 32, z0))
+
+/*
+** stnt1_s64_m36:
+** [^{]*
+** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_m36, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0 - svcntd () * 36, z0),
+ svstnt1 (pn8, x0 - svcntd () * 36, z0))
+
+/*
+** stnt1_s64_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_z17, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_s64_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_z22, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_s64_z28:
+** stnt1d {z28\.d - z31\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_z28, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_s64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1d {z0\.d - z3\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_pn0, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_s64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1d {z0\.d - z3\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_pn7, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_s64_pn15:
+** stnt1d {z0\.d - z3\.d}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s64_pn15, svint64x4_t, int64_t,
+ svstnt1_s64_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_s64_0:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_0, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s64_1:
+** incb x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_1, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s64_2:
+** incb x0, all, mul #2
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_2, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s64_3:
+** incb x0, all, mul #3
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_3, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_s64_4:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_4, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_s64_28:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_28, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_s64_32:
+** [^{]*
+** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_32, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s64_m1:
+** decb x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_m1, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s64_m2:
+** decb x0, all, mul #2
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_m2, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s64_m3:
+** decb x0, all, mul #3
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_m3, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_s64_m4:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_m4, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_s64_m32:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_m32, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_s64_m36:
+** [^{]*
+** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_m36, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_s64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s64_x1, svint64x4_t, int64_t,
+ svstnt1_vnum_s64_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_s8_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_s8_x2.c
new file mode 100644
index 0000000..d2337e3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_s8_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_s8_base:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_base, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_s8_index:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, x1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_index, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s8_1:
+** incb x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_1, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0 + svcntb (), z0),
+ svstnt1 (pn8, x0 + svcntb (), z0))
+
+/*
+** stnt1_s8_2:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_2, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0 + svcntb () * 2, z0),
+ svstnt1 (pn8, x0 + svcntb () * 2, z0))
+
+/*
+** stnt1_s8_14:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_14, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0 + svcntb () * 14, z0),
+ svstnt1 (pn8, x0 + svcntb () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s8_16:
+** incb x0, all, mul #16
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_16, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0 + svcntb () * 16, z0),
+ svstnt1 (pn8, x0 + svcntb () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s8_m1:
+** decb x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_m1, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0 - svcntb (), z0),
+ svstnt1 (pn8, x0 - svcntb (), z0))
+
+/*
+** stnt1_s8_m2:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_m2, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0 - svcntb () * 2, z0),
+ svstnt1 (pn8, x0 - svcntb () * 2, z0))
+
+/*
+** stnt1_s8_m16:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_m16, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0 - svcntb () * 16, z0),
+ svstnt1 (pn8, x0 - svcntb () * 16, z0))
+
+/*
+** stnt1_s8_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_m18, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0 - svcntb () * 18, z0),
+ svstnt1 (pn8, x0 - svcntb () * 18, z0))
+
+/*
+** stnt1_s8_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_z17, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_s8_z22:
+** stnt1b {z22\.b(?: - |, )z23\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_z22, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_s8_z28:
+** stnt1b {z28\.b(?: - |, )z29\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_z28, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_s8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_pn0, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_s8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_pn7, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_s8_pn15:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_pn15, svint8x2_t, int8_t,
+ svstnt1_s8_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_s8_0:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_0, svint8x2_t, int8_t,
+ svstnt1_vnum_s8_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s8_1:
+** incb x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_1, svint8x2_t, int8_t,
+ svstnt1_vnum_s8_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_s8_2:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_2, svint8x2_t, int8_t,
+ svstnt1_vnum_s8_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_s8_14:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_14, svint8x2_t, int8_t,
+ svstnt1_vnum_s8_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s8_16:
+** incb x0, all, mul #16
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_16, svint8x2_t, int8_t,
+ svstnt1_vnum_s8_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s8_m1:
+** decb x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_m1, svint8x2_t, int8_t,
+ svstnt1_vnum_s8_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_s8_m2:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_m2, svint8x2_t, int8_t,
+ svstnt1_vnum_s8_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_s8_m16:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_m16, svint8x2_t, int8_t,
+ svstnt1_vnum_s8_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_s8_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_m18, svint8x2_t, int8_t,
+ svstnt1_vnum_s8_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_s8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_x1, svint8x2_t, int8_t,
+ svstnt1_vnum_s8_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_s8_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_s8_x4.c
new file mode 100644
index 0000000..5f08d02
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_s8_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_s8_base:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_base, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_s8_index:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_index, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s8_1:
+** incb x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_1, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 + svcntb (), z0),
+ svstnt1 (pn8, x0 + svcntb (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s8_2:
+** incb x0, all, mul #2
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_2, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 + svcntb () * 2, z0),
+ svstnt1 (pn8, x0 + svcntb () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s8_3:
+** incb x0, all, mul #3
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_3, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 + svcntb () * 3, z0),
+ svstnt1 (pn8, x0 + svcntb () * 3, z0))
+
+/*
+** stnt1_s8_4:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_4, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 + svcntb () * 4, z0),
+ svstnt1 (pn8, x0 + svcntb () * 4, z0))
+
+/*
+** stnt1_s8_28:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_28, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 + svcntb () * 28, z0),
+ svstnt1 (pn8, x0 + svcntb () * 28, z0))
+
+/*
+** stnt1_s8_32:
+** [^{]*
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_32, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 + svcntb () * 32, z0),
+ svstnt1 (pn8, x0 + svcntb () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s8_m1:
+** decb x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_m1, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 - svcntb (), z0),
+ svstnt1 (pn8, x0 - svcntb (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s8_m2:
+** decb x0, all, mul #2
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_m2, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 - svcntb () * 2, z0),
+ svstnt1 (pn8, x0 - svcntb () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_s8_m3:
+** decb x0, all, mul #3
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_m3, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 - svcntb () * 3, z0),
+ svstnt1 (pn8, x0 - svcntb () * 3, z0))
+
+/*
+** stnt1_s8_m4:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_m4, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 - svcntb () * 4, z0),
+ svstnt1 (pn8, x0 - svcntb () * 4, z0))
+
+/*
+** stnt1_s8_m32:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_m32, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 - svcntb () * 32, z0),
+ svstnt1 (pn8, x0 - svcntb () * 32, z0))
+
+/*
+** stnt1_s8_m36:
+** [^{]*
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_m36, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0 - svcntb () * 36, z0),
+ svstnt1 (pn8, x0 - svcntb () * 36, z0))
+
+/*
+** stnt1_s8_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_z17, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_s8_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_z22, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_s8_z28:
+** stnt1b {z28\.b - z31\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_z28, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_s8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1b {z0\.b - z3\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_pn0, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_s8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1b {z0\.b - z3\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_pn7, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_s8_pn15:
+** stnt1b {z0\.b - z3\.b}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_s8_pn15, svint8x4_t, int8_t,
+ svstnt1_s8_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_s8_0:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_0, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s8_1:
+** incb x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_1, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s8_2:
+** incb x0, all, mul #2
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_2, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s8_3:
+** incb x0, all, mul #3
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_3, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_s8_4:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_4, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_s8_28:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_28, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_s8_32:
+** [^{]*
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_32, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s8_m1:
+** decb x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_m1, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s8_m2:
+** decb x0, all, mul #2
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_m2, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_s8_m3:
+** decb x0, all, mul #3
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_m3, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_s8_m4:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_m4, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_s8_m32:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_m32, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_s8_m36:
+** [^{]*
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_m36, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_s8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_s8_x1, svint8x4_t, int8_t,
+ svstnt1_vnum_s8_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_u16_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_u16_x2.c
new file mode 100644
index 0000000..b23d966
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_u16_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_u16_base:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_base, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_u16_index:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_index, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u16_1:
+** incb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_1, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0 + svcnth (), z0),
+ svstnt1 (pn8, x0 + svcnth (), z0))
+
+/*
+** stnt1_u16_2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_2, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0 + svcnth () * 2, z0),
+ svstnt1 (pn8, x0 + svcnth () * 2, z0))
+
+/*
+** stnt1_u16_14:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_14, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0 + svcnth () * 14, z0),
+ svstnt1 (pn8, x0 + svcnth () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u16_16:
+** incb x0, all, mul #16
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_16, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0 + svcnth () * 16, z0),
+ svstnt1 (pn8, x0 + svcnth () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u16_m1:
+** decb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_m1, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0 - svcnth (), z0),
+ svstnt1 (pn8, x0 - svcnth (), z0))
+
+/*
+** stnt1_u16_m2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_m2, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0 - svcnth () * 2, z0),
+ svstnt1 (pn8, x0 - svcnth () * 2, z0))
+
+/*
+** stnt1_u16_m16:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_m16, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0 - svcnth () * 16, z0),
+ svstnt1 (pn8, x0 - svcnth () * 16, z0))
+
+/*
+** stnt1_u16_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_m18, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0 - svcnth () * 18, z0),
+ svstnt1 (pn8, x0 - svcnth () * 18, z0))
+
+/*
+** stnt1_u16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_z17, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_u16_z22:
+** stnt1h {z22\.h(?: - |, )z23\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_z22, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_u16_z28:
+** stnt1h {z28\.h(?: - |, )z29\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_z28, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_u16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_pn0, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_u16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_pn7, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_u16_pn15:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_pn15, svuint16x2_t, uint16_t,
+ svstnt1_u16_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_u16_0:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_0, svuint16x2_t, uint16_t,
+ svstnt1_vnum_u16_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u16_1:
+** incb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_1, svuint16x2_t, uint16_t,
+ svstnt1_vnum_u16_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_u16_2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_2, svuint16x2_t, uint16_t,
+ svstnt1_vnum_u16_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_u16_14:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_14, svuint16x2_t, uint16_t,
+ svstnt1_vnum_u16_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u16_16:
+** incb x0, all, mul #16
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_16, svuint16x2_t, uint16_t,
+ svstnt1_vnum_u16_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u16_m1:
+** decb x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_m1, svuint16x2_t, uint16_t,
+ svstnt1_vnum_u16_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_u16_m2:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_m2, svuint16x2_t, uint16_t,
+ svstnt1_vnum_u16_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_u16_m16:
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_m16, svuint16x2_t, uint16_t,
+ svstnt1_vnum_u16_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_u16_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_m18, svuint16x2_t, uint16_t,
+ svstnt1_vnum_u16_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_u16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1h {z0\.h(?: - |, )z1\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_x1, svuint16x2_t, uint16_t,
+ svstnt1_vnum_u16_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_u16_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_u16_x4.c
new file mode 100644
index 0000000..998ab03
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_u16_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_u16_base:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_base, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_u16_index:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, x1, lsl #?1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_index, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u16_1:
+** incb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_1, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 + svcnth (), z0),
+ svstnt1 (pn8, x0 + svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u16_2:
+** incb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_2, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 + svcnth () * 2, z0),
+ svstnt1 (pn8, x0 + svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u16_3:
+** incb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_3, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 + svcnth () * 3, z0),
+ svstnt1 (pn8, x0 + svcnth () * 3, z0))
+
+/*
+** stnt1_u16_4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_4, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 + svcnth () * 4, z0),
+ svstnt1 (pn8, x0 + svcnth () * 4, z0))
+
+/*
+** stnt1_u16_28:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_28, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 + svcnth () * 28, z0),
+ svstnt1 (pn8, x0 + svcnth () * 28, z0))
+
+/*
+** stnt1_u16_32:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_32, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 + svcnth () * 32, z0),
+ svstnt1 (pn8, x0 + svcnth () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u16_m1:
+** decb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_m1, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 - svcnth (), z0),
+ svstnt1 (pn8, x0 - svcnth (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u16_m2:
+** decb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_m2, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 - svcnth () * 2, z0),
+ svstnt1 (pn8, x0 - svcnth () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u16_m3:
+** decb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_m3, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 - svcnth () * 3, z0),
+ svstnt1 (pn8, x0 - svcnth () * 3, z0))
+
+/*
+** stnt1_u16_m4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_m4, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 - svcnth () * 4, z0),
+ svstnt1 (pn8, x0 - svcnth () * 4, z0))
+
+/*
+** stnt1_u16_m32:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_m32, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 - svcnth () * 32, z0),
+ svstnt1 (pn8, x0 - svcnth () * 32, z0))
+
+/*
+** stnt1_u16_m36:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_m36, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0 - svcnth () * 36, z0),
+ svstnt1 (pn8, x0 - svcnth () * 36, z0))
+
+/*
+** stnt1_u16_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_z17, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_u16_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1h {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_z22, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_u16_z28:
+** stnt1h {z28\.h - z31\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_z28, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_u16_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_pn0, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_u16_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1h {z0\.h - z3\.h}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_pn7, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_u16_pn15:
+** stnt1h {z0\.h - z3\.h}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u16_pn15, svuint16x4_t, uint16_t,
+ svstnt1_u16_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_u16_0:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_0, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u16_1:
+** incb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_1, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u16_2:
+** incb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_2, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u16_3:
+** incb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_3, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_u16_4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_4, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_u16_28:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_28, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_u16_32:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_32, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u16_m1:
+** decb x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_m1, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u16_m2:
+** decb x0, all, mul #2
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_m2, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u16_m3:
+** decb x0, all, mul #3
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_m3, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_u16_m4:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_m4, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_u16_m32:
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_m32, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_u16_m36:
+** [^{]*
+** stnt1h {z0\.h - z3\.h}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_m36, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_u16_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1h {z0\.h - z3\.h}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1h {z0\.h - z3\.h}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u16_x1, svuint16x4_t, uint16_t,
+ svstnt1_vnum_u16_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_u32_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_u32_x2.c
new file mode 100644
index 0000000..a8736cf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_u32_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_u32_base:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_base, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_u32_index:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_index, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u32_1:
+** incb x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_1, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0 + svcntw (), z0),
+ svstnt1 (pn8, x0 + svcntw (), z0))
+
+/*
+** stnt1_u32_2:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_2, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0 + svcntw () * 2, z0),
+ svstnt1 (pn8, x0 + svcntw () * 2, z0))
+
+/*
+** stnt1_u32_14:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_14, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0 + svcntw () * 14, z0),
+ svstnt1 (pn8, x0 + svcntw () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u32_16:
+** incb x0, all, mul #16
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_16, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0 + svcntw () * 16, z0),
+ svstnt1 (pn8, x0 + svcntw () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u32_m1:
+** decb x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_m1, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0 - svcntw (), z0),
+ svstnt1 (pn8, x0 - svcntw (), z0))
+
+/*
+** stnt1_u32_m2:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_m2, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0 - svcntw () * 2, z0),
+ svstnt1 (pn8, x0 - svcntw () * 2, z0))
+
+/*
+** stnt1_u32_m16:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_m16, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0 - svcntw () * 16, z0),
+ svstnt1 (pn8, x0 - svcntw () * 16, z0))
+
+/*
+** stnt1_u32_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_m18, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0 - svcntw () * 18, z0),
+ svstnt1 (pn8, x0 - svcntw () * 18, z0))
+
+/*
+** stnt1_u32_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_z17, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_u32_z22:
+** stnt1w {z22\.s(?: - |, )z23\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_z22, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_u32_z28:
+** stnt1w {z28\.s(?: - |, )z29\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_z28, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_u32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_pn0, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_u32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_pn7, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_u32_pn15:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_pn15, svuint32x2_t, uint32_t,
+ svstnt1_u32_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_u32_0:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_0, svuint32x2_t, uint32_t,
+ svstnt1_vnum_u32_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u32_1:
+** incb x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_1, svuint32x2_t, uint32_t,
+ svstnt1_vnum_u32_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_u32_2:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_2, svuint32x2_t, uint32_t,
+ svstnt1_vnum_u32_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_u32_14:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_14, svuint32x2_t, uint32_t,
+ svstnt1_vnum_u32_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u32_16:
+** incb x0, all, mul #16
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_16, svuint32x2_t, uint32_t,
+ svstnt1_vnum_u32_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u32_m1:
+** decb x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_m1, svuint32x2_t, uint32_t,
+ svstnt1_vnum_u32_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_u32_m2:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_m2, svuint32x2_t, uint32_t,
+ svstnt1_vnum_u32_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_u32_m16:
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_m16, svuint32x2_t, uint32_t,
+ svstnt1_vnum_u32_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_u32_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_m18, svuint32x2_t, uint32_t,
+ svstnt1_vnum_u32_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_u32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1w {z0\.s(?: - |, )z1\.s}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_x1, svuint32x2_t, uint32_t,
+ svstnt1_vnum_u32_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_u32_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_u32_x4.c
new file mode 100644
index 0000000..ff5d4cb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_u32_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_u32_base:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_base, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_u32_index:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, x1, lsl #?2\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_index, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u32_1:
+** incb x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_1, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 + svcntw (), z0),
+ svstnt1 (pn8, x0 + svcntw (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u32_2:
+** incb x0, all, mul #2
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_2, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 + svcntw () * 2, z0),
+ svstnt1 (pn8, x0 + svcntw () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u32_3:
+** incb x0, all, mul #3
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_3, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 + svcntw () * 3, z0),
+ svstnt1 (pn8, x0 + svcntw () * 3, z0))
+
+/*
+** stnt1_u32_4:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_4, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 + svcntw () * 4, z0),
+ svstnt1 (pn8, x0 + svcntw () * 4, z0))
+
+/*
+** stnt1_u32_28:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_28, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 + svcntw () * 28, z0),
+ svstnt1 (pn8, x0 + svcntw () * 28, z0))
+
+/*
+** stnt1_u32_32:
+** [^{]*
+** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_32, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 + svcntw () * 32, z0),
+ svstnt1 (pn8, x0 + svcntw () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u32_m1:
+** decb x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_m1, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 - svcntw (), z0),
+ svstnt1 (pn8, x0 - svcntw (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u32_m2:
+** decb x0, all, mul #2
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_m2, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 - svcntw () * 2, z0),
+ svstnt1 (pn8, x0 - svcntw () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u32_m3:
+** decb x0, all, mul #3
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_m3, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 - svcntw () * 3, z0),
+ svstnt1 (pn8, x0 - svcntw () * 3, z0))
+
+/*
+** stnt1_u32_m4:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_m4, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 - svcntw () * 4, z0),
+ svstnt1 (pn8, x0 - svcntw () * 4, z0))
+
+/*
+** stnt1_u32_m32:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_m32, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 - svcntw () * 32, z0),
+ svstnt1 (pn8, x0 - svcntw () * 32, z0))
+
+/*
+** stnt1_u32_m36:
+** [^{]*
+** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_m36, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0 - svcntw () * 36, z0),
+ svstnt1 (pn8, x0 - svcntw () * 36, z0))
+
+/*
+** stnt1_u32_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_z17, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_u32_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1w {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_z22, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_u32_z28:
+** stnt1w {z28\.s - z31\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_z28, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_u32_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1w {z0\.s - z3\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_pn0, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_u32_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1w {z0\.s - z3\.s}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_pn7, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_u32_pn15:
+** stnt1w {z0\.s - z3\.s}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u32_pn15, svuint32x4_t, uint32_t,
+ svstnt1_u32_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_u32_0:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_0, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u32_1:
+** incb x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_1, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u32_2:
+** incb x0, all, mul #2
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_2, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u32_3:
+** incb x0, all, mul #3
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_3, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_u32_4:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_4, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_u32_28:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_28, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_u32_32:
+** [^{]*
+** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_32, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u32_m1:
+** decb x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_m1, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u32_m2:
+** decb x0, all, mul #2
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_m2, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u32_m3:
+** decb x0, all, mul #3
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_m3, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_u32_m4:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_m4, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_u32_m32:
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_m32, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_u32_m36:
+** [^{]*
+** stnt1w {z0\.s - z3\.s}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_m36, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_u32_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1w {z0\.s - z3\.s}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1w {z0\.s - z3\.s}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u32_x1, svuint32x4_t, uint32_t,
+ svstnt1_vnum_u32_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_u64_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_u64_x2.c
new file mode 100644
index 0000000..5de8f7b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_u64_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_u64_base:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_base, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_u64_index:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_index, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u64_1:
+** incb x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_1, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0 + svcntd (), z0),
+ svstnt1 (pn8, x0 + svcntd (), z0))
+
+/*
+** stnt1_u64_2:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_2, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0 + svcntd () * 2, z0),
+ svstnt1 (pn8, x0 + svcntd () * 2, z0))
+
+/*
+** stnt1_u64_14:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_14, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0 + svcntd () * 14, z0),
+ svstnt1 (pn8, x0 + svcntd () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u64_16:
+** incb x0, all, mul #16
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_16, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0 + svcntd () * 16, z0),
+ svstnt1 (pn8, x0 + svcntd () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u64_m1:
+** decb x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_m1, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0 - svcntd (), z0),
+ svstnt1 (pn8, x0 - svcntd (), z0))
+
+/*
+** stnt1_u64_m2:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_m2, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0 - svcntd () * 2, z0),
+ svstnt1 (pn8, x0 - svcntd () * 2, z0))
+
+/*
+** stnt1_u64_m16:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_m16, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0 - svcntd () * 16, z0),
+ svstnt1 (pn8, x0 - svcntd () * 16, z0))
+
+/*
+** stnt1_u64_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_m18, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0 - svcntd () * 18, z0),
+ svstnt1 (pn8, x0 - svcntd () * 18, z0))
+
+/*
+** stnt1_u64_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_z17, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_u64_z22:
+** stnt1d {z22\.d(?: - |, )z23\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_z22, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_u64_z28:
+** stnt1d {z28\.d(?: - |, )z29\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_z28, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_u64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_pn0, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_u64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_pn7, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_u64_pn15:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_pn15, svuint64x2_t, uint64_t,
+ svstnt1_u64_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_u64_0:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_0, svuint64x2_t, uint64_t,
+ svstnt1_vnum_u64_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u64_1:
+** incb x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_1, svuint64x2_t, uint64_t,
+ svstnt1_vnum_u64_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_u64_2:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_2, svuint64x2_t, uint64_t,
+ svstnt1_vnum_u64_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_u64_14:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_14, svuint64x2_t, uint64_t,
+ svstnt1_vnum_u64_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u64_16:
+** incb x0, all, mul #16
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_16, svuint64x2_t, uint64_t,
+ svstnt1_vnum_u64_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u64_m1:
+** decb x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_m1, svuint64x2_t, uint64_t,
+ svstnt1_vnum_u64_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_u64_m2:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_m2, svuint64x2_t, uint64_t,
+ svstnt1_vnum_u64_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_u64_m16:
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_m16, svuint64x2_t, uint64_t,
+ svstnt1_vnum_u64_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_u64_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_m18, svuint64x2_t, uint64_t,
+ svstnt1_vnum_u64_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_u64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1d {z0\.d(?: - |, )z1\.d}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_x1, svuint64x2_t, uint64_t,
+ svstnt1_vnum_u64_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_u64_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_u64_x4.c
new file mode 100644
index 0000000..23013d5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_u64_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_u64_base:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_base, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_u64_index:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, x1, lsl #?3\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_index, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u64_1:
+** incb x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_1, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 + svcntd (), z0),
+ svstnt1 (pn8, x0 + svcntd (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u64_2:
+** incb x0, all, mul #2
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_2, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 + svcntd () * 2, z0),
+ svstnt1 (pn8, x0 + svcntd () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u64_3:
+** incb x0, all, mul #3
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_3, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 + svcntd () * 3, z0),
+ svstnt1 (pn8, x0 + svcntd () * 3, z0))
+
+/*
+** stnt1_u64_4:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_4, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 + svcntd () * 4, z0),
+ svstnt1 (pn8, x0 + svcntd () * 4, z0))
+
+/*
+** stnt1_u64_28:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_28, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 + svcntd () * 28, z0),
+ svstnt1 (pn8, x0 + svcntd () * 28, z0))
+
+/*
+** stnt1_u64_32:
+** [^{]*
+** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_32, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 + svcntd () * 32, z0),
+ svstnt1 (pn8, x0 + svcntd () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u64_m1:
+** decb x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_m1, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 - svcntd (), z0),
+ svstnt1 (pn8, x0 - svcntd (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u64_m2:
+** decb x0, all, mul #2
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_m2, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 - svcntd () * 2, z0),
+ svstnt1 (pn8, x0 - svcntd () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u64_m3:
+** decb x0, all, mul #3
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_m3, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 - svcntd () * 3, z0),
+ svstnt1 (pn8, x0 - svcntd () * 3, z0))
+
+/*
+** stnt1_u64_m4:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_m4, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 - svcntd () * 4, z0),
+ svstnt1 (pn8, x0 - svcntd () * 4, z0))
+
+/*
+** stnt1_u64_m32:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_m32, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 - svcntd () * 32, z0),
+ svstnt1 (pn8, x0 - svcntd () * 32, z0))
+
+/*
+** stnt1_u64_m36:
+** [^{]*
+** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_m36, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0 - svcntd () * 36, z0),
+ svstnt1 (pn8, x0 - svcntd () * 36, z0))
+
+/*
+** stnt1_u64_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_z17, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_u64_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1d {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_z22, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_u64_z28:
+** stnt1d {z28\.d - z31\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_z28, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_u64_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1d {z0\.d - z3\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_pn0, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_u64_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1d {z0\.d - z3\.d}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_pn7, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_u64_pn15:
+** stnt1d {z0\.d - z3\.d}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u64_pn15, svuint64x4_t, uint64_t,
+ svstnt1_u64_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_u64_0:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_0, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u64_1:
+** incb x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_1, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u64_2:
+** incb x0, all, mul #2
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_2, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u64_3:
+** incb x0, all, mul #3
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_3, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_u64_4:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_4, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_u64_28:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_28, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_u64_32:
+** [^{]*
+** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_32, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u64_m1:
+** decb x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_m1, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u64_m2:
+** decb x0, all, mul #2
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_m2, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u64_m3:
+** decb x0, all, mul #3
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_m3, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_u64_m4:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_m4, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_u64_m32:
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_m32, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_u64_m36:
+** [^{]*
+** stnt1d {z0\.d - z3\.d}, pn8, \[x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_m36, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_u64_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1d {z0\.d - z3\.d}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1d {z0\.d - z3\.d}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u64_x1, svuint64x4_t, uint64_t,
+ svstnt1_vnum_u64_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_u8_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_u8_x2.c
new file mode 100644
index 0000000..3ad22ac
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_u8_x2.c
@@ -0,0 +1,269 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_u8_base:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_base, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_u8_index:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, x1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_index, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u8_1:
+** incb x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_1, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0 + svcntb (), z0),
+ svstnt1 (pn8, x0 + svcntb (), z0))
+
+/*
+** stnt1_u8_2:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_2, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0 + svcntb () * 2, z0),
+ svstnt1 (pn8, x0 + svcntb () * 2, z0))
+
+/*
+** stnt1_u8_14:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_14, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0 + svcntb () * 14, z0),
+ svstnt1 (pn8, x0 + svcntb () * 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u8_16:
+** incb x0, all, mul #16
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_16, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0 + svcntb () * 16, z0),
+ svstnt1 (pn8, x0 + svcntb () * 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u8_m1:
+** decb x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_m1, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0 - svcntb (), z0),
+ svstnt1 (pn8, x0 - svcntb (), z0))
+
+/*
+** stnt1_u8_m2:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_m2, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0 - svcntb () * 2, z0),
+ svstnt1 (pn8, x0 - svcntb () * 2, z0))
+
+/*
+** stnt1_u8_m16:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_m16, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0 - svcntb () * 16, z0),
+ svstnt1 (pn8, x0 - svcntb () * 16, z0))
+
+/*
+** stnt1_u8_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_m18, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0 - svcntb () * 18, z0),
+ svstnt1 (pn8, x0 - svcntb () * 18, z0))
+
+/*
+** stnt1_u8_z17:
+** mov [^\n]+
+** mov [^\n]+
+** stnt1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_z17, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_u8_z22:
+** stnt1b {z22\.b(?: - |, )z23\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_z22, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_u8_z28:
+** stnt1b {z28\.b(?: - |, )z29\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_z28, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_u8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_pn0, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_u8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_pn7, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_u8_pn15:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_pn15, svuint8x2_t, uint8_t,
+ svstnt1_u8_x2 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_u8_0:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_0, svuint8x2_t, uint8_t,
+ svstnt1_vnum_u8_x2 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u8_1:
+** incb x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_1, svuint8x2_t, uint8_t,
+ svstnt1_vnum_u8_x2 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/*
+** stnt1_vnum_u8_2:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_2, svuint8x2_t, uint8_t,
+ svstnt1_vnum_u8_x2 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/*
+** stnt1_vnum_u8_14:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #14, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_14, svuint8x2_t, uint8_t,
+ svstnt1_vnum_u8_x2 (pn8, x0, 14, z0),
+ svstnt1_vnum (pn8, x0, 14, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u8_16:
+** incb x0, all, mul #16
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_16, svuint8x2_t, uint8_t,
+ svstnt1_vnum_u8_x2 (pn8, x0, 16, z0),
+ svstnt1_vnum (pn8, x0, 16, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u8_m1:
+** decb x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_m1, svuint8x2_t, uint8_t,
+ svstnt1_vnum_u8_x2 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/*
+** stnt1_vnum_u8_m2:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-2, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_m2, svuint8x2_t, uint8_t,
+ svstnt1_vnum_u8_x2 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/*
+** stnt1_vnum_u8_m16:
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, #-16, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_m16, svuint8x2_t, uint8_t,
+ svstnt1_vnum_u8_x2 (pn8, x0, -16, z0),
+ svstnt1_vnum (pn8, x0, -16, z0))
+
+/*
+** stnt1_vnum_u8_m18:
+** addvl (x[0-9]+), x0, #-18
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[\1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_m18, svuint8x2_t, uint8_t,
+ svstnt1_vnum_u8_x2 (pn8, x0, -18, z0),
+ svstnt1_vnum (pn8, x0, -18, z0))
+
+/*
+** stnt1_vnum_u8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1b {z0\.b(?: - |, )z1\.b}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_x1, svuint8x2_t, uint8_t,
+ svstnt1_vnum_u8_x2 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_u8_x4.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_u8_x4.c
new file mode 100644
index 0000000..eec54b8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_u8_x4.c
@@ -0,0 +1,361 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** stnt1_u8_base:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_base, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0, z0),
+ svstnt1 (pn8, x0, z0))
+
+/*
+** stnt1_u8_index:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x1\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_index, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 + x1, z0),
+ svstnt1 (pn8, x0 + x1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u8_1:
+** incb x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_1, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 + svcntb (), z0),
+ svstnt1 (pn8, x0 + svcntb (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u8_2:
+** incb x0, all, mul #2
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_2, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 + svcntb () * 2, z0),
+ svstnt1 (pn8, x0 + svcntb () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u8_3:
+** incb x0, all, mul #3
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_3, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 + svcntb () * 3, z0),
+ svstnt1 (pn8, x0 + svcntb () * 3, z0))
+
+/*
+** stnt1_u8_4:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_4, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 + svcntb () * 4, z0),
+ svstnt1 (pn8, x0 + svcntb () * 4, z0))
+
+/*
+** stnt1_u8_28:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_28, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 + svcntb () * 28, z0),
+ svstnt1 (pn8, x0 + svcntb () * 28, z0))
+
+/*
+** stnt1_u8_32:
+** [^{]*
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_32, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 + svcntb () * 32, z0),
+ svstnt1 (pn8, x0 + svcntb () * 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u8_m1:
+** decb x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_m1, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 - svcntb (), z0),
+ svstnt1 (pn8, x0 - svcntb (), z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u8_m2:
+** decb x0, all, mul #2
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_m2, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 - svcntb () * 2, z0),
+ svstnt1 (pn8, x0 - svcntb () * 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_u8_m3:
+** decb x0, all, mul #3
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_m3, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 - svcntb () * 3, z0),
+ svstnt1 (pn8, x0 - svcntb () * 3, z0))
+
+/*
+** stnt1_u8_m4:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_m4, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 - svcntb () * 4, z0),
+ svstnt1 (pn8, x0 - svcntb () * 4, z0))
+
+/*
+** stnt1_u8_m32:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_m32, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 - svcntb () * 32, z0),
+ svstnt1 (pn8, x0 - svcntb () * 32, z0))
+
+/*
+** stnt1_u8_m36:
+** [^{]*
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_m36, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0 - svcntb () * 36, z0),
+ svstnt1 (pn8, x0 - svcntb () * 36, z0))
+
+/*
+** stnt1_u8_z17:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_z17, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0, z17),
+ svstnt1 (pn8, x0, z17))
+
+/*
+** stnt1_u8_z22:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** stnt1b {z[^\n]+}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_z22, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0, z22),
+ svstnt1 (pn8, x0, z22))
+
+/*
+** stnt1_u8_z28:
+** stnt1b {z28\.b - z31\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_z28, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn8, x0, z28),
+ svstnt1 (pn8, x0, z28))
+
+/*
+** stnt1_u8_pn0:
+** mov p([89]|1[0-5])\.b, p0\.b
+** stnt1b {z0\.b - z3\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_pn0, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn0, x0, z0),
+ svstnt1 (pn0, x0, z0))
+
+/*
+** stnt1_u8_pn7:
+** mov p([89]|1[0-5])\.b, p7\.b
+** stnt1b {z0\.b - z3\.b}, pn\1, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_pn7, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn7, x0, z0),
+ svstnt1 (pn7, x0, z0))
+
+/*
+** stnt1_u8_pn15:
+** stnt1b {z0\.b - z3\.b}, pn15, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_u8_pn15, svuint8x4_t, uint8_t,
+ svstnt1_u8_x4 (pn15, x0, z0),
+ svstnt1 (pn15, x0, z0))
+
+/*
+** stnt1_vnum_u8_0:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_0, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, 0, z0),
+ svstnt1_vnum (pn8, x0, 0, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u8_1:
+** incb x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_1, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, 1, z0),
+ svstnt1_vnum (pn8, x0, 1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u8_2:
+** incb x0, all, mul #2
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_2, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, 2, z0),
+ svstnt1_vnum (pn8, x0, 2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u8_3:
+** incb x0, all, mul #3
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_3, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, 3, z0),
+ svstnt1_vnum (pn8, x0, 3, z0))
+
+/*
+** stnt1_vnum_u8_4:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_4, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, 4, z0),
+ svstnt1_vnum (pn8, x0, 4, z0))
+
+/*
+** stnt1_vnum_u8_28:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #28, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_28, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, 28, z0),
+ svstnt1_vnum (pn8, x0, 28, z0))
+
+/*
+** stnt1_vnum_u8_32:
+** [^{]*
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_32, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, 32, z0),
+ svstnt1_vnum (pn8, x0, 32, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u8_m1:
+** decb x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_m1, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, -1, z0),
+ svstnt1_vnum (pn8, x0, -1, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u8_m2:
+** decb x0, all, mul #2
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_m2, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, -2, z0),
+ svstnt1_vnum (pn8, x0, -2, z0))
+
+/* Moving the constant into a register would also be OK. */
+/*
+** stnt1_vnum_u8_m3:
+** decb x0, all, mul #3
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_m3, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, -3, z0),
+ svstnt1_vnum (pn8, x0, -3, z0))
+
+/*
+** stnt1_vnum_u8_m4:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-4, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_m4, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, -4, z0),
+ svstnt1_vnum (pn8, x0, -4, z0))
+
+/*
+** stnt1_vnum_u8_m32:
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, #-32, mul vl\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_m32, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, -32, z0),
+ svstnt1_vnum (pn8, x0, -32, z0))
+
+/*
+** stnt1_vnum_u8_m36:
+** [^{]*
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, x[0-9]+\]
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_m36, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, -36, z0),
+ svstnt1_vnum (pn8, x0, -36, z0))
+
+/*
+** stnt1_vnum_u8_x1:
+** cntb (x[0-9]+)
+** (
+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0
+** stnt1b {z0\.b - z3\.b}, pn8, \[\2\]
+** |
+** mul (x[0-9]+), (?:x1, \1|\1, x1)
+** stnt1b {z0\.b - z3\.b}, pn8, \[x0, \3\]
+** )
+** ret
+*/
+TEST_STORE_COUNT (stnt1_vnum_u8_x1, svuint8x4_t, uint8_t,
+ svstnt1_vnum_u8_x4 (pn8, x0, x1, z0),
+ svstnt1_vnum (pn8, x0, x1, z0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilege_b16_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilege_b16_x2.c
new file mode 100644
index 0000000..d01e90a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilege_b16_x2.c
@@ -0,0 +1,126 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilege_p1_rr_s64:
+** whilege {p[0-9]+\.h, p[0-9]+\.h}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p1_rr_s64, int64_t,
+ p1 = svwhilege_b16_s64_x2 (x0, x1),
+ p1 = svwhilege_b16_x2 (x0, x1))
+
+/*
+** whilege_p4_rr_s64:
+** whilege {p4\.h, p5\.h}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_rr_s64, int64_t,
+ p4 = svwhilege_b16_s64_x2 (x0, x1),
+ p4 = svwhilege_b16_x2 (x0, x1))
+
+/*
+** whilege_p9_rr_s64:
+** whilege {p[0-9]+\.h, p[0-9]+\.h}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p9_rr_s64, int64_t,
+ p9 = svwhilege_b16_s64_x2 (x0, x1),
+ p9 = svwhilege_b16_x2 (x0, x1))
+
+/*
+** whilege_p14_rr_s64:
+** whilege {p14\.h, p15\.h}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p14_rr_s64, int64_t,
+ p14 = svwhilege_b16_s64_x2 (x0, x1),
+ p14 = svwhilege_b16_x2 (x0, x1))
+
+/*
+** whilege_p4_0r_s64:
+** whilege {p4\.h, p5\.h}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_0r_s64, int64_t,
+ p4 = svwhilege_b16_x2 ((int64_t) 0, x1),
+ p4 = svwhilege_b16_s64_x2 (0, x1))
+
+/*
+** whilege_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilege {p4\.h, p5\.h}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_5r_s64, int64_t,
+ p4 = svwhilege_b16_x2 ((int64_t) 5, x1),
+ p4 = svwhilege_b16_s64_x2 (5, x1))
+
+/*
+** whilege_p4_r0_s64:
+** whilege {p4\.h, p5\.h}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_r0_s64, int64_t,
+ p4 = svwhilege_b16_x2 (x0, (int64_t) 0),
+ p4 = svwhilege_b16_s64_x2 (x0, 0))
+
+/*
+** whilege_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilege {p14\.h, p15\.h}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p14_r5_s64, int64_t,
+ p14 = svwhilege_b16_x2 (x0, (int64_t) 5),
+ p14 = svwhilege_b16_s64_x2 (x0, 5))
+
+/*
+** whilege_p4_rr_u64:
+** whilehs {p4\.h, p5\.h}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_rr_u64, uint64_t,
+ p4 = svwhilege_b16_u64_x2 (x0, x1),
+ p4 = svwhilege_b16_x2 (x0, x1))
+
+/*
+** whilege_p4_0r_u64:
+** whilehs {p4\.h, p5\.h}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_0r_u64, uint64_t,
+ p4 = svwhilege_b16_x2 ((uint64_t) 0, x1),
+ p4 = svwhilege_b16_u64_x2 (0, x1))
+
+/*
+** whilege_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilehs {p4\.h, p5\.h}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_5r_u64, uint64_t,
+ p4 = svwhilege_b16_x2 ((uint64_t) 5, x1),
+ p4 = svwhilege_b16_u64_x2 (5, x1))
+
+/*
+** whilege_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilehs {p4\.h, p5\.h}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_r5_u64, uint64_t,
+ p4 = svwhilege_b16_x2 (x0, (uint64_t) 5),
+ p4 = svwhilege_b16_u64_x2 (x0, 5))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilege_b32_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilege_b32_x2.c
new file mode 100644
index 0000000..41e3ad7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilege_b32_x2.c
@@ -0,0 +1,126 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilege_p1_rr_s64:
+** whilege {p[0-9]+\.s, p[0-9]+\.s}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p1_rr_s64, int64_t,
+ p1 = svwhilege_b32_s64_x2 (x0, x1),
+ p1 = svwhilege_b32_x2 (x0, x1))
+
+/*
+** whilege_p4_rr_s64:
+** whilege {p4\.s, p5\.s}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_rr_s64, int64_t,
+ p4 = svwhilege_b32_s64_x2 (x0, x1),
+ p4 = svwhilege_b32_x2 (x0, x1))
+
+/*
+** whilege_p9_rr_s64:
+** whilege {p[0-9]+\.s, p[0-9]+\.s}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p9_rr_s64, int64_t,
+ p9 = svwhilege_b32_s64_x2 (x0, x1),
+ p9 = svwhilege_b32_x2 (x0, x1))
+
+/*
+** whilege_p14_rr_s64:
+** whilege {p14\.s, p15\.s}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p14_rr_s64, int64_t,
+ p14 = svwhilege_b32_s64_x2 (x0, x1),
+ p14 = svwhilege_b32_x2 (x0, x1))
+
+/*
+** whilege_p4_0r_s64:
+** whilege {p4\.s, p5\.s}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_0r_s64, int64_t,
+ p4 = svwhilege_b32_x2 ((int64_t) 0, x1),
+ p4 = svwhilege_b32_s64_x2 (0, x1))
+
+/*
+** whilege_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilege {p4\.s, p5\.s}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_5r_s64, int64_t,
+ p4 = svwhilege_b32_x2 ((int64_t) 5, x1),
+ p4 = svwhilege_b32_s64_x2 (5, x1))
+
+/*
+** whilege_p4_r0_s64:
+** whilege {p4\.s, p5\.s}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_r0_s64, int64_t,
+ p4 = svwhilege_b32_x2 (x0, (int64_t) 0),
+ p4 = svwhilege_b32_s64_x2 (x0, 0))
+
+/*
+** whilege_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilege {p14\.s, p15\.s}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p14_r5_s64, int64_t,
+ p14 = svwhilege_b32_x2 (x0, (int64_t) 5),
+ p14 = svwhilege_b32_s64_x2 (x0, 5))
+
+/*
+** whilege_p4_rr_u64:
+** whilehs {p4\.s, p5\.s}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_rr_u64, uint64_t,
+ p4 = svwhilege_b32_u64_x2 (x0, x1),
+ p4 = svwhilege_b32_x2 (x0, x1))
+
+/*
+** whilege_p4_0r_u64:
+** whilehs {p4\.s, p5\.s}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_0r_u64, uint64_t,
+ p4 = svwhilege_b32_x2 ((uint64_t) 0, x1),
+ p4 = svwhilege_b32_u64_x2 (0, x1))
+
+/*
+** whilege_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilehs {p4\.s, p5\.s}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_5r_u64, uint64_t,
+ p4 = svwhilege_b32_x2 ((uint64_t) 5, x1),
+ p4 = svwhilege_b32_u64_x2 (5, x1))
+
+/*
+** whilege_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilehs {p4\.s, p5\.s}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_r5_u64, uint64_t,
+ p4 = svwhilege_b32_x2 (x0, (uint64_t) 5),
+ p4 = svwhilege_b32_u64_x2 (x0, 5))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilege_b64_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilege_b64_x2.c
new file mode 100644
index 0000000..c873f49
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilege_b64_x2.c
@@ -0,0 +1,126 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilege_p1_rr_s64:
+** whilege {p[0-9]+\.d, p[0-9]+\.d}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p1_rr_s64, int64_t,
+ p1 = svwhilege_b64_s64_x2 (x0, x1),
+ p1 = svwhilege_b64_x2 (x0, x1))
+
+/*
+** whilege_p4_rr_s64:
+** whilege {p4\.d, p5\.d}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_rr_s64, int64_t,
+ p4 = svwhilege_b64_s64_x2 (x0, x1),
+ p4 = svwhilege_b64_x2 (x0, x1))
+
+/*
+** whilege_p9_rr_s64:
+** whilege {p[0-9]+\.d, p[0-9]+\.d}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p9_rr_s64, int64_t,
+ p9 = svwhilege_b64_s64_x2 (x0, x1),
+ p9 = svwhilege_b64_x2 (x0, x1))
+
+/*
+** whilege_p14_rr_s64:
+** whilege {p14\.d, p15\.d}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p14_rr_s64, int64_t,
+ p14 = svwhilege_b64_s64_x2 (x0, x1),
+ p14 = svwhilege_b64_x2 (x0, x1))
+
+/*
+** whilege_p4_0r_s64:
+** whilege {p4\.d, p5\.d}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_0r_s64, int64_t,
+ p4 = svwhilege_b64_x2 ((int64_t) 0, x1),
+ p4 = svwhilege_b64_s64_x2 (0, x1))
+
+/*
+** whilege_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilege {p4\.d, p5\.d}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_5r_s64, int64_t,
+ p4 = svwhilege_b64_x2 ((int64_t) 5, x1),
+ p4 = svwhilege_b64_s64_x2 (5, x1))
+
+/*
+** whilege_p4_r0_s64:
+** whilege {p4\.d, p5\.d}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_r0_s64, int64_t,
+ p4 = svwhilege_b64_x2 (x0, (int64_t) 0),
+ p4 = svwhilege_b64_s64_x2 (x0, 0))
+
+/*
+** whilege_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilege {p14\.d, p15\.d}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p14_r5_s64, int64_t,
+ p14 = svwhilege_b64_x2 (x0, (int64_t) 5),
+ p14 = svwhilege_b64_s64_x2 (x0, 5))
+
+/*
+** whilege_p4_rr_u64:
+** whilehs {p4\.d, p5\.d}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_rr_u64, uint64_t,
+ p4 = svwhilege_b64_u64_x2 (x0, x1),
+ p4 = svwhilege_b64_x2 (x0, x1))
+
+/*
+** whilege_p4_0r_u64:
+** whilehs {p4\.d, p5\.d}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_0r_u64, uint64_t,
+ p4 = svwhilege_b64_x2 ((uint64_t) 0, x1),
+ p4 = svwhilege_b64_u64_x2 (0, x1))
+
+/*
+** whilege_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilehs {p4\.d, p5\.d}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_5r_u64, uint64_t,
+ p4 = svwhilege_b64_x2 ((uint64_t) 5, x1),
+ p4 = svwhilege_b64_u64_x2 (5, x1))
+
+/*
+** whilege_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilehs {p4\.d, p5\.d}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_r5_u64, uint64_t,
+ p4 = svwhilege_b64_x2 (x0, (uint64_t) 5),
+ p4 = svwhilege_b64_u64_x2 (x0, 5))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilege_b8_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilege_b8_x2.c
new file mode 100644
index 0000000..c9a9fd0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilege_b8_x2.c
@@ -0,0 +1,126 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilege_p1_rr_s64:
+** whilege {p[0-9]+\.b, p[0-9]+\.b}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p1_rr_s64, int64_t,
+ p1 = svwhilege_b8_s64_x2 (x0, x1),
+ p1 = svwhilege_b8_x2 (x0, x1))
+
+/*
+** whilege_p4_rr_s64:
+** whilege {p4\.b, p5\.b}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_rr_s64, int64_t,
+ p4 = svwhilege_b8_s64_x2 (x0, x1),
+ p4 = svwhilege_b8_x2 (x0, x1))
+
+/*
+** whilege_p9_rr_s64:
+** whilege {p[0-9]+\.b, p[0-9]+\.b}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p9_rr_s64, int64_t,
+ p9 = svwhilege_b8_s64_x2 (x0, x1),
+ p9 = svwhilege_b8_x2 (x0, x1))
+
+/*
+** whilege_p14_rr_s64:
+** whilege {p14\.b, p15\.b}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p14_rr_s64, int64_t,
+ p14 = svwhilege_b8_s64_x2 (x0, x1),
+ p14 = svwhilege_b8_x2 (x0, x1))
+
+/*
+** whilege_p4_0r_s64:
+** whilege {p4\.b, p5\.b}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_0r_s64, int64_t,
+ p4 = svwhilege_b8_x2 ((int64_t) 0, x1),
+ p4 = svwhilege_b8_s64_x2 (0, x1))
+
+/*
+** whilege_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilege {p4\.b, p5\.b}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_5r_s64, int64_t,
+ p4 = svwhilege_b8_x2 ((int64_t) 5, x1),
+ p4 = svwhilege_b8_s64_x2 (5, x1))
+
+/*
+** whilege_p4_r0_s64:
+** whilege {p4\.b, p5\.b}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_r0_s64, int64_t,
+ p4 = svwhilege_b8_x2 (x0, (int64_t) 0),
+ p4 = svwhilege_b8_s64_x2 (x0, 0))
+
+/*
+** whilege_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilege {p14\.b, p15\.b}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p14_r5_s64, int64_t,
+ p14 = svwhilege_b8_x2 (x0, (int64_t) 5),
+ p14 = svwhilege_b8_s64_x2 (x0, 5))
+
+/*
+** whilege_p4_rr_u64:
+** whilehs {p4\.b, p5\.b}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_rr_u64, uint64_t,
+ p4 = svwhilege_b8_u64_x2 (x0, x1),
+ p4 = svwhilege_b8_x2 (x0, x1))
+
+/*
+** whilege_p4_0r_u64:
+** whilehs {p4\.b, p5\.b}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_0r_u64, uint64_t,
+ p4 = svwhilege_b8_x2 ((uint64_t) 0, x1),
+ p4 = svwhilege_b8_u64_x2 (0, x1))
+
+/*
+** whilege_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilehs {p4\.b, p5\.b}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_5r_u64, uint64_t,
+ p4 = svwhilege_b8_x2 ((uint64_t) 5, x1),
+ p4 = svwhilege_b8_u64_x2 (5, x1))
+
+/*
+** whilege_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilehs {p4\.b, p5\.b}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilege_p4_r5_u64, uint64_t,
+ p4 = svwhilege_b8_x2 (x0, (uint64_t) 5),
+ p4 = svwhilege_b8_u64_x2 (x0, 5))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilege_c16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilege_c16.c
new file mode 100644
index 0000000..f84b0ad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilege_c16.c
@@ -0,0 +1,124 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilege_pn0_rr_2_s64:
+** whilege pn[0-9]+\.h, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilege_c16_s64 (x0, x1, 2),
+ pn0 = svwhilege_c16 (x0, x1, 2))
+
+/*
+** whilege_pn7_rr_4_s64:
+** whilege pn[0-9]+\.h, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilege_c16_s64 (x0, x1, 4),
+ pn7 = svwhilege_c16 (x0, x1, 4))
+
+/*
+** whilege_pn8_rr_2_s64:
+** whilege pn8\.h, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilege_c16_s64 (x0, x1, 2),
+ pn8 = svwhilege_c16 (x0, x1, 2))
+
+/*
+** whilege_pn15_rr_4_s64:
+** whilege pn15\.h, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilege_c16_s64 (x0, x1, 4),
+ pn15 = svwhilege_c16 (x0, x1, 4))
+
+/*
+** whilege_pn8_0r_2_s64:
+** whilege pn8\.h, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilege_c16 ((int64_t) 0, x1, 2),
+ pn8 = svwhilege_c16_s64 (0, x1, 2))
+
+/*
+** whilege_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilege pn8\.h, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilege_c16 ((int64_t) 5, x1, 4),
+ pn8 = svwhilege_c16_s64 (5, x1, 4))
+
+/*
+** whilege_pn8_r0_2_s64:
+** whilege pn8\.h, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilege_c16 (x0, (int64_t) 0, 2),
+ pn8 = svwhilege_c16_s64 (x0, 0, 2))
+
+/*
+** whilege_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilege pn15\.h, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilege_c16 (x0, (int64_t) 5, 4),
+ pn15 = svwhilege_c16_s64 (x0, 5, 4))
+
+/*
+** whilege_pn8_rr_2_u64:
+** whilehs pn8\.h, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilege_c16_u64 (x0, x1, 2),
+ pn8 = svwhilege_c16 (x0, x1, 2))
+
+/*
+** whilege_pn8_0r_4_u64:
+** whilehs pn8\.h, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilege_c16 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilege_c16_u64 (0, x1, 4))
+
+/*
+** whilege_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilehs pn8\.h, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilege_c16 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilege_c16_u64 (5, x1, 2))
+
+/*
+** whilege_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilehs pn8\.h, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilege_c16 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilege_c16_u64 (x0, 5, 4))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilege_c32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilege_c32.c
new file mode 100644
index 0000000..8d84149
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilege_c32.c
@@ -0,0 +1,124 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilege_pn0_rr_2_s64:
+** whilege pn[0-9]+\.s, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilege_c32_s64 (x0, x1, 2),
+ pn0 = svwhilege_c32 (x0, x1, 2))
+
+/*
+** whilege_pn7_rr_4_s64:
+** whilege pn[0-9]+\.s, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilege_c32_s64 (x0, x1, 4),
+ pn7 = svwhilege_c32 (x0, x1, 4))
+
+/*
+** whilege_pn8_rr_2_s64:
+** whilege pn8\.s, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilege_c32_s64 (x0, x1, 2),
+ pn8 = svwhilege_c32 (x0, x1, 2))
+
+/*
+** whilege_pn15_rr_4_s64:
+** whilege pn15\.s, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilege_c32_s64 (x0, x1, 4),
+ pn15 = svwhilege_c32 (x0, x1, 4))
+
+/*
+** whilege_pn8_0r_2_s64:
+** whilege pn8\.s, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilege_c32 ((int64_t) 0, x1, 2),
+ pn8 = svwhilege_c32_s64 (0, x1, 2))
+
+/*
+** whilege_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilege pn8\.s, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilege_c32 ((int64_t) 5, x1, 4),
+ pn8 = svwhilege_c32_s64 (5, x1, 4))
+
+/*
+** whilege_pn8_r0_2_s64:
+** whilege pn8\.s, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilege_c32 (x0, (int64_t) 0, 2),
+ pn8 = svwhilege_c32_s64 (x0, 0, 2))
+
+/*
+** whilege_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilege pn15\.s, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilege_c32 (x0, (int64_t) 5, 4),
+ pn15 = svwhilege_c32_s64 (x0, 5, 4))
+
+/*
+** whilege_pn8_rr_2_u64:
+** whilehs pn8\.s, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilege_c32_u64 (x0, x1, 2),
+ pn8 = svwhilege_c32 (x0, x1, 2))
+
+/*
+** whilege_pn8_0r_4_u64:
+** whilehs pn8\.s, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilege_c32 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilege_c32_u64 (0, x1, 4))
+
+/*
+** whilege_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilehs pn8\.s, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilege_c32 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilege_c32_u64 (5, x1, 2))
+
+/*
+** whilege_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilehs pn8\.s, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilege_c32 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilege_c32_u64 (x0, 5, 4))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilege_c64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilege_c64.c
new file mode 100644
index 0000000..e7f8673
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilege_c64.c
@@ -0,0 +1,124 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilege_pn0_rr_2_s64:
+** whilege pn[0-9]+\.d, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilege_c64_s64 (x0, x1, 2),
+ pn0 = svwhilege_c64 (x0, x1, 2))
+
+/*
+** whilege_pn7_rr_4_s64:
+** whilege pn[0-9]+\.d, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilege_c64_s64 (x0, x1, 4),
+ pn7 = svwhilege_c64 (x0, x1, 4))
+
+/*
+** whilege_pn8_rr_2_s64:
+** whilege pn8\.d, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilege_c64_s64 (x0, x1, 2),
+ pn8 = svwhilege_c64 (x0, x1, 2))
+
+/*
+** whilege_pn15_rr_4_s64:
+** whilege pn15\.d, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilege_c64_s64 (x0, x1, 4),
+ pn15 = svwhilege_c64 (x0, x1, 4))
+
+/*
+** whilege_pn8_0r_2_s64:
+** whilege pn8\.d, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilege_c64 ((int64_t) 0, x1, 2),
+ pn8 = svwhilege_c64_s64 (0, x1, 2))
+
+/*
+** whilege_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilege pn8\.d, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilege_c64 ((int64_t) 5, x1, 4),
+ pn8 = svwhilege_c64_s64 (5, x1, 4))
+
+/*
+** whilege_pn8_r0_2_s64:
+** whilege pn8\.d, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilege_c64 (x0, (int64_t) 0, 2),
+ pn8 = svwhilege_c64_s64 (x0, 0, 2))
+
+/*
+** whilege_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilege pn15\.d, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilege_c64 (x0, (int64_t) 5, 4),
+ pn15 = svwhilege_c64_s64 (x0, 5, 4))
+
+/*
+** whilege_pn8_rr_2_u64:
+** whilehs pn8\.d, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilege_c64_u64 (x0, x1, 2),
+ pn8 = svwhilege_c64 (x0, x1, 2))
+
+/*
+** whilege_pn8_0r_4_u64:
+** whilehs pn8\.d, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilege_c64 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilege_c64_u64 (0, x1, 4))
+
+/*
+** whilege_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilehs pn8\.d, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilege_c64 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilege_c64_u64 (5, x1, 2))
+
+/*
+** whilege_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilehs pn8\.d, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilege_c64 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilege_c64_u64 (x0, 5, 4))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilege_c8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilege_c8.c
new file mode 100644
index 0000000..69859bf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilege_c8.c
@@ -0,0 +1,124 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilege_pn0_rr_2_s64:
+** whilege pn[0-9]+\.b, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilege_c8_s64 (x0, x1, 2),
+ pn0 = svwhilege_c8 (x0, x1, 2))
+
+/*
+** whilege_pn7_rr_4_s64:
+** whilege pn[0-9]+\.b, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilege_c8_s64 (x0, x1, 4),
+ pn7 = svwhilege_c8 (x0, x1, 4))
+
+/*
+** whilege_pn8_rr_2_s64:
+** whilege pn8\.b, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilege_c8_s64 (x0, x1, 2),
+ pn8 = svwhilege_c8 (x0, x1, 2))
+
+/*
+** whilege_pn15_rr_4_s64:
+** whilege pn15\.b, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilege_c8_s64 (x0, x1, 4),
+ pn15 = svwhilege_c8 (x0, x1, 4))
+
+/*
+** whilege_pn8_0r_2_s64:
+** whilege pn8\.b, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilege_c8 ((int64_t) 0, x1, 2),
+ pn8 = svwhilege_c8_s64 (0, x1, 2))
+
+/*
+** whilege_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilege pn8\.b, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilege_c8 ((int64_t) 5, x1, 4),
+ pn8 = svwhilege_c8_s64 (5, x1, 4))
+
+/*
+** whilege_pn8_r0_2_s64:
+** whilege pn8\.b, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilege_c8 (x0, (int64_t) 0, 2),
+ pn8 = svwhilege_c8_s64 (x0, 0, 2))
+
+/*
+** whilege_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilege pn15\.b, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilege_c8 (x0, (int64_t) 5, 4),
+ pn15 = svwhilege_c8_s64 (x0, 5, 4))
+
+/*
+** whilege_pn8_rr_2_u64:
+** whilehs pn8\.b, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilege_c8_u64 (x0, x1, 2),
+ pn8 = svwhilege_c8 (x0, x1, 2))
+
+/*
+** whilege_pn8_0r_4_u64:
+** whilehs pn8\.b, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilege_c8 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilege_c8_u64 (0, x1, 4))
+
+/*
+** whilege_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilehs pn8\.b, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilege_c8 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilege_c8_u64 (5, x1, 2))
+
+/*
+** whilege_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilehs pn8\.b, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilege_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilege_c8 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilege_c8_u64 (x0, 5, 4))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilegt_b16_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilegt_b16_x2.c
new file mode 100644
index 0000000..075cee4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilegt_b16_x2.c
@@ -0,0 +1,126 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilegt_p1_rr_s64:
+** whilegt {p[0-9]+\.h, p[0-9]+\.h}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p1_rr_s64, int64_t,
+ p1 = svwhilegt_b16_s64_x2 (x0, x1),
+ p1 = svwhilegt_b16_x2 (x0, x1))
+
+/*
+** whilegt_p4_rr_s64:
+** whilegt {p4\.h, p5\.h}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_rr_s64, int64_t,
+ p4 = svwhilegt_b16_s64_x2 (x0, x1),
+ p4 = svwhilegt_b16_x2 (x0, x1))
+
+/*
+** whilegt_p9_rr_s64:
+** whilegt {p[0-9]+\.h, p[0-9]+\.h}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p9_rr_s64, int64_t,
+ p9 = svwhilegt_b16_s64_x2 (x0, x1),
+ p9 = svwhilegt_b16_x2 (x0, x1))
+
+/*
+** whilegt_p14_rr_s64:
+** whilegt {p14\.h, p15\.h}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p14_rr_s64, int64_t,
+ p14 = svwhilegt_b16_s64_x2 (x0, x1),
+ p14 = svwhilegt_b16_x2 (x0, x1))
+
+/*
+** whilegt_p4_0r_s64:
+** whilegt {p4\.h, p5\.h}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_0r_s64, int64_t,
+ p4 = svwhilegt_b16_x2 ((int64_t) 0, x1),
+ p4 = svwhilegt_b16_s64_x2 (0, x1))
+
+/*
+** whilegt_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilegt {p4\.h, p5\.h}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_5r_s64, int64_t,
+ p4 = svwhilegt_b16_x2 ((int64_t) 5, x1),
+ p4 = svwhilegt_b16_s64_x2 (5, x1))
+
+/*
+** whilegt_p4_r0_s64:
+** whilegt {p4\.h, p5\.h}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_r0_s64, int64_t,
+ p4 = svwhilegt_b16_x2 (x0, (int64_t) 0),
+ p4 = svwhilegt_b16_s64_x2 (x0, 0))
+
+/*
+** whilegt_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilegt {p14\.h, p15\.h}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p14_r5_s64, int64_t,
+ p14 = svwhilegt_b16_x2 (x0, (int64_t) 5),
+ p14 = svwhilegt_b16_s64_x2 (x0, 5))
+
+/*
+** whilegt_p4_rr_u64:
+** whilehi {p4\.h, p5\.h}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_rr_u64, uint64_t,
+ p4 = svwhilegt_b16_u64_x2 (x0, x1),
+ p4 = svwhilegt_b16_x2 (x0, x1))
+
+/*
+** whilegt_p4_0r_u64:
+** whilehi {p4\.h, p5\.h}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_0r_u64, uint64_t,
+ p4 = svwhilegt_b16_x2 ((uint64_t) 0, x1),
+ p4 = svwhilegt_b16_u64_x2 (0, x1))
+
+/*
+** whilegt_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilehi {p4\.h, p5\.h}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_5r_u64, uint64_t,
+ p4 = svwhilegt_b16_x2 ((uint64_t) 5, x1),
+ p4 = svwhilegt_b16_u64_x2 (5, x1))
+
+/*
+** whilegt_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilehi {p4\.h, p5\.h}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_r5_u64, uint64_t,
+ p4 = svwhilegt_b16_x2 (x0, (uint64_t) 5),
+ p4 = svwhilegt_b16_u64_x2 (x0, 5))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilegt_b32_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilegt_b32_x2.c
new file mode 100644
index 0000000..ead3434
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilegt_b32_x2.c
@@ -0,0 +1,126 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilegt_p1_rr_s64:
+** whilegt {p[0-9]+\.s, p[0-9]+\.s}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p1_rr_s64, int64_t,
+ p1 = svwhilegt_b32_s64_x2 (x0, x1),
+ p1 = svwhilegt_b32_x2 (x0, x1))
+
+/*
+** whilegt_p4_rr_s64:
+** whilegt {p4\.s, p5\.s}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_rr_s64, int64_t,
+ p4 = svwhilegt_b32_s64_x2 (x0, x1),
+ p4 = svwhilegt_b32_x2 (x0, x1))
+
+/*
+** whilegt_p9_rr_s64:
+** whilegt {p[0-9]+\.s, p[0-9]+\.s}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p9_rr_s64, int64_t,
+ p9 = svwhilegt_b32_s64_x2 (x0, x1),
+ p9 = svwhilegt_b32_x2 (x0, x1))
+
+/*
+** whilegt_p14_rr_s64:
+** whilegt {p14\.s, p15\.s}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p14_rr_s64, int64_t,
+ p14 = svwhilegt_b32_s64_x2 (x0, x1),
+ p14 = svwhilegt_b32_x2 (x0, x1))
+
+/*
+** whilegt_p4_0r_s64:
+** whilegt {p4\.s, p5\.s}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_0r_s64, int64_t,
+ p4 = svwhilegt_b32_x2 ((int64_t) 0, x1),
+ p4 = svwhilegt_b32_s64_x2 (0, x1))
+
+/*
+** whilegt_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilegt {p4\.s, p5\.s}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_5r_s64, int64_t,
+ p4 = svwhilegt_b32_x2 ((int64_t) 5, x1),
+ p4 = svwhilegt_b32_s64_x2 (5, x1))
+
+/*
+** whilegt_p4_r0_s64:
+** whilegt {p4\.s, p5\.s}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_r0_s64, int64_t,
+ p4 = svwhilegt_b32_x2 (x0, (int64_t) 0),
+ p4 = svwhilegt_b32_s64_x2 (x0, 0))
+
+/*
+** whilegt_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilegt {p14\.s, p15\.s}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p14_r5_s64, int64_t,
+ p14 = svwhilegt_b32_x2 (x0, (int64_t) 5),
+ p14 = svwhilegt_b32_s64_x2 (x0, 5))
+
+/*
+** whilegt_p4_rr_u64:
+** whilehi {p4\.s, p5\.s}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_rr_u64, uint64_t,
+ p4 = svwhilegt_b32_u64_x2 (x0, x1),
+ p4 = svwhilegt_b32_x2 (x0, x1))
+
+/*
+** whilegt_p4_0r_u64:
+** whilehi {p4\.s, p5\.s}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_0r_u64, uint64_t,
+ p4 = svwhilegt_b32_x2 ((uint64_t) 0, x1),
+ p4 = svwhilegt_b32_u64_x2 (0, x1))
+
+/*
+** whilegt_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilehi {p4\.s, p5\.s}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_5r_u64, uint64_t,
+ p4 = svwhilegt_b32_x2 ((uint64_t) 5, x1),
+ p4 = svwhilegt_b32_u64_x2 (5, x1))
+
+/*
+** whilegt_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilehi {p4\.s, p5\.s}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_r5_u64, uint64_t,
+ p4 = svwhilegt_b32_x2 (x0, (uint64_t) 5),
+ p4 = svwhilegt_b32_u64_x2 (x0, 5))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilegt_b64_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilegt_b64_x2.c
new file mode 100644
index 0000000..4be2264
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilegt_b64_x2.c
@@ -0,0 +1,126 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilegt_p1_rr_s64:
+** whilegt {p[0-9]+\.d, p[0-9]+\.d}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p1_rr_s64, int64_t,
+ p1 = svwhilegt_b64_s64_x2 (x0, x1),
+ p1 = svwhilegt_b64_x2 (x0, x1))
+
+/*
+** whilegt_p4_rr_s64:
+** whilegt {p4\.d, p5\.d}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_rr_s64, int64_t,
+ p4 = svwhilegt_b64_s64_x2 (x0, x1),
+ p4 = svwhilegt_b64_x2 (x0, x1))
+
+/*
+** whilegt_p9_rr_s64:
+** whilegt {p[0-9]+\.d, p[0-9]+\.d}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p9_rr_s64, int64_t,
+ p9 = svwhilegt_b64_s64_x2 (x0, x1),
+ p9 = svwhilegt_b64_x2 (x0, x1))
+
+/*
+** whilegt_p14_rr_s64:
+** whilegt {p14\.d, p15\.d}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p14_rr_s64, int64_t,
+ p14 = svwhilegt_b64_s64_x2 (x0, x1),
+ p14 = svwhilegt_b64_x2 (x0, x1))
+
+/*
+** whilegt_p4_0r_s64:
+** whilegt {p4\.d, p5\.d}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_0r_s64, int64_t,
+ p4 = svwhilegt_b64_x2 ((int64_t) 0, x1),
+ p4 = svwhilegt_b64_s64_x2 (0, x1))
+
+/*
+** whilegt_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilegt {p4\.d, p5\.d}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_5r_s64, int64_t,
+ p4 = svwhilegt_b64_x2 ((int64_t) 5, x1),
+ p4 = svwhilegt_b64_s64_x2 (5, x1))
+
+/*
+** whilegt_p4_r0_s64:
+** whilegt {p4\.d, p5\.d}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_r0_s64, int64_t,
+ p4 = svwhilegt_b64_x2 (x0, (int64_t) 0),
+ p4 = svwhilegt_b64_s64_x2 (x0, 0))
+
+/*
+** whilegt_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilegt {p14\.d, p15\.d}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p14_r5_s64, int64_t,
+ p14 = svwhilegt_b64_x2 (x0, (int64_t) 5),
+ p14 = svwhilegt_b64_s64_x2 (x0, 5))
+
+/*
+** whilegt_p4_rr_u64:
+** whilehi {p4\.d, p5\.d}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_rr_u64, uint64_t,
+ p4 = svwhilegt_b64_u64_x2 (x0, x1),
+ p4 = svwhilegt_b64_x2 (x0, x1))
+
+/*
+** whilegt_p4_0r_u64:
+** whilehi {p4\.d, p5\.d}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_0r_u64, uint64_t,
+ p4 = svwhilegt_b64_x2 ((uint64_t) 0, x1),
+ p4 = svwhilegt_b64_u64_x2 (0, x1))
+
+/*
+** whilegt_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilehi {p4\.d, p5\.d}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_5r_u64, uint64_t,
+ p4 = svwhilegt_b64_x2 ((uint64_t) 5, x1),
+ p4 = svwhilegt_b64_u64_x2 (5, x1))
+
+/*
+** whilegt_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilehi {p4\.d, p5\.d}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_r5_u64, uint64_t,
+ p4 = svwhilegt_b64_x2 (x0, (uint64_t) 5),
+ p4 = svwhilegt_b64_u64_x2 (x0, 5))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilegt_b8_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilegt_b8_x2.c
new file mode 100644
index 0000000..c982422
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilegt_b8_x2.c
@@ -0,0 +1,126 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilegt_p1_rr_s64:
+** whilegt {p[0-9]+\.b, p[0-9]+\.b}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p1_rr_s64, int64_t,
+ p1 = svwhilegt_b8_s64_x2 (x0, x1),
+ p1 = svwhilegt_b8_x2 (x0, x1))
+
+/*
+** whilegt_p4_rr_s64:
+** whilegt {p4\.b, p5\.b}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_rr_s64, int64_t,
+ p4 = svwhilegt_b8_s64_x2 (x0, x1),
+ p4 = svwhilegt_b8_x2 (x0, x1))
+
+/*
+** whilegt_p9_rr_s64:
+** whilegt {p[0-9]+\.b, p[0-9]+\.b}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p9_rr_s64, int64_t,
+ p9 = svwhilegt_b8_s64_x2 (x0, x1),
+ p9 = svwhilegt_b8_x2 (x0, x1))
+
+/*
+** whilegt_p14_rr_s64:
+** whilegt {p14\.b, p15\.b}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p14_rr_s64, int64_t,
+ p14 = svwhilegt_b8_s64_x2 (x0, x1),
+ p14 = svwhilegt_b8_x2 (x0, x1))
+
+/*
+** whilegt_p4_0r_s64:
+** whilegt {p4\.b, p5\.b}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_0r_s64, int64_t,
+ p4 = svwhilegt_b8_x2 ((int64_t) 0, x1),
+ p4 = svwhilegt_b8_s64_x2 (0, x1))
+
+/*
+** whilegt_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilegt {p4\.b, p5\.b}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_5r_s64, int64_t,
+ p4 = svwhilegt_b8_x2 ((int64_t) 5, x1),
+ p4 = svwhilegt_b8_s64_x2 (5, x1))
+
+/*
+** whilegt_p4_r0_s64:
+** whilegt {p4\.b, p5\.b}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_r0_s64, int64_t,
+ p4 = svwhilegt_b8_x2 (x0, (int64_t) 0),
+ p4 = svwhilegt_b8_s64_x2 (x0, 0))
+
+/*
+** whilegt_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilegt {p14\.b, p15\.b}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p14_r5_s64, int64_t,
+ p14 = svwhilegt_b8_x2 (x0, (int64_t) 5),
+ p14 = svwhilegt_b8_s64_x2 (x0, 5))
+
+/*
+** whilegt_p4_rr_u64:
+** whilehi {p4\.b, p5\.b}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_rr_u64, uint64_t,
+ p4 = svwhilegt_b8_u64_x2 (x0, x1),
+ p4 = svwhilegt_b8_x2 (x0, x1))
+
+/*
+** whilegt_p4_0r_u64:
+** whilehi {p4\.b, p5\.b}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_0r_u64, uint64_t,
+ p4 = svwhilegt_b8_x2 ((uint64_t) 0, x1),
+ p4 = svwhilegt_b8_u64_x2 (0, x1))
+
+/*
+** whilegt_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilehi {p4\.b, p5\.b}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_5r_u64, uint64_t,
+ p4 = svwhilegt_b8_x2 ((uint64_t) 5, x1),
+ p4 = svwhilegt_b8_u64_x2 (5, x1))
+
+/*
+** whilegt_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilehi {p4\.b, p5\.b}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilegt_p4_r5_u64, uint64_t,
+ p4 = svwhilegt_b8_x2 (x0, (uint64_t) 5),
+ p4 = svwhilegt_b8_u64_x2 (x0, 5))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilegt_c16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilegt_c16.c
new file mode 100644
index 0000000..f7a1c23
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilegt_c16.c
@@ -0,0 +1,124 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilegt_pn0_rr_2_s64:
+** whilegt pn[0-9]+\.h, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilegt_c16_s64 (x0, x1, 2),
+ pn0 = svwhilegt_c16 (x0, x1, 2))
+
+/*
+** whilegt_pn7_rr_4_s64:
+** whilegt pn[0-9]+\.h, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilegt_c16_s64 (x0, x1, 4),
+ pn7 = svwhilegt_c16 (x0, x1, 4))
+
+/*
+** whilegt_pn8_rr_2_s64:
+** whilegt pn8\.h, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilegt_c16_s64 (x0, x1, 2),
+ pn8 = svwhilegt_c16 (x0, x1, 2))
+
+/*
+** whilegt_pn15_rr_4_s64:
+** whilegt pn15\.h, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilegt_c16_s64 (x0, x1, 4),
+ pn15 = svwhilegt_c16 (x0, x1, 4))
+
+/*
+** whilegt_pn8_0r_2_s64:
+** whilegt pn8\.h, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilegt_c16 ((int64_t) 0, x1, 2),
+ pn8 = svwhilegt_c16_s64 (0, x1, 2))
+
+/*
+** whilegt_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilegt pn8\.h, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilegt_c16 ((int64_t) 5, x1, 4),
+ pn8 = svwhilegt_c16_s64 (5, x1, 4))
+
+/*
+** whilegt_pn8_r0_2_s64:
+** whilegt pn8\.h, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilegt_c16 (x0, (int64_t) 0, 2),
+ pn8 = svwhilegt_c16_s64 (x0, 0, 2))
+
+/*
+** whilegt_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilegt pn15\.h, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilegt_c16 (x0, (int64_t) 5, 4),
+ pn15 = svwhilegt_c16_s64 (x0, 5, 4))
+
+/*
+** whilegt_pn8_rr_2_u64:
+** whilehi pn8\.h, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilegt_c16_u64 (x0, x1, 2),
+ pn8 = svwhilegt_c16 (x0, x1, 2))
+
+/*
+** whilegt_pn8_0r_4_u64:
+** whilehi pn8\.h, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilegt_c16 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilegt_c16_u64 (0, x1, 4))
+
+/*
+** whilegt_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilehi pn8\.h, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilegt_c16 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilegt_c16_u64 (5, x1, 2))
+
+/*
+** whilegt_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilehi pn8\.h, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilegt_c16 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilegt_c16_u64 (x0, 5, 4))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilegt_c32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilegt_c32.c
new file mode 100644
index 0000000..0379cc2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilegt_c32.c
@@ -0,0 +1,124 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilegt_pn0_rr_2_s64:
+** whilegt pn[0-9]+\.s, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilegt_c32_s64 (x0, x1, 2),
+ pn0 = svwhilegt_c32 (x0, x1, 2))
+
+/*
+** whilegt_pn7_rr_4_s64:
+** whilegt pn[0-9]+\.s, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilegt_c32_s64 (x0, x1, 4),
+ pn7 = svwhilegt_c32 (x0, x1, 4))
+
+/*
+** whilegt_pn8_rr_2_s64:
+** whilegt pn8\.s, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilegt_c32_s64 (x0, x1, 2),
+ pn8 = svwhilegt_c32 (x0, x1, 2))
+
+/*
+** whilegt_pn15_rr_4_s64:
+** whilegt pn15\.s, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilegt_c32_s64 (x0, x1, 4),
+ pn15 = svwhilegt_c32 (x0, x1, 4))
+
+/*
+** whilegt_pn8_0r_2_s64:
+** whilegt pn8\.s, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilegt_c32 ((int64_t) 0, x1, 2),
+ pn8 = svwhilegt_c32_s64 (0, x1, 2))
+
+/*
+** whilegt_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilegt pn8\.s, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilegt_c32 ((int64_t) 5, x1, 4),
+ pn8 = svwhilegt_c32_s64 (5, x1, 4))
+
+/*
+** whilegt_pn8_r0_2_s64:
+** whilegt pn8\.s, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilegt_c32 (x0, (int64_t) 0, 2),
+ pn8 = svwhilegt_c32_s64 (x0, 0, 2))
+
+/*
+** whilegt_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilegt pn15\.s, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilegt_c32 (x0, (int64_t) 5, 4),
+ pn15 = svwhilegt_c32_s64 (x0, 5, 4))
+
+/*
+** whilegt_pn8_rr_2_u64:
+** whilehi pn8\.s, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilegt_c32_u64 (x0, x1, 2),
+ pn8 = svwhilegt_c32 (x0, x1, 2))
+
+/*
+** whilegt_pn8_0r_4_u64:
+** whilehi pn8\.s, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilegt_c32 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilegt_c32_u64 (0, x1, 4))
+
+/*
+** whilegt_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilehi pn8\.s, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilegt_c32 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilegt_c32_u64 (5, x1, 2))
+
+/*
+** whilegt_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilehi pn8\.s, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilegt_c32 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilegt_c32_u64 (x0, 5, 4))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilegt_c64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilegt_c64.c
new file mode 100644
index 0000000..a457499
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilegt_c64.c
@@ -0,0 +1,124 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilegt_pn0_rr_2_s64:
+** whilegt pn[0-9]+\.d, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilegt_c64_s64 (x0, x1, 2),
+ pn0 = svwhilegt_c64 (x0, x1, 2))
+
+/*
+** whilegt_pn7_rr_4_s64:
+** whilegt pn[0-9]+\.d, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilegt_c64_s64 (x0, x1, 4),
+ pn7 = svwhilegt_c64 (x0, x1, 4))
+
+/*
+** whilegt_pn8_rr_2_s64:
+** whilegt pn8\.d, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilegt_c64_s64 (x0, x1, 2),
+ pn8 = svwhilegt_c64 (x0, x1, 2))
+
+/*
+** whilegt_pn15_rr_4_s64:
+** whilegt pn15\.d, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilegt_c64_s64 (x0, x1, 4),
+ pn15 = svwhilegt_c64 (x0, x1, 4))
+
+/*
+** whilegt_pn8_0r_2_s64:
+** whilegt pn8\.d, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilegt_c64 ((int64_t) 0, x1, 2),
+ pn8 = svwhilegt_c64_s64 (0, x1, 2))
+
+/*
+** whilegt_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilegt pn8\.d, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilegt_c64 ((int64_t) 5, x1, 4),
+ pn8 = svwhilegt_c64_s64 (5, x1, 4))
+
+/*
+** whilegt_pn8_r0_2_s64:
+** whilegt pn8\.d, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilegt_c64 (x0, (int64_t) 0, 2),
+ pn8 = svwhilegt_c64_s64 (x0, 0, 2))
+
+/*
+** whilegt_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilegt pn15\.d, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilegt_c64 (x0, (int64_t) 5, 4),
+ pn15 = svwhilegt_c64_s64 (x0, 5, 4))
+
+/*
+** whilegt_pn8_rr_2_u64:
+** whilehi pn8\.d, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilegt_c64_u64 (x0, x1, 2),
+ pn8 = svwhilegt_c64 (x0, x1, 2))
+
+/*
+** whilegt_pn8_0r_4_u64:
+** whilehi pn8\.d, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilegt_c64 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilegt_c64_u64 (0, x1, 4))
+
+/*
+** whilegt_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilehi pn8\.d, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilegt_c64 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilegt_c64_u64 (5, x1, 2))
+
+/*
+** whilegt_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilehi pn8\.d, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilegt_c64 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilegt_c64_u64 (x0, 5, 4))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilegt_c8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilegt_c8.c
new file mode 100644
index 0000000..b0465f1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilegt_c8.c
@@ -0,0 +1,124 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilegt_pn0_rr_2_s64:
+** whilegt pn[0-9]+\.b, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilegt_c8_s64 (x0, x1, 2),
+ pn0 = svwhilegt_c8 (x0, x1, 2))
+
+/*
+** whilegt_pn7_rr_4_s64:
+** whilegt pn[0-9]+\.b, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilegt_c8_s64 (x0, x1, 4),
+ pn7 = svwhilegt_c8 (x0, x1, 4))
+
+/*
+** whilegt_pn8_rr_2_s64:
+** whilegt pn8\.b, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilegt_c8_s64 (x0, x1, 2),
+ pn8 = svwhilegt_c8 (x0, x1, 2))
+
+/*
+** whilegt_pn15_rr_4_s64:
+** whilegt pn15\.b, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilegt_c8_s64 (x0, x1, 4),
+ pn15 = svwhilegt_c8 (x0, x1, 4))
+
+/*
+** whilegt_pn8_0r_2_s64:
+** whilegt pn8\.b, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilegt_c8 ((int64_t) 0, x1, 2),
+ pn8 = svwhilegt_c8_s64 (0, x1, 2))
+
+/*
+** whilegt_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilegt pn8\.b, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilegt_c8 ((int64_t) 5, x1, 4),
+ pn8 = svwhilegt_c8_s64 (5, x1, 4))
+
+/*
+** whilegt_pn8_r0_2_s64:
+** whilegt pn8\.b, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilegt_c8 (x0, (int64_t) 0, 2),
+ pn8 = svwhilegt_c8_s64 (x0, 0, 2))
+
+/*
+** whilegt_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilegt pn15\.b, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilegt_c8 (x0, (int64_t) 5, 4),
+ pn15 = svwhilegt_c8_s64 (x0, 5, 4))
+
+/*
+** whilegt_pn8_rr_2_u64:
+** whilehi pn8\.b, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilegt_c8_u64 (x0, x1, 2),
+ pn8 = svwhilegt_c8 (x0, x1, 2))
+
+/*
+** whilegt_pn8_0r_4_u64:
+** whilehi pn8\.b, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilegt_c8 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilegt_c8_u64 (0, x1, 4))
+
+/*
+** whilegt_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilehi pn8\.b, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilegt_c8 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilegt_c8_u64 (5, x1, 2))
+
+/*
+** whilegt_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilehi pn8\.b, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilegt_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilegt_c8 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilegt_c8_u64 (x0, 5, 4))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilele_b16_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilele_b16_x2.c
new file mode 100644
index 0000000..324a171
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilele_b16_x2.c
@@ -0,0 +1,126 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilele_p1_rr_s64:
+** whilele {p[0-9]+\.h, p[0-9]+\.h}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p1_rr_s64, int64_t,
+ p1 = svwhilele_b16_s64_x2 (x0, x1),
+ p1 = svwhilele_b16_x2 (x0, x1))
+
+/*
+** whilele_p4_rr_s64:
+** whilele {p4\.h, p5\.h}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_rr_s64, int64_t,
+ p4 = svwhilele_b16_s64_x2 (x0, x1),
+ p4 = svwhilele_b16_x2 (x0, x1))
+
+/*
+** whilele_p9_rr_s64:
+** whilele {p[0-9]+\.h, p[0-9]+\.h}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p9_rr_s64, int64_t,
+ p9 = svwhilele_b16_s64_x2 (x0, x1),
+ p9 = svwhilele_b16_x2 (x0, x1))
+
+/*
+** whilele_p14_rr_s64:
+** whilele {p14\.h, p15\.h}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p14_rr_s64, int64_t,
+ p14 = svwhilele_b16_s64_x2 (x0, x1),
+ p14 = svwhilele_b16_x2 (x0, x1))
+
+/*
+** whilele_p4_0r_s64:
+** whilele {p4\.h, p5\.h}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_0r_s64, int64_t,
+ p4 = svwhilele_b16_x2 ((int64_t) 0, x1),
+ p4 = svwhilele_b16_s64_x2 (0, x1))
+
+/*
+** whilele_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilele {p4\.h, p5\.h}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_5r_s64, int64_t,
+ p4 = svwhilele_b16_x2 ((int64_t) 5, x1),
+ p4 = svwhilele_b16_s64_x2 (5, x1))
+
+/*
+** whilele_p4_r0_s64:
+** whilele {p4\.h, p5\.h}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_r0_s64, int64_t,
+ p4 = svwhilele_b16_x2 (x0, (int64_t) 0),
+ p4 = svwhilele_b16_s64_x2 (x0, 0))
+
+/*
+** whilele_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilele {p14\.h, p15\.h}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p14_r5_s64, int64_t,
+ p14 = svwhilele_b16_x2 (x0, (int64_t) 5),
+ p14 = svwhilele_b16_s64_x2 (x0, 5))
+
+/*
+** whilele_p4_rr_u64:
+** whilels {p4\.h, p5\.h}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_rr_u64, uint64_t,
+ p4 = svwhilele_b16_u64_x2 (x0, x1),
+ p4 = svwhilele_b16_x2 (x0, x1))
+
+/*
+** whilele_p4_0r_u64:
+** whilels {p4\.h, p5\.h}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_0r_u64, uint64_t,
+ p4 = svwhilele_b16_x2 ((uint64_t) 0, x1),
+ p4 = svwhilele_b16_u64_x2 (0, x1))
+
+/*
+** whilele_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilels {p4\.h, p5\.h}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_5r_u64, uint64_t,
+ p4 = svwhilele_b16_x2 ((uint64_t) 5, x1),
+ p4 = svwhilele_b16_u64_x2 (5, x1))
+
+/*
+** whilele_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilels {p4\.h, p5\.h}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_r5_u64, uint64_t,
+ p4 = svwhilele_b16_x2 (x0, (uint64_t) 5),
+ p4 = svwhilele_b16_u64_x2 (x0, 5))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilele_b32_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilele_b32_x2.c
new file mode 100644
index 0000000..3bd7c49
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilele_b32_x2.c
@@ -0,0 +1,126 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilele_p1_rr_s64:
+** whilele {p[0-9]+\.s, p[0-9]+\.s}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p1_rr_s64, int64_t,
+ p1 = svwhilele_b32_s64_x2 (x0, x1),
+ p1 = svwhilele_b32_x2 (x0, x1))
+
+/*
+** whilele_p4_rr_s64:
+** whilele {p4\.s, p5\.s}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_rr_s64, int64_t,
+ p4 = svwhilele_b32_s64_x2 (x0, x1),
+ p4 = svwhilele_b32_x2 (x0, x1))
+
+/*
+** whilele_p9_rr_s64:
+** whilele {p[0-9]+\.s, p[0-9]+\.s}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p9_rr_s64, int64_t,
+ p9 = svwhilele_b32_s64_x2 (x0, x1),
+ p9 = svwhilele_b32_x2 (x0, x1))
+
+/*
+** whilele_p14_rr_s64:
+** whilele {p14\.s, p15\.s}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p14_rr_s64, int64_t,
+ p14 = svwhilele_b32_s64_x2 (x0, x1),
+ p14 = svwhilele_b32_x2 (x0, x1))
+
+/*
+** whilele_p4_0r_s64:
+** whilele {p4\.s, p5\.s}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_0r_s64, int64_t,
+ p4 = svwhilele_b32_x2 ((int64_t) 0, x1),
+ p4 = svwhilele_b32_s64_x2 (0, x1))
+
+/*
+** whilele_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilele {p4\.s, p5\.s}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_5r_s64, int64_t,
+ p4 = svwhilele_b32_x2 ((int64_t) 5, x1),
+ p4 = svwhilele_b32_s64_x2 (5, x1))
+
+/*
+** whilele_p4_r0_s64:
+** whilele {p4\.s, p5\.s}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_r0_s64, int64_t,
+ p4 = svwhilele_b32_x2 (x0, (int64_t) 0),
+ p4 = svwhilele_b32_s64_x2 (x0, 0))
+
+/*
+** whilele_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilele {p14\.s, p15\.s}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p14_r5_s64, int64_t,
+ p14 = svwhilele_b32_x2 (x0, (int64_t) 5),
+ p14 = svwhilele_b32_s64_x2 (x0, 5))
+
+/*
+** whilele_p4_rr_u64:
+** whilels {p4\.s, p5\.s}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_rr_u64, uint64_t,
+ p4 = svwhilele_b32_u64_x2 (x0, x1),
+ p4 = svwhilele_b32_x2 (x0, x1))
+
+/*
+** whilele_p4_0r_u64:
+** whilels {p4\.s, p5\.s}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_0r_u64, uint64_t,
+ p4 = svwhilele_b32_x2 ((uint64_t) 0, x1),
+ p4 = svwhilele_b32_u64_x2 (0, x1))
+
+/*
+** whilele_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilels {p4\.s, p5\.s}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_5r_u64, uint64_t,
+ p4 = svwhilele_b32_x2 ((uint64_t) 5, x1),
+ p4 = svwhilele_b32_u64_x2 (5, x1))
+
+/*
+** whilele_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilels {p4\.s, p5\.s}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_r5_u64, uint64_t,
+ p4 = svwhilele_b32_x2 (x0, (uint64_t) 5),
+ p4 = svwhilele_b32_u64_x2 (x0, 5))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilele_b64_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilele_b64_x2.c
new file mode 100644
index 0000000..ea40715
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilele_b64_x2.c
@@ -0,0 +1,126 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilele_p1_rr_s64:
+** whilele {p[0-9]+\.d, p[0-9]+\.d}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p1_rr_s64, int64_t,
+ p1 = svwhilele_b64_s64_x2 (x0, x1),
+ p1 = svwhilele_b64_x2 (x0, x1))
+
+/*
+** whilele_p4_rr_s64:
+** whilele {p4\.d, p5\.d}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_rr_s64, int64_t,
+ p4 = svwhilele_b64_s64_x2 (x0, x1),
+ p4 = svwhilele_b64_x2 (x0, x1))
+
+/*
+** whilele_p9_rr_s64:
+** whilele {p[0-9]+\.d, p[0-9]+\.d}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p9_rr_s64, int64_t,
+ p9 = svwhilele_b64_s64_x2 (x0, x1),
+ p9 = svwhilele_b64_x2 (x0, x1))
+
+/*
+** whilele_p14_rr_s64:
+** whilele {p14\.d, p15\.d}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p14_rr_s64, int64_t,
+ p14 = svwhilele_b64_s64_x2 (x0, x1),
+ p14 = svwhilele_b64_x2 (x0, x1))
+
+/*
+** whilele_p4_0r_s64:
+** whilele {p4\.d, p5\.d}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_0r_s64, int64_t,
+ p4 = svwhilele_b64_x2 ((int64_t) 0, x1),
+ p4 = svwhilele_b64_s64_x2 (0, x1))
+
+/*
+** whilele_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilele {p4\.d, p5\.d}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_5r_s64, int64_t,
+ p4 = svwhilele_b64_x2 ((int64_t) 5, x1),
+ p4 = svwhilele_b64_s64_x2 (5, x1))
+
+/*
+** whilele_p4_r0_s64:
+** whilele {p4\.d, p5\.d}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_r0_s64, int64_t,
+ p4 = svwhilele_b64_x2 (x0, (int64_t) 0),
+ p4 = svwhilele_b64_s64_x2 (x0, 0))
+
+/*
+** whilele_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilele {p14\.d, p15\.d}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p14_r5_s64, int64_t,
+ p14 = svwhilele_b64_x2 (x0, (int64_t) 5),
+ p14 = svwhilele_b64_s64_x2 (x0, 5))
+
+/*
+** whilele_p4_rr_u64:
+** whilels {p4\.d, p5\.d}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_rr_u64, uint64_t,
+ p4 = svwhilele_b64_u64_x2 (x0, x1),
+ p4 = svwhilele_b64_x2 (x0, x1))
+
+/*
+** whilele_p4_0r_u64:
+** whilels {p4\.d, p5\.d}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_0r_u64, uint64_t,
+ p4 = svwhilele_b64_x2 ((uint64_t) 0, x1),
+ p4 = svwhilele_b64_u64_x2 (0, x1))
+
+/*
+** whilele_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilels {p4\.d, p5\.d}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_5r_u64, uint64_t,
+ p4 = svwhilele_b64_x2 ((uint64_t) 5, x1),
+ p4 = svwhilele_b64_u64_x2 (5, x1))
+
+/*
+** whilele_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilels {p4\.d, p5\.d}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_r5_u64, uint64_t,
+ p4 = svwhilele_b64_x2 (x0, (uint64_t) 5),
+ p4 = svwhilele_b64_u64_x2 (x0, 5))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilele_b8_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilele_b8_x2.c
new file mode 100644
index 0000000..7d21502
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilele_b8_x2.c
@@ -0,0 +1,126 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilele_p1_rr_s64:
+** whilele {p[0-9]+\.b, p[0-9]+\.b}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p1_rr_s64, int64_t,
+ p1 = svwhilele_b8_s64_x2 (x0, x1),
+ p1 = svwhilele_b8_x2 (x0, x1))
+
+/*
+** whilele_p4_rr_s64:
+** whilele {p4\.b, p5\.b}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_rr_s64, int64_t,
+ p4 = svwhilele_b8_s64_x2 (x0, x1),
+ p4 = svwhilele_b8_x2 (x0, x1))
+
+/*
+** whilele_p9_rr_s64:
+** whilele {p[0-9]+\.b, p[0-9]+\.b}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p9_rr_s64, int64_t,
+ p9 = svwhilele_b8_s64_x2 (x0, x1),
+ p9 = svwhilele_b8_x2 (x0, x1))
+
+/*
+** whilele_p14_rr_s64:
+** whilele {p14\.b, p15\.b}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p14_rr_s64, int64_t,
+ p14 = svwhilele_b8_s64_x2 (x0, x1),
+ p14 = svwhilele_b8_x2 (x0, x1))
+
+/*
+** whilele_p4_0r_s64:
+** whilele {p4\.b, p5\.b}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_0r_s64, int64_t,
+ p4 = svwhilele_b8_x2 ((int64_t) 0, x1),
+ p4 = svwhilele_b8_s64_x2 (0, x1))
+
+/*
+** whilele_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilele {p4\.b, p5\.b}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_5r_s64, int64_t,
+ p4 = svwhilele_b8_x2 ((int64_t) 5, x1),
+ p4 = svwhilele_b8_s64_x2 (5, x1))
+
+/*
+** whilele_p4_r0_s64:
+** whilele {p4\.b, p5\.b}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_r0_s64, int64_t,
+ p4 = svwhilele_b8_x2 (x0, (int64_t) 0),
+ p4 = svwhilele_b8_s64_x2 (x0, 0))
+
+/*
+** whilele_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilele {p14\.b, p15\.b}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p14_r5_s64, int64_t,
+ p14 = svwhilele_b8_x2 (x0, (int64_t) 5),
+ p14 = svwhilele_b8_s64_x2 (x0, 5))
+
+/*
+** whilele_p4_rr_u64:
+** whilels {p4\.b, p5\.b}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_rr_u64, uint64_t,
+ p4 = svwhilele_b8_u64_x2 (x0, x1),
+ p4 = svwhilele_b8_x2 (x0, x1))
+
+/*
+** whilele_p4_0r_u64:
+** whilels {p4\.b, p5\.b}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_0r_u64, uint64_t,
+ p4 = svwhilele_b8_x2 ((uint64_t) 0, x1),
+ p4 = svwhilele_b8_u64_x2 (0, x1))
+
+/*
+** whilele_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilels {p4\.b, p5\.b}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_5r_u64, uint64_t,
+ p4 = svwhilele_b8_x2 ((uint64_t) 5, x1),
+ p4 = svwhilele_b8_u64_x2 (5, x1))
+
+/*
+** whilele_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilels {p4\.b, p5\.b}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilele_p4_r5_u64, uint64_t,
+ p4 = svwhilele_b8_x2 (x0, (uint64_t) 5),
+ p4 = svwhilele_b8_u64_x2 (x0, 5))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilele_c16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilele_c16.c
new file mode 100644
index 0000000..053182f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilele_c16.c
@@ -0,0 +1,124 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilele_pn0_rr_2_s64:
+** whilele pn[0-9]+\.h, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilele_c16_s64 (x0, x1, 2),
+ pn0 = svwhilele_c16 (x0, x1, 2))
+
+/*
+** whilele_pn7_rr_4_s64:
+** whilele pn[0-9]+\.h, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilele_c16_s64 (x0, x1, 4),
+ pn7 = svwhilele_c16 (x0, x1, 4))
+
+/*
+** whilele_pn8_rr_2_s64:
+** whilele pn8\.h, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilele_c16_s64 (x0, x1, 2),
+ pn8 = svwhilele_c16 (x0, x1, 2))
+
+/*
+** whilele_pn15_rr_4_s64:
+** whilele pn15\.h, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilele_c16_s64 (x0, x1, 4),
+ pn15 = svwhilele_c16 (x0, x1, 4))
+
+/*
+** whilele_pn8_0r_2_s64:
+** whilele pn8\.h, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilele_c16 ((int64_t) 0, x1, 2),
+ pn8 = svwhilele_c16_s64 (0, x1, 2))
+
+/*
+** whilele_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilele pn8\.h, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilele_c16 ((int64_t) 5, x1, 4),
+ pn8 = svwhilele_c16_s64 (5, x1, 4))
+
+/*
+** whilele_pn8_r0_2_s64:
+** whilele pn8\.h, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilele_c16 (x0, (int64_t) 0, 2),
+ pn8 = svwhilele_c16_s64 (x0, 0, 2))
+
+/*
+** whilele_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilele pn15\.h, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilele_c16 (x0, (int64_t) 5, 4),
+ pn15 = svwhilele_c16_s64 (x0, 5, 4))
+
+/*
+** whilele_pn8_rr_2_u64:
+** whilels pn8\.h, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilele_c16_u64 (x0, x1, 2),
+ pn8 = svwhilele_c16 (x0, x1, 2))
+
+/*
+** whilele_pn8_0r_4_u64:
+** whilels pn8\.h, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilele_c16 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilele_c16_u64 (0, x1, 4))
+
+/*
+** whilele_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilels pn8\.h, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilele_c16 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilele_c16_u64 (5, x1, 2))
+
+/*
+** whilele_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilels pn8\.h, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilele_c16 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilele_c16_u64 (x0, 5, 4))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilele_c32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilele_c32.c
new file mode 100644
index 0000000..3582bc3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilele_c32.c
@@ -0,0 +1,124 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilele_pn0_rr_2_s64:
+** whilele pn[0-9]+\.s, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilele_c32_s64 (x0, x1, 2),
+ pn0 = svwhilele_c32 (x0, x1, 2))
+
+/*
+** whilele_pn7_rr_4_s64:
+** whilele pn[0-9]+\.s, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilele_c32_s64 (x0, x1, 4),
+ pn7 = svwhilele_c32 (x0, x1, 4))
+
+/*
+** whilele_pn8_rr_2_s64:
+** whilele pn8\.s, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilele_c32_s64 (x0, x1, 2),
+ pn8 = svwhilele_c32 (x0, x1, 2))
+
+/*
+** whilele_pn15_rr_4_s64:
+** whilele pn15\.s, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilele_c32_s64 (x0, x1, 4),
+ pn15 = svwhilele_c32 (x0, x1, 4))
+
+/*
+** whilele_pn8_0r_2_s64:
+** whilele pn8\.s, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilele_c32 ((int64_t) 0, x1, 2),
+ pn8 = svwhilele_c32_s64 (0, x1, 2))
+
+/*
+** whilele_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilele pn8\.s, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilele_c32 ((int64_t) 5, x1, 4),
+ pn8 = svwhilele_c32_s64 (5, x1, 4))
+
+/*
+** whilele_pn8_r0_2_s64:
+** whilele pn8\.s, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilele_c32 (x0, (int64_t) 0, 2),
+ pn8 = svwhilele_c32_s64 (x0, 0, 2))
+
+/*
+** whilele_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilele pn15\.s, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilele_c32 (x0, (int64_t) 5, 4),
+ pn15 = svwhilele_c32_s64 (x0, 5, 4))
+
+/*
+** whilele_pn8_rr_2_u64:
+** whilels pn8\.s, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilele_c32_u64 (x0, x1, 2),
+ pn8 = svwhilele_c32 (x0, x1, 2))
+
+/*
+** whilele_pn8_0r_4_u64:
+** whilels pn8\.s, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilele_c32 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilele_c32_u64 (0, x1, 4))
+
+/*
+** whilele_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilels pn8\.s, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilele_c32 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilele_c32_u64 (5, x1, 2))
+
+/*
+** whilele_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilels pn8\.s, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilele_c32 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilele_c32_u64 (x0, 5, 4))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilele_c64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilele_c64.c
new file mode 100644
index 0000000..9b8e7b7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilele_c64.c
@@ -0,0 +1,124 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilele_pn0_rr_2_s64:
+** whilele pn[0-9]+\.d, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilele_c64_s64 (x0, x1, 2),
+ pn0 = svwhilele_c64 (x0, x1, 2))
+
+/*
+** whilele_pn7_rr_4_s64:
+** whilele pn[0-9]+\.d, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilele_c64_s64 (x0, x1, 4),
+ pn7 = svwhilele_c64 (x0, x1, 4))
+
+/*
+** whilele_pn8_rr_2_s64:
+** whilele pn8\.d, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilele_c64_s64 (x0, x1, 2),
+ pn8 = svwhilele_c64 (x0, x1, 2))
+
+/*
+** whilele_pn15_rr_4_s64:
+** whilele pn15\.d, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilele_c64_s64 (x0, x1, 4),
+ pn15 = svwhilele_c64 (x0, x1, 4))
+
+/*
+** whilele_pn8_0r_2_s64:
+** whilele pn8\.d, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilele_c64 ((int64_t) 0, x1, 2),
+ pn8 = svwhilele_c64_s64 (0, x1, 2))
+
+/*
+** whilele_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilele pn8\.d, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilele_c64 ((int64_t) 5, x1, 4),
+ pn8 = svwhilele_c64_s64 (5, x1, 4))
+
+/*
+** whilele_pn8_r0_2_s64:
+** whilele pn8\.d, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilele_c64 (x0, (int64_t) 0, 2),
+ pn8 = svwhilele_c64_s64 (x0, 0, 2))
+
+/*
+** whilele_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilele pn15\.d, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilele_c64 (x0, (int64_t) 5, 4),
+ pn15 = svwhilele_c64_s64 (x0, 5, 4))
+
+/*
+** whilele_pn8_rr_2_u64:
+** whilels pn8\.d, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilele_c64_u64 (x0, x1, 2),
+ pn8 = svwhilele_c64 (x0, x1, 2))
+
+/*
+** whilele_pn8_0r_4_u64:
+** whilels pn8\.d, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilele_c64 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilele_c64_u64 (0, x1, 4))
+
+/*
+** whilele_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilels pn8\.d, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilele_c64 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilele_c64_u64 (5, x1, 2))
+
+/*
+** whilele_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilels pn8\.d, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilele_c64 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilele_c64_u64 (x0, 5, 4))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilele_c8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilele_c8.c
new file mode 100644
index 0000000..724bab6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilele_c8.c
@@ -0,0 +1,124 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilele_pn0_rr_2_s64:
+** whilele pn[0-9]+\.b, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilele_c8_s64 (x0, x1, 2),
+ pn0 = svwhilele_c8 (x0, x1, 2))
+
+/*
+** whilele_pn7_rr_4_s64:
+** whilele pn[0-9]+\.b, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilele_c8_s64 (x0, x1, 4),
+ pn7 = svwhilele_c8 (x0, x1, 4))
+
+/*
+** whilele_pn8_rr_2_s64:
+** whilele pn8\.b, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilele_c8_s64 (x0, x1, 2),
+ pn8 = svwhilele_c8 (x0, x1, 2))
+
+/*
+** whilele_pn15_rr_4_s64:
+** whilele pn15\.b, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilele_c8_s64 (x0, x1, 4),
+ pn15 = svwhilele_c8 (x0, x1, 4))
+
+/*
+** whilele_pn8_0r_2_s64:
+** whilele pn8\.b, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilele_c8 ((int64_t) 0, x1, 2),
+ pn8 = svwhilele_c8_s64 (0, x1, 2))
+
+/*
+** whilele_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilele pn8\.b, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilele_c8 ((int64_t) 5, x1, 4),
+ pn8 = svwhilele_c8_s64 (5, x1, 4))
+
+/*
+** whilele_pn8_r0_2_s64:
+** whilele pn8\.b, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilele_c8 (x0, (int64_t) 0, 2),
+ pn8 = svwhilele_c8_s64 (x0, 0, 2))
+
+/*
+** whilele_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilele pn15\.b, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilele_c8 (x0, (int64_t) 5, 4),
+ pn15 = svwhilele_c8_s64 (x0, 5, 4))
+
+/*
+** whilele_pn8_rr_2_u64:
+** whilels pn8\.b, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilele_c8_u64 (x0, x1, 2),
+ pn8 = svwhilele_c8 (x0, x1, 2))
+
+/*
+** whilele_pn8_0r_4_u64:
+** whilels pn8\.b, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilele_c8 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilele_c8_u64 (0, x1, 4))
+
+/*
+** whilele_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilels pn8\.b, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilele_c8 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilele_c8_u64 (5, x1, 2))
+
+/*
+** whilele_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilels pn8\.b, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilele_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilele_c8 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilele_c8_u64 (x0, 5, 4))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilelt_b16_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilelt_b16_x2.c
new file mode 100644
index 0000000..f92f109
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilelt_b16_x2.c
@@ -0,0 +1,126 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilelt_p1_rr_s64:
+** whilelt {p[0-9]+\.h, p[0-9]+\.h}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p1_rr_s64, int64_t,
+ p1 = svwhilelt_b16_s64_x2 (x0, x1),
+ p1 = svwhilelt_b16_x2 (x0, x1))
+
+/*
+** whilelt_p4_rr_s64:
+** whilelt {p4\.h, p5\.h}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_rr_s64, int64_t,
+ p4 = svwhilelt_b16_s64_x2 (x0, x1),
+ p4 = svwhilelt_b16_x2 (x0, x1))
+
+/*
+** whilelt_p9_rr_s64:
+** whilelt {p[0-9]+\.h, p[0-9]+\.h}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p9_rr_s64, int64_t,
+ p9 = svwhilelt_b16_s64_x2 (x0, x1),
+ p9 = svwhilelt_b16_x2 (x0, x1))
+
+/*
+** whilelt_p14_rr_s64:
+** whilelt {p14\.h, p15\.h}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p14_rr_s64, int64_t,
+ p14 = svwhilelt_b16_s64_x2 (x0, x1),
+ p14 = svwhilelt_b16_x2 (x0, x1))
+
+/*
+** whilelt_p4_0r_s64:
+** whilelt {p4\.h, p5\.h}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_0r_s64, int64_t,
+ p4 = svwhilelt_b16_x2 ((int64_t) 0, x1),
+ p4 = svwhilelt_b16_s64_x2 (0, x1))
+
+/*
+** whilelt_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilelt {p4\.h, p5\.h}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_5r_s64, int64_t,
+ p4 = svwhilelt_b16_x2 ((int64_t) 5, x1),
+ p4 = svwhilelt_b16_s64_x2 (5, x1))
+
+/*
+** whilelt_p4_r0_s64:
+** whilelt {p4\.h, p5\.h}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_r0_s64, int64_t,
+ p4 = svwhilelt_b16_x2 (x0, (int64_t) 0),
+ p4 = svwhilelt_b16_s64_x2 (x0, 0))
+
+/*
+** whilelt_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilelt {p14\.h, p15\.h}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p14_r5_s64, int64_t,
+ p14 = svwhilelt_b16_x2 (x0, (int64_t) 5),
+ p14 = svwhilelt_b16_s64_x2 (x0, 5))
+
+/*
+** whilelt_p4_rr_u64:
+** whilelo {p4\.h, p5\.h}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_rr_u64, uint64_t,
+ p4 = svwhilelt_b16_u64_x2 (x0, x1),
+ p4 = svwhilelt_b16_x2 (x0, x1))
+
+/*
+** whilelt_p4_0r_u64:
+** whilelo {p4\.h, p5\.h}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_0r_u64, uint64_t,
+ p4 = svwhilelt_b16_x2 ((uint64_t) 0, x1),
+ p4 = svwhilelt_b16_u64_x2 (0, x1))
+
+/*
+** whilelt_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilelo {p4\.h, p5\.h}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_5r_u64, uint64_t,
+ p4 = svwhilelt_b16_x2 ((uint64_t) 5, x1),
+ p4 = svwhilelt_b16_u64_x2 (5, x1))
+
+/*
+** whilelt_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilelo {p4\.h, p5\.h}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_r5_u64, uint64_t,
+ p4 = svwhilelt_b16_x2 (x0, (uint64_t) 5),
+ p4 = svwhilelt_b16_u64_x2 (x0, 5))
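
Similarly, for the _x2 predicate-pair form tested in this file: a hedged sketch (again not from the patch) of using svwhilelt_b16_x2 to drive two ordinarily predicated vectors per iteration. It assumes the generic svget2 overload accepts svboolx2_t, as in the SME2/SVE2p1 ACLE; the function name and loop body are hypothetical.

#include <arm_sve.h>

#pragma GCC target "+sve2p1"

/* Add 1.0 to N half-precision values, two vectors per iteration.
   One WHILELT produces a pair of .H predicates: the first covers
   elements i .. i+VL-1 and the second i+VL .. i+2*VL-1.  */
void
add1_f16 (__fp16 *dst, const __fp16 *src, int64_t n)
{
  for (int64_t i = 0; i < n; i += 2 * (int64_t) svcnth ())
    {
      svboolx2_t pg = svwhilelt_b16_x2 (i, n);
      svbool_t pg0 = svget2 (pg, 0);
      svbool_t pg1 = svget2 (pg, 1);
      svfloat16_t lo = svld1 (pg0, src + i);
      svfloat16_t hi = svld1 (pg1, src + i + svcnth ());
      svst1 (pg0, dst + i, svadd_x (pg0, lo, (__fp16) 1.0));
      svst1 (pg1, dst + i + svcnth (), svadd_x (pg1, hi, (__fp16) 1.0));
    }
}
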
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilelt_b32_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilelt_b32_x2.c
new file mode 100644
index 0000000..2c052b6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilelt_b32_x2.c
@@ -0,0 +1,126 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilelt_p1_rr_s64:
+** whilelt {p[0-9]+\.s, p[0-9]+\.s}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p1_rr_s64, int64_t,
+ p1 = svwhilelt_b32_s64_x2 (x0, x1),
+ p1 = svwhilelt_b32_x2 (x0, x1))
+
+/*
+** whilelt_p4_rr_s64:
+** whilelt {p4\.s, p5\.s}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_rr_s64, int64_t,
+ p4 = svwhilelt_b32_s64_x2 (x0, x1),
+ p4 = svwhilelt_b32_x2 (x0, x1))
+
+/*
+** whilelt_p9_rr_s64:
+** whilelt {p[0-9]+\.s, p[0-9]+\.s}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p9_rr_s64, int64_t,
+ p9 = svwhilelt_b32_s64_x2 (x0, x1),
+ p9 = svwhilelt_b32_x2 (x0, x1))
+
+/*
+** whilelt_p14_rr_s64:
+** whilelt {p14\.s, p15\.s}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p14_rr_s64, int64_t,
+ p14 = svwhilelt_b32_s64_x2 (x0, x1),
+ p14 = svwhilelt_b32_x2 (x0, x1))
+
+/*
+** whilelt_p4_0r_s64:
+** whilelt {p4\.s, p5\.s}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_0r_s64, int64_t,
+ p4 = svwhilelt_b32_x2 ((int64_t) 0, x1),
+ p4 = svwhilelt_b32_s64_x2 (0, x1))
+
+/*
+** whilelt_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilelt {p4\.s, p5\.s}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_5r_s64, int64_t,
+ p4 = svwhilelt_b32_x2 ((int64_t) 5, x1),
+ p4 = svwhilelt_b32_s64_x2 (5, x1))
+
+/*
+** whilelt_p4_r0_s64:
+** whilelt {p4\.s, p5\.s}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_r0_s64, int64_t,
+ p4 = svwhilelt_b32_x2 (x0, (int64_t) 0),
+ p4 = svwhilelt_b32_s64_x2 (x0, 0))
+
+/*
+** whilelt_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilelt {p14\.s, p15\.s}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p14_r5_s64, int64_t,
+ p14 = svwhilelt_b32_x2 (x0, (int64_t) 5),
+ p14 = svwhilelt_b32_s64_x2 (x0, 5))
+
+/*
+** whilelt_p4_rr_u64:
+** whilelo {p4\.s, p5\.s}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_rr_u64, uint64_t,
+ p4 = svwhilelt_b32_u64_x2 (x0, x1),
+ p4 = svwhilelt_b32_x2 (x0, x1))
+
+/*
+** whilelt_p4_0r_u64:
+** whilelo {p4\.s, p5\.s}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_0r_u64, uint64_t,
+ p4 = svwhilelt_b32_x2 ((uint64_t) 0, x1),
+ p4 = svwhilelt_b32_u64_x2 (0, x1))
+
+/*
+** whilelt_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilelo {p4\.s, p5\.s}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_5r_u64, uint64_t,
+ p4 = svwhilelt_b32_x2 ((uint64_t) 5, x1),
+ p4 = svwhilelt_b32_u64_x2 (5, x1))
+
+/*
+** whilelt_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilelo {p4\.s, p5\.s}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_r5_u64, uint64_t,
+ p4 = svwhilelt_b32_x2 (x0, (uint64_t) 5),
+ p4 = svwhilelt_b32_u64_x2 (x0, 5))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilelt_b64_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilelt_b64_x2.c
new file mode 100644
index 0000000..fbe69b4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilelt_b64_x2.c
@@ -0,0 +1,126 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilelt_p1_rr_s64:
+** whilelt {p[0-9]+\.d, p[0-9]+\.d}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p1_rr_s64, int64_t,
+ p1 = svwhilelt_b64_s64_x2 (x0, x1),
+ p1 = svwhilelt_b64_x2 (x0, x1))
+
+/*
+** whilelt_p4_rr_s64:
+** whilelt {p4\.d, p5\.d}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_rr_s64, int64_t,
+ p4 = svwhilelt_b64_s64_x2 (x0, x1),
+ p4 = svwhilelt_b64_x2 (x0, x1))
+
+/*
+** whilelt_p9_rr_s64:
+** whilelt {p[0-9]+\.d, p[0-9]+\.d}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p9_rr_s64, int64_t,
+ p9 = svwhilelt_b64_s64_x2 (x0, x1),
+ p9 = svwhilelt_b64_x2 (x0, x1))
+
+/*
+** whilelt_p14_rr_s64:
+** whilelt {p14\.d, p15\.d}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p14_rr_s64, int64_t,
+ p14 = svwhilelt_b64_s64_x2 (x0, x1),
+ p14 = svwhilelt_b64_x2 (x0, x1))
+
+/*
+** whilelt_p4_0r_s64:
+** whilelt {p4\.d, p5\.d}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_0r_s64, int64_t,
+ p4 = svwhilelt_b64_x2 ((int64_t) 0, x1),
+ p4 = svwhilelt_b64_s64_x2 (0, x1))
+
+/*
+** whilelt_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilelt {p4\.d, p5\.d}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_5r_s64, int64_t,
+ p4 = svwhilelt_b64_x2 ((int64_t) 5, x1),
+ p4 = svwhilelt_b64_s64_x2 (5, x1))
+
+/*
+** whilelt_p4_r0_s64:
+** whilelt {p4\.d, p5\.d}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_r0_s64, int64_t,
+ p4 = svwhilelt_b64_x2 (x0, (int64_t) 0),
+ p4 = svwhilelt_b64_s64_x2 (x0, 0))
+
+/*
+** whilelt_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilelt {p14\.d, p15\.d}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p14_r5_s64, int64_t,
+ p14 = svwhilelt_b64_x2 (x0, (int64_t) 5),
+ p14 = svwhilelt_b64_s64_x2 (x0, 5))
+
+/*
+** whilelt_p4_rr_u64:
+** whilelo {p4\.d, p5\.d}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_rr_u64, uint64_t,
+ p4 = svwhilelt_b64_u64_x2 (x0, x1),
+ p4 = svwhilelt_b64_x2 (x0, x1))
+
+/*
+** whilelt_p4_0r_u64:
+** whilelo {p4\.d, p5\.d}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_0r_u64, uint64_t,
+ p4 = svwhilelt_b64_x2 ((uint64_t) 0, x1),
+ p4 = svwhilelt_b64_u64_x2 (0, x1))
+
+/*
+** whilelt_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilelo {p4\.d, p5\.d}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_5r_u64, uint64_t,
+ p4 = svwhilelt_b64_x2 ((uint64_t) 5, x1),
+ p4 = svwhilelt_b64_u64_x2 (5, x1))
+
+/*
+** whilelt_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilelo {p4\.d, p5\.d}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_r5_u64, uint64_t,
+ p4 = svwhilelt_b64_x2 (x0, (uint64_t) 5),
+ p4 = svwhilelt_b64_u64_x2 (x0, 5))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilelt_b8_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilelt_b8_x2.c
new file mode 100644
index 0000000..3029cfe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilelt_b8_x2.c
@@ -0,0 +1,126 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilelt_p1_rr_s64:
+** whilelt {p[0-9]+\.b, p[0-9]+\.b}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p1_rr_s64, int64_t,
+ p1 = svwhilelt_b8_s64_x2 (x0, x1),
+ p1 = svwhilelt_b8_x2 (x0, x1))
+
+/*
+** whilelt_p4_rr_s64:
+** whilelt {p4\.b, p5\.b}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_rr_s64, int64_t,
+ p4 = svwhilelt_b8_s64_x2 (x0, x1),
+ p4 = svwhilelt_b8_x2 (x0, x1))
+
+/*
+** whilelt_p9_rr_s64:
+** whilelt {p[0-9]+\.b, p[0-9]+\.b}, x0, x1
+** mov [^\n]+
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p9_rr_s64, int64_t,
+ p9 = svwhilelt_b8_s64_x2 (x0, x1),
+ p9 = svwhilelt_b8_x2 (x0, x1))
+
+/*
+** whilelt_p14_rr_s64:
+** whilelt {p14\.b, p15\.b}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p14_rr_s64, int64_t,
+ p14 = svwhilelt_b8_s64_x2 (x0, x1),
+ p14 = svwhilelt_b8_x2 (x0, x1))
+
+/*
+** whilelt_p4_0r_s64:
+** whilelt {p4\.b, p5\.b}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_0r_s64, int64_t,
+ p4 = svwhilelt_b8_x2 ((int64_t) 0, x1),
+ p4 = svwhilelt_b8_s64_x2 (0, x1))
+
+/*
+** whilelt_p4_5r_s64:
+** mov (x[0-9]+), #?5
+** whilelt {p4\.b, p5\.b}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_5r_s64, int64_t,
+ p4 = svwhilelt_b8_x2 ((int64_t) 5, x1),
+ p4 = svwhilelt_b8_s64_x2 (5, x1))
+
+/*
+** whilelt_p4_r0_s64:
+** whilelt {p4\.b, p5\.b}, x0, xzr
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_r0_s64, int64_t,
+ p4 = svwhilelt_b8_x2 (x0, (int64_t) 0),
+ p4 = svwhilelt_b8_s64_x2 (x0, 0))
+
+/*
+** whilelt_p14_r5_s64:
+** mov (x[0-9]+), #?5
+** whilelt {p14\.b, p15\.b}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p14_r5_s64, int64_t,
+ p14 = svwhilelt_b8_x2 (x0, (int64_t) 5),
+ p14 = svwhilelt_b8_s64_x2 (x0, 5))
+
+/*
+** whilelt_p4_rr_u64:
+** whilelo {p4\.b, p5\.b}, x0, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_rr_u64, uint64_t,
+ p4 = svwhilelt_b8_u64_x2 (x0, x1),
+ p4 = svwhilelt_b8_x2 (x0, x1))
+
+/*
+** whilelt_p4_0r_u64:
+** whilelo {p4\.b, p5\.b}, xzr, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_0r_u64, uint64_t,
+ p4 = svwhilelt_b8_x2 ((uint64_t) 0, x1),
+ p4 = svwhilelt_b8_u64_x2 (0, x1))
+
+/*
+** whilelt_p4_5r_u64:
+** mov (x[0-9]+), #?5
+** whilelo {p4\.b, p5\.b}, \1, x1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_5r_u64, uint64_t,
+ p4 = svwhilelt_b8_x2 ((uint64_t) 5, x1),
+ p4 = svwhilelt_b8_u64_x2 (5, x1))
+
+/*
+** whilelt_p4_r5_u64:
+** mov (x[0-9]+), #?5
+** whilelo {p4\.b, p5\.b}, x0, \1
+** ret
+*/
+TEST_COMPARE_S_X2 (whilelt_p4_r5_u64, uint64_t,
+ p4 = svwhilelt_b8_x2 (x0, (uint64_t) 5),
+ p4 = svwhilelt_b8_u64_x2 (x0, 5))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilelt_c16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilelt_c16.c
new file mode 100644
index 0000000..4891864
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilelt_c16.c
@@ -0,0 +1,124 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilelt_pn0_rr_2_s64:
+** whilelt pn[0-9]+\.h, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilelt_c16_s64 (x0, x1, 2),
+ pn0 = svwhilelt_c16 (x0, x1, 2))
+
+/*
+** whilelt_pn7_rr_4_s64:
+** whilelt pn[0-9]+\.h, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilelt_c16_s64 (x0, x1, 4),
+ pn7 = svwhilelt_c16 (x0, x1, 4))
+
+/*
+** whilelt_pn8_rr_2_s64:
+** whilelt pn8\.h, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilelt_c16_s64 (x0, x1, 2),
+ pn8 = svwhilelt_c16 (x0, x1, 2))
+
+/*
+** whilelt_pn15_rr_4_s64:
+** whilelt pn15\.h, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilelt_c16_s64 (x0, x1, 4),
+ pn15 = svwhilelt_c16 (x0, x1, 4))
+
+/*
+** whilelt_pn8_0r_2_s64:
+** whilelt pn8\.h, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilelt_c16 ((int64_t) 0, x1, 2),
+ pn8 = svwhilelt_c16_s64 (0, x1, 2))
+
+/*
+** whilelt_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilelt pn8\.h, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilelt_c16 ((int64_t) 5, x1, 4),
+ pn8 = svwhilelt_c16_s64 (5, x1, 4))
+
+/*
+** whilelt_pn8_r0_2_s64:
+** whilelt pn8\.h, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilelt_c16 (x0, (int64_t) 0, 2),
+ pn8 = svwhilelt_c16_s64 (x0, 0, 2))
+
+/*
+** whilelt_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilelt pn15\.h, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilelt_c16 (x0, (int64_t) 5, 4),
+ pn15 = svwhilelt_c16_s64 (x0, 5, 4))
+
+/*
+** whilelt_pn8_rr_2_u64:
+** whilelo pn8\.h, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilelt_c16_u64 (x0, x1, 2),
+ pn8 = svwhilelt_c16 (x0, x1, 2))
+
+/*
+** whilelt_pn8_0r_4_u64:
+** whilelo pn8\.h, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilelt_c16 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilelt_c16_u64 (0, x1, 4))
+
+/*
+** whilelt_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilelo pn8\.h, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilelt_c16 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilelt_c16_u64 (5, x1, 2))
+
+/*
+** whilelt_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilelo pn8\.h, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilelt_c16 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilelt_c16_u64 (x0, 5, 4))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilelt_c32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilelt_c32.c
new file mode 100644
index 0000000..b2b8508
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilelt_c32.c
@@ -0,0 +1,124 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilelt_pn0_rr_2_s64:
+** whilelt pn[0-9]+\.s, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilelt_c32_s64 (x0, x1, 2),
+ pn0 = svwhilelt_c32 (x0, x1, 2))
+
+/*
+** whilelt_pn7_rr_4_s64:
+** whilelt pn[0-9]+\.s, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilelt_c32_s64 (x0, x1, 4),
+ pn7 = svwhilelt_c32 (x0, x1, 4))
+
+/*
+** whilelt_pn8_rr_2_s64:
+** whilelt pn8\.s, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilelt_c32_s64 (x0, x1, 2),
+ pn8 = svwhilelt_c32 (x0, x1, 2))
+
+/*
+** whilelt_pn15_rr_4_s64:
+** whilelt pn15\.s, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilelt_c32_s64 (x0, x1, 4),
+ pn15 = svwhilelt_c32 (x0, x1, 4))
+
+/*
+** whilelt_pn8_0r_2_s64:
+** whilelt pn8\.s, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilelt_c32 ((int64_t) 0, x1, 2),
+ pn8 = svwhilelt_c32_s64 (0, x1, 2))
+
+/*
+** whilelt_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilelt pn8\.s, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilelt_c32 ((int64_t) 5, x1, 4),
+ pn8 = svwhilelt_c32_s64 (5, x1, 4))
+
+/*
+** whilelt_pn8_r0_2_s64:
+** whilelt pn8\.s, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilelt_c32 (x0, (int64_t) 0, 2),
+ pn8 = svwhilelt_c32_s64 (x0, 0, 2))
+
+/*
+** whilelt_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilelt pn15\.s, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilelt_c32 (x0, (int64_t) 5, 4),
+ pn15 = svwhilelt_c32_s64 (x0, 5, 4))
+
+/*
+** whilelt_pn8_rr_2_u64:
+** whilelo pn8\.s, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilelt_c32_u64 (x0, x1, 2),
+ pn8 = svwhilelt_c32 (x0, x1, 2))
+
+/*
+** whilelt_pn8_0r_4_u64:
+** whilelo pn8\.s, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilelt_c32 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilelt_c32_u64 (0, x1, 4))
+
+/*
+** whilelt_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilelo pn8\.s, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilelt_c32 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilelt_c32_u64 (5, x1, 2))
+
+/*
+** whilelt_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilelo pn8\.s, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilelt_c32 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilelt_c32_u64 (x0, 5, 4))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilelt_c64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilelt_c64.c
new file mode 100644
index 0000000..8cb44bf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilelt_c64.c
@@ -0,0 +1,124 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilelt_pn0_rr_2_s64:
+** whilelt pn[0-9]+\.d, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilelt_c64_s64 (x0, x1, 2),
+ pn0 = svwhilelt_c64 (x0, x1, 2))
+
+/*
+** whilelt_pn7_rr_4_s64:
+** whilelt pn[0-9]+\.d, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilelt_c64_s64 (x0, x1, 4),
+ pn7 = svwhilelt_c64 (x0, x1, 4))
+
+/*
+** whilelt_pn8_rr_2_s64:
+** whilelt pn8\.d, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilelt_c64_s64 (x0, x1, 2),
+ pn8 = svwhilelt_c64 (x0, x1, 2))
+
+/*
+** whilelt_pn15_rr_4_s64:
+** whilelt pn15\.d, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilelt_c64_s64 (x0, x1, 4),
+ pn15 = svwhilelt_c64 (x0, x1, 4))
+
+/*
+** whilelt_pn8_0r_2_s64:
+** whilelt pn8\.d, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilelt_c64 ((int64_t) 0, x1, 2),
+ pn8 = svwhilelt_c64_s64 (0, x1, 2))
+
+/*
+** whilelt_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilelt pn8\.d, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilelt_c64 ((int64_t) 5, x1, 4),
+ pn8 = svwhilelt_c64_s64 (5, x1, 4))
+
+/*
+** whilelt_pn8_r0_2_s64:
+** whilelt pn8\.d, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilelt_c64 (x0, (int64_t) 0, 2),
+ pn8 = svwhilelt_c64_s64 (x0, 0, 2))
+
+/*
+** whilelt_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilelt pn15\.d, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilelt_c64 (x0, (int64_t) 5, 4),
+ pn15 = svwhilelt_c64_s64 (x0, 5, 4))
+
+/*
+** whilelt_pn8_rr_2_u64:
+** whilelo pn8\.d, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilelt_c64_u64 (x0, x1, 2),
+ pn8 = svwhilelt_c64 (x0, x1, 2))
+
+/*
+** whilelt_pn8_0r_4_u64:
+** whilelo pn8\.d, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilelt_c64 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilelt_c64_u64 (0, x1, 4))
+
+/*
+** whilelt_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilelo pn8\.d, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilelt_c64 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilelt_c64_u64 (5, x1, 2))
+
+/*
+** whilelt_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilelo pn8\.d, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilelt_c64 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilelt_c64_u64 (x0, 5, 4))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilelt_c8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilelt_c8.c
new file mode 100644
index 0000000..02a64b8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilelt_c8.c
@@ -0,0 +1,124 @@
+/* { dg-do assemble { target aarch64_asm_sve2p1_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sve2p1_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2p1"
+#ifdef STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** whilelt_pn0_rr_2_s64:
+** whilelt pn[0-9]+\.b, x0, x1, vlx2
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn0_rr_2_s64, int64_t,
+ pn0 = svwhilelt_c8_s64 (x0, x1, 2),
+ pn0 = svwhilelt_c8 (x0, x1, 2))
+
+/*
+** whilelt_pn7_rr_4_s64:
+** whilelt pn[0-9]+\.b, x0, x1, vlx4
+** mov [^\n]+
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn7_rr_4_s64, int64_t,
+ pn7 = svwhilelt_c8_s64 (x0, x1, 4),
+ pn7 = svwhilelt_c8 (x0, x1, 4))
+
+/*
+** whilelt_pn8_rr_2_s64:
+** whilelt pn8\.b, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_rr_2_s64, int64_t,
+ pn8 = svwhilelt_c8_s64 (x0, x1, 2),
+ pn8 = svwhilelt_c8 (x0, x1, 2))
+
+/*
+** whilelt_pn15_rr_4_s64:
+** whilelt pn15\.b, x0, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn15_rr_4_s64, int64_t,
+ pn15 = svwhilelt_c8_s64 (x0, x1, 4),
+ pn15 = svwhilelt_c8 (x0, x1, 4))
+
+/*
+** whilelt_pn8_0r_2_s64:
+** whilelt pn8\.b, xzr, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_0r_2_s64, int64_t,
+ pn8 = svwhilelt_c8 ((int64_t) 0, x1, 2),
+ pn8 = svwhilelt_c8_s64 (0, x1, 2))
+
+/*
+** whilelt_pn8_5r_4_s64:
+** mov (x[0-9]+), #?5
+** whilelt pn8\.b, \1, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_5r_4_s64, int64_t,
+ pn8 = svwhilelt_c8 ((int64_t) 5, x1, 4),
+ pn8 = svwhilelt_c8_s64 (5, x1, 4))
+
+/*
+** whilelt_pn8_r0_2_s64:
+** whilelt pn8\.b, x0, xzr, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_r0_2_s64, int64_t,
+ pn8 = svwhilelt_c8 (x0, (int64_t) 0, 2),
+ pn8 = svwhilelt_c8_s64 (x0, 0, 2))
+
+/*
+** whilelt_pn15_r5_4_s64:
+** mov (x[0-9]+), #?5
+** whilelt pn15\.b, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn15_r5_4_s64, int64_t,
+ pn15 = svwhilelt_c8 (x0, (int64_t) 5, 4),
+ pn15 = svwhilelt_c8_s64 (x0, 5, 4))
+
+/*
+** whilelt_pn8_rr_2_u64:
+** whilelo pn8\.b, x0, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_rr_2_u64, uint64_t,
+ pn8 = svwhilelt_c8_u64 (x0, x1, 2),
+ pn8 = svwhilelt_c8 (x0, x1, 2))
+
+/*
+** whilelt_pn8_0r_4_u64:
+** whilelo pn8\.b, xzr, x1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_0r_4_u64, uint64_t,
+ pn8 = svwhilelt_c8 ((uint64_t) 0, x1, 4),
+ pn8 = svwhilelt_c8_u64 (0, x1, 4))
+
+/*
+** whilelt_pn8_5r_2_u64:
+** mov (x[0-9]+), #?5
+** whilelo pn8\.b, \1, x1, vlx2
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_5r_2_u64, uint64_t,
+ pn8 = svwhilelt_c8 ((uint64_t) 5, x1, 2),
+ pn8 = svwhilelt_c8_u64 (5, x1, 2))
+
+/*
+** whilelt_pn8_r5_4_u64:
+** mov (x[0-9]+), #?5
+** whilelo pn8\.b, x0, \1, vlx4
+** ret
+*/
+TEST_COMPARE_S_C (whilelt_pn8_r5_4_u64, uint64_t,
+ pn8 = svwhilelt_c8 (x0, (uint64_t) 5, 4),
+ pn8 = svwhilelt_c8_u64 (x0, 5, 4))