author		Ju-Zhe Zhong <juzhe.zhong@rivai.ai>	2023-03-22 10:49:56 +0800
committer	Kito Cheng <kito.cheng@sifive.com>	2023-03-23 11:14:12 +0800
commit		cd0c433e5faba9a18f64881cd761a53a530aa798 (patch)
tree		b780f5a6bd08e01de13c3d9b96c7e6cad1f9b679 /gcc
parent		116a8678840f9f52ec14639ff07e302a8c429f32 (diff)
RISC-V: Fix LRA issue for LMUL < 1 vector spills [PR109244]
In order to decrease memory traffic, we don't use whole-register
load/store for LMUL less than 1 or for mask modes, so those cases
require one extra general-purpose register for setting up the VL
register. Allocating that register is not allowed during the LRA
process, so we define a few special move patterns for LRA, which
defer the expansion until after LRA.

gcc/ChangeLog:

	PR target/109244
	* config/riscv/riscv-protos.h (emit_vlmax_vsetvl): Define as global.
	(emit_vlmax_op): Ditto.
	* config/riscv/riscv-v.cc (get_sew): New function.
	(emit_vlmax_vsetvl): Adapt function.
	(emit_pred_op): Ditto.
	(emit_vlmax_op): Ditto.
	(emit_nonvlmax_op): Ditto.
	(legitimize_move): Fix LRA ICE.
	(gen_no_side_effects_vsetvl_rtx): Adapt function.
	* config/riscv/vector.md (@mov<V_FRACT:mode><P:mode>_lra): New pattern.
	(@mov<VB:mode><P:mode>_lra): Ditto.
	(*mov<V_FRACT:mode><P:mode>_lra): Ditto.
	(*mov<VB:mode><P:mode>_lra): Ditto.

gcc/testsuite/ChangeLog:

	PR target/109244
	* g++.target/riscv/rvv/base/pr109244.C: New test.
	* gcc.target/riscv/rvv/base/binop_vv_constraint-4.c: Adapt testcase.
	* gcc.target/riscv/rvv/base/binop_vv_constraint-6.c: Ditto.
	* gcc.target/riscv/rvv/base/binop_vx_constraint-127.c: Ditto.
	* gcc.target/riscv/rvv/base/spill-1.c: Ditto.
	* gcc.target/riscv/rvv/base/spill-2.c: Ditto.
	* gcc.target/riscv/rvv/base/spill-3.c: Ditto.
	* gcc.target/riscv/rvv/base/spill-5.c: Ditto.
	* gcc.target/riscv/rvv/base/spill-7.c: Ditto.
	* g++.target/riscv/rvv/base/bug-18.C: New test.
	* gcc.target/riscv/rvv/base/merge_constraint-3.c: New test.
	* gcc.target/riscv/rvv/base/merge_constraint-4.c: New test.
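For context, a minimal sketch of the kind of input that hits this path,
modelled on the spill-*.c and bug-18.C tests added below (the function
name and the exact flags are illustrative, not taken from the PR): a
fractional-LMUL value that must stay live across a point where every
vector register is clobbered forces LRA to spill it, and for LMUL < 1
the spill sequence needs a scratch GPR for the vsetvl.

/* Hypothetical reproducer sketch; compile with
   -march=rv64gcv -mabi=lp64d -O2 (the options used by pr109244.C).  */
#include <stdint.h>
#include <riscv_vector.h>

void f (int8_t *in, int8_t *out)
{
  /* LMUL = 1/8: spilled with vsetvli + vse8.v rather than vs1r.v, so
     the spill code needs a scratch GPR to set up VL.  */
  vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
  /* Clobber all vector registers so LRA must spill v across here.  */
  asm volatile ("" :::
		"v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
		"v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
		"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
		"v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31");
  __riscv_vse8_v_i8mf8 (out, v, 4);
}

Before this patch, allocating that scratch GPR mid-reload could ICE;
with the new mov<mode>_lra patterns below, the move carries a
(clobber (match_scratch)) through LRA and is only expanded once
reload_completed holds.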
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/config/riscv/riscv-protos.h                                    |   2
-rw-r--r--  gcc/config/riscv/riscv-v.cc                                        |  67
-rw-r--r--  gcc/config/riscv/vector.md                                         |  56
-rw-r--r--  gcc/testsuite/g++.target/riscv/rvv/base/bug-18.C                   | 139
-rw-r--r--  gcc/testsuite/g++.target/riscv/rvv/base/pr109244.C                 |  76
-rw-r--r--  gcc/testsuite/gcc.target/riscv/rvv/base/binop_vv_constraint-4.c    |   1
-rw-r--r--  gcc/testsuite/gcc.target/riscv/rvv/base/binop_vv_constraint-6.c    |   1
-rw-r--r--  gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-127.c  |   2
-rw-r--r--  gcc/testsuite/gcc.target/riscv/rvv/base/merge_constraint-3.c       |  95
-rw-r--r--  gcc/testsuite/gcc.target/riscv/rvv/base/merge_constraint-4.c       |  28
-rw-r--r--  gcc/testsuite/gcc.target/riscv/rvv/base/spill-1.c                  | 194
-rw-r--r--  gcc/testsuite/gcc.target/riscv/rvv/base/spill-2.c                  | 136
-rw-r--r--  gcc/testsuite/gcc.target/riscv/rvv/base/spill-3.c                  |  76
-rw-r--r--  gcc/testsuite/gcc.target/riscv/rvv/base/spill-5.c                  |  36
-rw-r--r--  gcc/testsuite/gcc.target/riscv/rvv/base/spill-7.c                  | 161
15 files changed, 751 insertions, 319 deletions
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 78c47ec..e41f65a 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -158,7 +158,9 @@ bool check_builtin_call (location_t, vec<location_t>, unsigned int,
tree, unsigned int, tree *);
bool const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
bool legitimize_move (rtx, rtx, machine_mode);
+void emit_vlmax_vsetvl (machine_mode, rtx);
void emit_vlmax_op (unsigned, rtx, rtx, machine_mode);
+void emit_vlmax_op (unsigned, rtx, rtx, rtx, machine_mode);
void emit_nonvlmax_op (unsigned, rtx, rtx, rtx, machine_mode);
enum vlmul_type get_vlmul (machine_mode);
unsigned int get_ratio (machine_mode);
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 9b83ef6..d7b77fd 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -98,6 +98,15 @@ private:
expand_operand m_ops[MAX_OPERANDS];
};
+static unsigned
+get_sew (machine_mode mode)
+{
+ unsigned int sew = GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL
+ ? 8
+ : GET_MODE_BITSIZE (GET_MODE_INNER (mode));
+ return sew;
+}
+
/* Return true if X is a const_vector with all duplicate elements, which is in
the range between MINVAL and MAXVAL. */
bool
@@ -109,13 +118,10 @@ const_vec_all_same_in_range_p (rtx x, HOST_WIDE_INT minval,
&& IN_RANGE (INTVAL (elt), minval, maxval));
}
-static rtx
-emit_vlmax_vsetvl (machine_mode vmode)
+void
+emit_vlmax_vsetvl (machine_mode vmode, rtx vl)
{
- rtx vl = gen_reg_rtx (Pmode);
- unsigned int sew = GET_MODE_CLASS (vmode) == MODE_VECTOR_BOOL
- ? 8
- : GET_MODE_BITSIZE (GET_MODE_INNER (vmode));
+ unsigned int sew = get_sew (vmode);
enum vlmul_type vlmul = get_vlmul (vmode);
unsigned int ratio = calculate_ratio (sew, vlmul);
@@ -125,8 +131,6 @@ emit_vlmax_vsetvl (machine_mode vmode)
const0_rtx));
else
emit_insn (gen_vlmax_avl (Pmode, vl, gen_int_mode (ratio, Pmode)));
-
- return vl;
}
/* Calculate SEW/LMUL ratio. */
@@ -166,7 +170,7 @@ calculate_ratio (unsigned int sew, enum vlmul_type vlmul)
/* Emit an RVV unmask && vl mov from SRC to DEST. */
static void
emit_pred_op (unsigned icode, rtx mask, rtx dest, rtx src, rtx len,
- machine_mode mask_mode)
+ machine_mode mask_mode, bool vlmax_p)
{
insn_expander<8> e;
machine_mode mode = GET_MODE (dest);
@@ -186,17 +190,18 @@ emit_pred_op (unsigned icode, rtx mask, rtx dest, rtx src, rtx len,
e.add_input_operand (len, Pmode);
else
{
- rtx vlmax = emit_vlmax_vsetvl (mode);
+ rtx vlmax = gen_reg_rtx (Pmode);
+ emit_vlmax_vsetvl (mode, vlmax);
e.add_input_operand (vlmax, Pmode);
}
if (GET_MODE_CLASS (mode) != MODE_VECTOR_BOOL)
e.add_policy_operand (get_prefer_tail_policy (), get_prefer_mask_policy ());
- if (len)
- e.add_avl_type_operand (avl_type::NONVLMAX);
- else
+ if (vlmax_p)
e.add_avl_type_operand (avl_type::VLMAX);
+ else
+ e.add_avl_type_operand (avl_type::NONVLMAX);
e.expand ((enum insn_code) icode, MEM_P (dest) || MEM_P (src));
}
@@ -204,14 +209,21 @@ emit_pred_op (unsigned icode, rtx mask, rtx dest, rtx src, rtx len,
void
emit_vlmax_op (unsigned icode, rtx dest, rtx src, machine_mode mask_mode)
{
- emit_pred_op (icode, NULL_RTX, dest, src, NULL_RTX, mask_mode);
+ emit_pred_op (icode, NULL_RTX, dest, src, NULL_RTX, mask_mode, true);
+}
+
+void
+emit_vlmax_op (unsigned icode, rtx dest, rtx src, rtx len,
+ machine_mode mask_mode)
+{
+ emit_pred_op (icode, NULL_RTX, dest, src, len, mask_mode, true);
}
void
emit_nonvlmax_op (unsigned icode, rtx dest, rtx src, rtx len,
machine_mode mask_mode)
{
- emit_pred_op (icode, NULL_RTX, dest, src, len, mask_mode);
+ emit_pred_op (icode, NULL_RTX, dest, src, len, mask_mode, false);
}
static void
@@ -265,6 +277,20 @@ legitimize_move (rtx dest, rtx src, machine_mode mask_mode)
expand_const_vector (dest, src, mask_mode);
return true;
}
+
+ /* In order to decrease memory traffic, we don't use whole register
+ * load/store for LMUL less than 1 and mask modes, so those cases will
+ * require one extra general purpose register, which is not allowed
+ * during the LRA process.  We therefore have a special move pattern
+ * used for LRA, which defers the expansion until after LRA. */
+ if ((known_lt (GET_MODE_SIZE (mode), BYTES_PER_RISCV_VECTOR)
+ || GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
+ && lra_in_progress)
+ {
+ emit_insn (gen_mov_lra (mode, Pmode, dest, src));
+ return true;
+ }
+
if (known_ge (GET_MODE_SIZE (mode), BYTES_PER_RISCV_VECTOR)
&& GET_MODE_CLASS (mode) != MODE_VECTOR_BOOL)
{
@@ -274,6 +300,13 @@ legitimize_move (rtx dest, rtx src, machine_mode mask_mode)
return false;
}
+
+ if (register_operand (src, mode) && register_operand (dest, mode))
+ {
+ emit_insn (gen_rtx_SET (dest, src));
+ return true;
+ }
+
if (!register_operand (src, mode) && !register_operand (dest, mode))
{
rtx tmp = gen_reg_rtx (mode);
@@ -540,9 +573,7 @@ force_vector_length_operand (rtx vl)
static rtx
gen_no_side_effects_vsetvl_rtx (machine_mode vmode, rtx vl, rtx avl)
{
- unsigned int sew = GET_MODE_CLASS (vmode) == MODE_VECTOR_BOOL
- ? 8
- : GET_MODE_BITSIZE (GET_MODE_INNER (vmode));
+ unsigned int sew = get_sew (vmode);
return gen_vsetvl_no_side_effects (Pmode, vl, avl, gen_int_mode (sew, Pmode),
gen_int_mode (get_vlmul (vmode), Pmode),
const0_rtx, const0_rtx);
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 20978a5..1ddc1d3 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -635,6 +635,62 @@
[(set_attr "type" "vmov")
(set_attr "mode" "<MODE>")])
+(define_expand "@mov<V_FRACT:mode><P:mode>_lra"
+ [(parallel
+ [(set (match_operand:V_FRACT 0 "reg_or_mem_operand")
+ (match_operand:V_FRACT 1 "reg_or_mem_operand"))
+ (clobber (match_scratch:P 2))])]
+ "TARGET_VECTOR && (lra_in_progress || reload_completed)"
+{})
+
+(define_expand "@mov<VB:mode><P:mode>_lra"
+ [(parallel
+ [(set (match_operand:VB 0 "reg_or_mem_operand")
+ (match_operand:VB 1 "reg_or_mem_operand"))
+ (clobber (match_scratch:P 2))])]
+ "TARGET_VECTOR && (lra_in_progress || reload_completed)"
+{})
+
+(define_insn_and_split "*mov<V_FRACT:mode><P:mode>_lra"
+ [(set (match_operand:V_FRACT 0 "reg_or_mem_operand" "=vr, m,vr")
+ (match_operand:V_FRACT 1 "reg_or_mem_operand" " m,vr,vr"))
+ (clobber (match_scratch:P 2 "=&r,&r,X"))]
+ "TARGET_VECTOR && (lra_in_progress || reload_completed)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ if (REG_P (operands[0]) && REG_P (operands[1]))
+ emit_insn (gen_rtx_SET (operands[0], operands[1]));
+ else
+ {
+ riscv_vector::emit_vlmax_vsetvl (<V_FRACT:MODE>mode, operands[2]);
+ riscv_vector::emit_vlmax_op (code_for_pred_mov (<V_FRACT:MODE>mode),
+ operands[0], operands[1], operands[2], <VM>mode);
+ }
+ DONE;
+})
+
+(define_insn_and_split "*mov<VB:mode><P:mode>_lra"
+ [(set (match_operand:VB 0 "reg_or_mem_operand" "=vr, m,vr")
+ (match_operand:VB 1 "reg_or_mem_operand" " m,vr,vr"))
+ (clobber (match_scratch:P 2 "=&r,&r,X"))]
+ "TARGET_VECTOR && (lra_in_progress || reload_completed)"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ if (REG_P (operands[0]) && REG_P (operands[1]))
+ emit_insn (gen_rtx_SET (operands[0], operands[1]));
+ else
+ {
+ riscv_vector::emit_vlmax_vsetvl (<VB:MODE>mode, operands[2]);
+ riscv_vector::emit_vlmax_op (code_for_pred_mov (<VB:MODE>mode),
+ operands[0], operands[1], operands[2], <VB:MODE>mode);
+ }
+ DONE;
+})
+
;; -----------------------------------------------------------------
;; ---- Duplicate Operations
;; -----------------------------------------------------------------
diff --git a/gcc/testsuite/g++.target/riscv/rvv/base/bug-18.C b/gcc/testsuite/g++.target/riscv/rvv/base/bug-18.C
new file mode 100644
index 0000000..868ec1e
--- /dev/null
+++ b/gcc/testsuite/g++.target/riscv/rvv/base/bug-18.C
@@ -0,0 +1,139 @@
+/* { dg-do compile { target { riscv_vector } } } */
+
+#include <iostream>
+#include "riscv_vector.h"
+using std::cerr;
+using std::endl;
+template < class , class b > int c(b val) {
+ return val;
+}
+auto &f32(c< float, uint32_t >);
+template < class d >
+bool check(d , d , size_t );
+int main() {
+ size_t e ;
+ int16_t f[] {};
+ size_t g ;
+ int32_t i[] {4784};
+ size_t aa = 4;
+ int16_t ab[] {2313};
+ int16_t j[] {7114 };
+ int16_t k[] {7696 };
+ uint32_t l[] {9951 };
+ int32_t m[] {2659 };
+ uint16_t n[] {7537 };
+ int32_t o[] {05733}
+ ;
+ uint32_t p[] {7010090 };
+ uint32_t q[] {21060 };
+ uint32_t r[] {2273 };
+ uint32_t s[] {4094366 };
+ int16_t ac[] {11880 };
+ int16_t t[] {10988};
+ int16_t ad[] {30376};
+ int8_t u[] {};
+ int8_t ae[] {7};
+ int8_t v[] {40};
+ int8_t af[] {6};
+ int16_t w[] {4077 };
+ int16_t x[] {7932 };
+ int8_t y[] {3};
+ int8_t z[] {4};
+ uint16_t ag[] {2831};
+ int16_t ah[] {10412 };
+ int16_t ai[] {6823};
+ int32_t aj[] {8572 };
+ int32_t ak[] {9999 };
+ uint32_t al[] {50166962 };
+ uint32_t am[] {9781 };
+ int8_t an[] {9, 35};
+ float ao[] {222.65, 22.79};
+ float ap[] {126.10, 13.92};
+ int64_t aq[] {508727, 5556};
+ int16_t ar[] {2861 };
+ int16_t as[] {21420};
+ int16_t at[] {4706 };
+ uint32_t au ;
+ uint32_t av = 600295662;
+ size_t aw ;
+ int16_t ax = 13015;
+ uint32_t ay ;
+ uint16_t az = 10652;
+ int32_t ba ;
+ int8_t bb ;
+ int64_t bc = 40183771683589512;
+
+asm volatile ("ttt":::"memory");
+ vint16mf4_t bd = __riscv_vle16_v_i16mf4(j, 2);
+ vuint32mf2_t be = __riscv_vle32_v_u32mf2(l, 2);
+ vint32mf2_t bf = __riscv_vle32_v_i32mf2(m, 2);
+ vuint16mf4_t bg = __riscv_vle16_v_u16mf4(n, 2);
+ vint8mf4_t bh ;
+ vuint32m2_t bi = __riscv_vle32_v_u32m2(p, 2);
+ vuint32m2_t bj = __riscv_vle32_v_u32m2(q, 2);
+ vuint32m2_t bk = __riscv_vle32_v_u32m2(r, 2);
+ vuint32m2_t bl = __riscv_vle32_v_u32m2(s, 2);
+ vint16m1_t bm = __riscv_vle16_v_i16m1(ac, 2);
+ vint16m1_t bn = __riscv_vle16_v_i16m1(t, 2);
+ vint8mf2_t bo = __riscv_vle8_v_i8mf2(u, 1);
+ vint8mf2_t bp = __riscv_vle8_v_i8mf2(ae, 1);
+ vint8mf8_t bq = __riscv_vle8_v_i8mf8(af, 1);
+ vint16mf4_t br = __riscv_vle16_v_i16mf4(w, 2);
+ vint16mf4_t bs = __riscv_vle16_v_i16mf4(x, 2);
+ vint8mf8_t bt = __riscv_vle8_v_i8mf8(y, 1);
+ vint8mf8_t bu = __riscv_vle8_v_i8mf8(z, 1);
+ vuint16mf4_t bv = __riscv_vle16_v_u16mf4(ag, 1);
+ vint16mf4_t bw = __riscv_vle16_v_i16mf4(ah, 2);
+ vint16mf4_t bx = __riscv_vle16_v_i16mf4(ai, 2);
+ vint32mf2_t by = __riscv_vle32_v_i32mf2(aj, 2);
+ vint32mf2_t bz = __riscv_vle32_v_i32mf2(ak, 2);
+ vuint32mf2_t ca = __riscv_vle32_v_u32mf2(al, 2);
+ vuint32mf2_t cb = __riscv_vle32_v_u32mf2(am, 2);
+ vint8mf8_t cc = __riscv_vle8_v_i8mf8(an, 2);
+ vfloat32mf2_t cd = __riscv_vle32_v_f32mf2(ao, 2);
+ vfloat32mf2_t ce = __riscv_vle32_v_f32mf2(ap, 2);
+ vint64m1_t cf = __riscv_vle64_v_i64m1(aq, 2);
+ vint16mf4_t cg = __riscv_vle16_v_i16mf4(ar, 2);
+ vint16mf4_t ch = __riscv_vle16_v_i16mf4(as, 2);
+ vint16mf4_t var_62 = __riscv_vle16_v_i16mf4(at, 2);
+ vbool64_t var_20 = __riscv_vmadc_vx_u32mf2_b64(be, ay, 2);
+ int8_t var_17 = __riscv_vmv_x_s_i8mf4_i8(bh);
+ vbool16_t var_28 = __riscv_vmsltu_vv_u32m2_b16(bk, bl, 2);
+ vint8mf2_t var_14 = __riscv_vadd_vv_i8mf2(bo, bp, 1);
+ vbool64_t var_8 = __riscv_vmseq_vv_i16mf4_b64(br, bs, 2);
+ vbool64_t var_42 = __riscv_vmsbc_vx_u16mf4_b64(bv, az, 1);
+ vbool64_t var_46 = __riscv_vmsge_vx_i32mf2_b64(by, ba, 2);
+ vint16mf4_t var_4 = __riscv_vncvt_x_x_w_i16mf4(bz, 2);
+ vbool64_t var_51 = __riscv_vmsgt_vx_i8mf8_b64(cc, bb, 2);
+ vbool64_t var_56 = __riscv_vmfne_vv_f32mf2_b64(cd, ce, 2);
+ vbool64_t var_55 = __riscv_vmseq_vx_i64m1_b64(cf, bc, 2);
+ vuint32m2_t var_16 = __riscv_vslideup_vx_u32m2_mu(var_28, bi, bj, aw, 2);
+ vint8mf2_t var_12 = __riscv_vmulh_vv_i8mf2(var_14, var_14, 1);
+ vint16mf4_t var_0 = __riscv_vdiv_vv_i16mf4_mu(var_8, var_4, ch, var_62, 2);
+ vuint32m2_t var_13 = __riscv_vsub_vx_u32m2(var_16, av, 2);
+ int8_t var_9 = __riscv_vmv_x_s_i8mf2_i8(var_12);
+ vint16mf4_t var_19 = __riscv_vor_vx_i16mf4_mu(var_20, var_0, bd, ax, 2);
+ uint32_t var_10 = __riscv_vmv_x_s_u32m2_u32(var_13);
+ vint8mf8_t var_7 = __riscv_vmadd_vx_i8mf8_mu(var_42, bt, var_9, bu, 1);
+ __riscv_vse16_v_i16mf4(k, var_19, 2);
+ vuint32mf2_t var_3 =
+ __riscv_vslide1down_vx_u32mf2_mu(var_51, ca, cb, var_10, 2);
+ if (check(k, ab, aa))
+ cerr << "check 8 fails" << endl;
+ vbool64_t var_2 = __riscv_vmsne_vx_u32mf2_b64_mu(var_55, var_56, var_3, au, 2);
+ vint16mf4_t var_1 = __riscv_vssub_vv_i16mf4_mu(var_2, var_0, var_4, cg, 2);
+ vint16mf4_t var_5 = __riscv_vxor_vv_i16mf4_mu(var_46, var_1, bw, bx, 2);
+ vint32mf2_t var_18 = __riscv_vwmaccsu_vv_i32mf2(bf, var_1, bg, 2);
+ vint8mf8_t var_6 = __riscv_vncvt_x_x_w_i8mf8_mu(var_8, var_7, var_5, 1);
+ vint16m1_t var_15 = __riscv_vredand_vs_i16mf4_i16m1_tu(bm, var_5, bn, 2);
+ __riscv_vse32_v_i32mf2(o, var_18, 2);
+ vbool64_t var_11 = __riscv_vmsge_vx_i8mf8_b64(var_6, var_17, 1);
+ __riscv_vse16_v_i16m1(ad, var_15, 1);
+ if (check(o, i, g))
+ cerr << "check 1 fails" << endl;
+ __riscv_vse8_v_i8mf8_m(var_11, v, bq, 1);
+ if (check(ad, f, e))
+ cerr << "check 4 fails" << endl;
+ cerr << "check 7 fails" << endl;
+ return 0;
+}
diff --git a/gcc/testsuite/g++.target/riscv/rvv/base/pr109244.C b/gcc/testsuite/g++.target/riscv/rvv/base/pr109244.C
new file mode 100644
index 0000000..eebfc23
--- /dev/null
+++ b/gcc/testsuite/g++.target/riscv/rvv/base/pr109244.C
@@ -0,0 +1,76 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O2" } */
+typedef int a;
+using c = float;
+template < typename > using e = int;
+#pragma riscv intrinsic "vector"
+template < typename, int, int f > struct aa {
+ using g = int;
+ template < typename > static constexpr int h() { return f; }
+ template < typename i > using ab = aa< i, 0, h< i >() >;
+};
+template < int f > struct p { using j = aa< float, 6, f >; };
+template < int f > struct k { using j = typename p< f >::j; };
+template < typename, int f > using ac = typename k< f >::j;
+template < class ad > using l = typename ad::g;
+template < class g, class ad > using ab = typename ad::ab< g >;
+template < class ad > using ae = ab< e< ad >, ad >;
+template < int m > vuint32mf2_t ai(aa< a, m, -1 >, a aj) {
+ return __riscv_vmv_v_x_u32mf2(aj, 0);
+}
+template < int m > vfloat32mf2_t ai(aa< c, m, -1 >, c);
+template < class ad > using ak = decltype(ai(ad(), l< ad >()));
+template < class ad > ak< ad > al(ad d) {
+ ae< decltype(d) > am;
+ return an(d, ai(am, 0));
+}
+template < typename g, int m > vuint8mf2_t ao(aa< g, m, -1 >, vuint32mf2_t n) {
+ return __riscv_vreinterpret_v_u32mf2_u8mf2(n);
+}
+template < int m > vuint32mf2_t ap(aa< a, m, -1 >, vuint8mf2_t n) {
+ return __riscv_vreinterpret_v_u8mf2_u32mf2(n);
+}
+template < typename g, int m > vuint8mf2_t ao(aa< g, m, -1 >, vfloat32mf2_t n) {
+ return __riscv_vreinterpret_v_u32mf2_u8mf2(
+ __riscv_vreinterpret_v_f32mf2_u32mf2(n));
+}
+template < int m > vfloat32mf2_t ap(aa< c, m, -1 >, vuint8mf2_t);
+template < class ad, class aq > ak< ad > an(ad d, aq n) {
+ return ap(d, ao(d, n));
+}
+vbool64_t av(vuint32mf2_t, vuint32mf2_t);
+template < class ad > bool ba(ad, vbool64_t);
+template < class ad > using bb = decltype(al(ad()));
+template < typename g > using be = ac< g, -1 >;
+struct bf {
+ template < class ad > bool bh(ad, bb< ad > bi) {
+ ae< ad > am;
+ return ba(am, av(an(am, bi), al(am)));
+ }
+};
+int bo;
+template < class ad, class bl, typename g > void o(ad d, bl bn, g) {
+ bb< ad > bq = al(d);
+ for (; bo;) {
+ int br = bn.bh(d, bq);
+ if (__builtin_expect(br, 0))
+ for (;;)
+ ;
+ }
+}
+template < class ad, class bl, typename g > void bs(ad d, bl bn, g) {
+ g bu;
+ o(d, bn, bu);
+}
+template < class ad, class bl, typename g >
+void bv(ad d, bl bn, g *, int, g *bt) {
+ bs(d, bn, bt);
+}
+float by;
+int bz;
+float ca;
+void b() {
+ be< float > d;
+ bf bn;
+ bv(d, bn, &by, bz, &ca);
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vv_constraint-4.c b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vv_constraint-4.c
index 1b0afed0..552c264 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vv_constraint-4.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vv_constraint-4.c
@@ -24,3 +24,4 @@ void f2 (void * in, void *out, int32_t x)
__riscv_vsm_v_b32 (out, m4, 4);
}
+/* { dg-final { scan-assembler-times {vmv} 2 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vv_constraint-6.c b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vv_constraint-6.c
index 384e230..6a65fb5 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vv_constraint-6.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vv_constraint-6.c
@@ -24,4 +24,5 @@ void f2 (void * in, void *out, int32_t x)
__riscv_vsm_v_b32 (out, m4, 4);
}
+/* { dg-final { scan-assembler-times {vmv} 2 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-127.c b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-127.c
index a353a7a..3933c35 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-127.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-127.c
@@ -24,4 +24,4 @@ void f2 (void * in, void *out, int32_t x)
__riscv_vsm_v_b32 (out, m4, 4);
}
-
+/* { dg-final { scan-assembler-times {vmv} 2 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/merge_constraint-3.c b/gcc/testsuite/gcc.target/riscv/rvv/base/merge_constraint-3.c
new file mode 100644
index 0000000..d9cbc85
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/merge_constraint-3.c
@@ -0,0 +1,95 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
+
+void f1 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vuint16m1_t v1 = __riscv_vle16_v_u16m1 (base1, vl);
+ vuint16m1_t v2 = __riscv_vle16_v_u16m1 (base2, vl);
+ vbool16_t m1 = __riscv_vlm_v_b16 (base3, vl);
+ vbool16_t m2 = __riscv_vlm_v_b16 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28");
+
+ vbool16_t v = __riscv_vmseq_vv_u16m1_b16_mu(m1,m2,v1,v2,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27");
+
+ __riscv_vsm_v_b16 (out,v,vl);
+}
+
+void f2 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t shift)
+{
+ vuint16m1_t v1 = __riscv_vle16_v_u16m1 (base1, vl);
+ vbool16_t m1 = __riscv_vlm_v_b16 (base3, vl);
+ vbool16_t m2 = __riscv_vlm_v_b16 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29");
+
+ vbool16_t v = __riscv_vmseq_vx_u16m1_b16_mu(m1,m2,v1,shift,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29");
+
+
+ __riscv_vsm_v_b16 (out,v,vl);
+}
+
+void f3 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, size_t shift)
+{
+ vuint16m1_t v1 = __riscv_vle16_v_u16m1 (base1, vl);
+ vuint16m1_t v2 = __riscv_vle16_v_u16m1 (base2, vl);
+ vbool16_t m1 = __riscv_vlm_v_b16 (base3, vl);
+ vbool16_t m2 = __riscv_vlm_v_b16 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28");
+
+ vbool16_t v = __riscv_vmsltu_vv_u16m1_b16_mu(m1,m2,v1,v2,vl);
+ asm volatile("#" ::
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27");
+
+ __riscv_vsm_v_b16 (out,v,vl);
+}
+
+void f4 (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t shift)
+{
+ vuint16m1_t v1 = __riscv_vle16_v_u16m1 (base1, vl);
+ vbool16_t m1 = __riscv_vlm_v_b16 (base3, vl);
+ vbool16_t m2 = __riscv_vlm_v_b16 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29");
+
+ vbool16_t v = __riscv_vmsltu_vx_u16m1_b16_mu(m1,m2,v1,shift,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29");
+
+
+ __riscv_vsm_v_b16 (out,v,vl);
+}
+
+/* { dg-final { scan-assembler-not {vmv} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/merge_constraint-4.c b/gcc/testsuite/gcc.target/riscv/rvv/base/merge_constraint-4.c
new file mode 100644
index 0000000..db245b0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/merge_constraint-4.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
+
+#include "riscv_vector.h"
+
+void f (void *base1,void *base2,void *base3,void *base4,void *out,size_t vl, uint16_t shift)
+{
+ vuint16m1_t v1 = __riscv_vle16_v_u16m1 (base1, vl);
+ vbool16_t m1 = __riscv_vlm_v_b16 (base3, vl);
+ vbool16_t m2 = __riscv_vlm_v_b16 (base4, vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29");
+
+ vbool16_t v = __riscv_vmsltu_vx_u16m1_b16_mu(m1,m2,v1,shift,vl);
+ asm volatile("#" ::
+ : "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
+ "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25",
+ "v26", "v27", "v28", "v29", "v30", "v31");
+
+
+ __riscv_vsm_v_b16 (out,v,vl);
+}
+
+/* { dg-final { scan-assembler-times {vmv} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-1.c
index b1220c4..2f2d858 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-1.c
@@ -10,20 +10,20 @@
** csrr\tt0,vlenb
** sub\tsp,sp,t0
** ...
-** csrr\ta2,vlenb
-** srli\ta2,a2,3
-** slli\ta3,a2,3
-** sub\ta3,a3,a2
-** add\ta3,a3,sp
-** vse8.v\tv24,0\(a3\)
-** ...
-** csrr\ta2,vlenb
-** srli\ta2,a2,3
-** slli\ta3,a2,3
-** sub\ta3,a3,a2
-** add\ta3,a3,sp
-** vle8.v\tv24,0\(a3\)
-** vse8.v\tv24,0\(a1\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,3
+** slli\t[a-x0-9]+,[a-x0-9]+,3
+** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,3
+** slli\t[a-x0-9]+,[a-x0-9]+,3
+** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
** csrr\tt0,vlenb
** add\tsp,sp,t0
** ...
@@ -42,21 +42,21 @@ spill_1 (int8_t *in, int8_t *out)
** csrr\tt0,vlenb
** sub\tsp,sp,t0
** vsetvli\ta5,zero,e8,mf4,ta,ma
-** vle8.v\tv24,0\(a0\)
-** csrr\ta2,vlenb
-** srli\ta2,a2,2
-** slli\ta3,a2,2
-** sub\ta3,a3,a2
-** add\ta3,a3,sp
-** vse8.v\tv24,0\(a3\)
-** ...
-** csrr\ta2,vlenb
-** srli\ta2,a2,2
-** slli\ta3,a2,2
-** sub\ta3,a3,a2
-** add\ta3,a3,sp
-** vle8.v\tv24,0\(a3\)
-** vse8.v\tv24,0\(a1\)
+** vle8.v\tv[0-9]+,0\(a0\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,2
+** slli\t[a-x0-9]+,[a-x0-9]+,2
+** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,2
+** slli\t[a-x0-9]+,[a-x0-9]+,2
+** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
** csrr\tt0,vlenb
** add\tsp,sp,t0
** ...
@@ -75,17 +75,17 @@ spill_2 (int8_t *in, int8_t *out)
** csrr\tt0,vlenb
** sub\tsp,sp,t0
** vsetvli\ta5,zero,e8,mf2,ta,ma
-** vle8.v\tv24,0\(a0\)
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vse8.v\tv24,0\(a3\)
-** ...
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vle8.v\tv24,0\(a3\)
-** vse8.v\tv24,0\(a1\)
+** vle8.v\tv[0-9]+,0\(a0\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
** csrr\tt0,vlenb
** add\tsp,sp,t0
** ...
@@ -104,10 +104,10 @@ spill_3 (int8_t *in, int8_t *out)
** csrr\tt0,vlenb
** sub\tsp,sp,t0
** ...
-** vs1r.v\tv24,0\(sp\)
+** vs1r.v\tv[0-9]+,0\(sp\)
** ...
** vl1re8.v\tv2,0\(sp\)
-** vs1r.v\tv2,0\(a1\)
+** vs1r.v\tv2,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -128,10 +128,10 @@ spill_4 (int8_t *in, int8_t *out)
** slli\tt1,t0,1
** sub\tsp,sp,t1
** ...
-** vs2r.v\tv24,0\(sp\)
+** vs2r.v\tv[0-9]+,0\(sp\)
** ...
** vl2re8.v\tv4,0\(sp\)
-** vs2r.v\tv4,0\(a1\)
+** vs2r.v\tv4,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -152,10 +152,10 @@ spill_5 (int8_t *in, int8_t *out)
** slli\tt1,t0,2
** sub\tsp,sp,t1
** ...
-** vs4r.v\tv24,0\(sp\)
+** vs4r.v\tv[0-9]+,0\(sp\)
** ...
** vl4re8.v\tv8,0\(sp\)
-** vs4r.v\tv8,0\(a1\)
+** vs4r.v\tv8,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -176,10 +176,10 @@ spill_6 (int8_t *in, int8_t *out)
** slli\tt1,t0,3
** sub\tsp,sp,t1
** ...
-** vs8r.v\tv24,0\(sp\)
+** vs8r.v\tv[0-9]+,0\(sp\)
** ...
** vl8re8.v\tv16,0\(sp\)
-** vs8r.v\tv16,0\(a1\)
+** vs8r.v\tv16,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -199,21 +199,21 @@ spill_7 (int8_t *in, int8_t *out)
** csrr\tt0,vlenb
** sub\tsp,sp,t0
** vsetvli\ta5,zero,e8,mf8,ta,ma
-** vle8.v\tv24,0\(a0\)
-** csrr\ta2,vlenb
-** srli\ta2,a2,3
-** slli\ta3,a2,3
-** sub\ta3,a3,a2
-** add\ta3,a3,sp
-** vse8.v\tv24,0\(a3\)
-** ...
-** csrr\ta2,vlenb
-** srli\ta2,a2,3
-** slli\ta3,a2,3
-** sub\ta3,a3,a2
-** add\ta3,a3,sp
-** vle8.v\tv24,0\(a3\)
-** vse8.v\tv24,0\(a1\)
+** vle8.v\tv[0-9]+,0\(a0\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,3
+** slli\t[a-x0-9]+,[a-x0-9]+,3
+** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,3
+** slli\t[a-x0-9]+,[a-x0-9]+,3
+** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
** csrr\tt0,vlenb
** add\tsp,sp,t0
** ...
@@ -232,21 +232,21 @@ spill_8 (uint8_t *in, uint8_t *out)
** csrr\tt0,vlenb
** sub\tsp,sp,t0
** vsetvli\ta5,zero,e8,mf4,ta,ma
-** vle8.v\tv24,0\(a0\)
-** csrr\ta2,vlenb
-** srli\ta2,a2,2
-** slli\ta3,a2,2
-** sub\ta3,a3,a2
-** add\ta3,a3,sp
-** vse8.v\tv24,0\(a3\)
-** ...
-** csrr\ta2,vlenb
-** srli\ta2,a2,2
-** slli\ta3,a2,2
-** sub\ta3,a3,a2
-** add\ta3,a3,sp
-** vle8.v\tv24,0\(a3\)
-** vse8.v\tv24,0\(a1\)
+** vle8.v\tv[0-9]+,0\(a0\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,2
+** slli\t[a-x0-9]+,[a-x0-9]+,2
+** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,2
+** slli\t[a-x0-9]+,[a-x0-9]+,2
+** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
** csrr\tt0,vlenb
** add\tsp,sp,t0
** ...
@@ -265,17 +265,17 @@ spill_9 (uint8_t *in, uint8_t *out)
** csrr\tt0,vlenb
** sub\tsp,sp,t0
** vsetvli\ta5,zero,e8,mf2,ta,ma
-** vle8.v\tv24,0\(a0\)
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vse8.v\tv24,0\(a3\)
-** ...
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vle8.v\tv24,0\(a3\)
-** vse8.v\tv24,0\(a1\)
+** vle8.v\tv[0-9]+,0\(a0\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
** csrr\tt0,vlenb
** add\tsp,sp,t0
** ...
@@ -294,10 +294,10 @@ spill_10 (uint8_t *in, uint8_t *out)
** csrr\tt0,vlenb
** sub\tsp,sp,t0
** ...
-** vs1r.v\tv24,0\(sp\)
+** vs1r.v\tv[0-9]+,0\(sp\)
** ...
** vl1re8.v\tv2,0\(sp\)
-** vs1r.v\tv2,0\(a1\)
+** vs1r.v\tv2,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -318,10 +318,10 @@ spill_11 (uint8_t *in, uint8_t *out)
** slli\tt1,t0,1
** sub\tsp,sp,t1
** ...
-** vs2r.v\tv24,0\(sp\)
+** vs2r.v\tv[0-9]+,0\(sp\)
** ...
** vl2re8.v\tv4,0\(sp\)
-** vs2r.v\tv4,0\(a1\)
+** vs2r.v\tv4,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -342,10 +342,10 @@ spill_12 (uint8_t *in, uint8_t *out)
** slli\tt1,t0,2
** sub\tsp,sp,t1
** ...
-** vs4r.v\tv24,0\(sp\)
+** vs4r.v\tv[0-9]+,0\(sp\)
** ...
** vl4re8.v\tv8,0\(sp\)
-** vs4r.v\tv8,0\(a1\)
+** vs4r.v\tv8,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -366,10 +366,10 @@ spill_13 (uint8_t *in, uint8_t *out)
** slli\tt1,t0,3
** sub\tsp,sp,t1
** ...
-** vs8r.v\tv24,0\(sp\)
+** vs8r.v\tv[0-9]+,0\(sp\)
** ...
** vl8re8.v\tv16,0\(sp\)
-** vs8r.v\tv16,0\(a1\)
+** vs8r.v\tv16,0\([a-x0-9]+\)
** ...
** jr\tra
*/
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-2.c
index ca1904b..4bcaf4d 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-2.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-2.c
@@ -10,21 +10,21 @@
** csrr\tt0,vlenb
** sub\tsp,sp,t0
** vsetvli\ta5,zero,e16,mf4,ta,ma
-** vle16.v\tv24,0\(a0\)
-** csrr\ta2,vlenb
-** srli\ta2,a2,2
-** slli\ta3,a2,2
-** sub\ta3,a3,a2
-** add\ta3,a3,sp
-** vse16.v\tv24,0\(a3\)
-** ...
-** csrr\ta2,vlenb
-** srli\ta2,a2,2
-** slli\ta3,a2,2
-** sub\ta3,a3,a2
-** add\ta3,a3,sp
-** vle16.v\tv24,0\(a3\)
-** vse16.v\tv24,0\(a1\)
+** vle16.v\tv[0-9]+,0\(a0\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,2
+** slli\t[a-x0-9]+,[a-x0-9]+,2
+** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse16.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,2
+** slli\t[a-x0-9]+,[a-x0-9]+,2
+** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle16.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse16.v\tv[0-9]+,0\([a-x0-9]+\)
** csrr\tt0,vlenb
** add\tsp,sp,t0
** ...
@@ -43,17 +43,17 @@ spill_2 (int16_t *in, int16_t *out)
** csrr\tt0,vlenb
** sub\tsp,sp,t0
** vsetvli\ta5,zero,e16,mf2,ta,ma
-** vle16.v\tv24,0\(a0\)
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vse16.v\tv24,0\(a3\)
-** ...
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vle16.v\tv24,0\(a3\)
-** vse16.v\tv24,0\(a1\)
+** vle16.v\tv[0-9]+,0\(a0\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse16.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle16.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse16.v\tv[0-9]+,0\([a-x0-9]+\)
** csrr\tt0,vlenb
** add\tsp,sp,t0
** ...
@@ -72,10 +72,10 @@ spill_3 (int16_t *in, int16_t *out)
** csrr\tt0,vlenb
** sub\tsp,sp,t0
** ...
-** vs1r.v\tv24,0\(sp\)
+** vs1r.v\tv[0-9]+,0\(sp\)
** ...
** vl1re16.v\tv2,0\(sp\)
-** vs1r.v\tv2,0\(a1\)
+** vs1r.v\tv2,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -96,10 +96,10 @@ spill_4 (int16_t *in, int16_t *out)
** slli\tt1,t0,1
** sub\tsp,sp,t1
** ...
-** vs2r.v\tv24,0\(sp\)
+** vs2r.v\tv[0-9]+,0\(sp\)
** ...
** vl2re16.v\tv4,0\(sp\)
-** vs2r.v\tv4,0\(a1\)
+** vs2r.v\tv4,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -120,10 +120,10 @@ spill_5 (int16_t *in, int16_t *out)
** slli\tt1,t0,2
** sub\tsp,sp,t1
** ...
-** vs4r.v\tv24,0\(sp\)
+** vs4r.v\tv[0-9]+,0\(sp\)
** ...
** vl4re16.v\tv8,0\(sp\)
-** vs4r.v\tv8,0\(a1\)
+** vs4r.v\tv8,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -144,10 +144,10 @@ spill_6 (int16_t *in, int16_t *out)
** slli\tt1,t0,3
** sub\tsp,sp,t1
** ...
-** vs8r.v\tv24,0\(sp\)
+** vs8r.v\tv[0-9]+,0\(sp\)
** ...
** vl8re16.v\tv16,0\(sp\)
-** vs8r.v\tv16,0\(a1\)
+** vs8r.v\tv16,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -167,21 +167,21 @@ spill_7 (int16_t *in, int16_t *out)
** csrr\tt0,vlenb
** sub\tsp,sp,t0
** vsetvli\ta5,zero,e16,mf4,ta,ma
-** vle16.v\tv24,0\(a0\)
-** csrr\ta2,vlenb
-** srli\ta2,a2,2
-** slli\ta3,a2,2
-** sub\ta3,a3,a2
-** add\ta3,a3,sp
-** vse16.v\tv24,0\(a3\)
-** ...
-** csrr\ta2,vlenb
-** srli\ta2,a2,2
-** slli\ta3,a2,2
-** sub\ta3,a3,a2
-** add\ta3,a3,sp
-** vle16.v\tv24,0\(a3\)
-** vse16.v\tv24,0\(a1\)
+** vle16.v\tv[0-9]+,0\(a0\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,2
+** slli\t[a-x0-9]+,[a-x0-9]+,2
+** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse16.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,2
+** slli\t[a-x0-9]+,[a-x0-9]+,2
+** sub\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle16.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse16.v\tv[0-9]+,0\([a-x0-9]+\)
** csrr\tt0,vlenb
** add\tsp,sp,t0
** ...
@@ -200,17 +200,17 @@ spill_9 (uint16_t *in, uint16_t *out)
** csrr\tt0,vlenb
** sub\tsp,sp,t0
** vsetvli\ta5,zero,e16,mf2,ta,ma
-** vle16.v\tv24,0\(a0\)
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vse16.v\tv24,0\(a3\)
-** ...
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vle16.v\tv24,0\(a3\)
-** vse16.v\tv24,0\(a1\)
+** vle16.v\tv[0-9]+,0\(a0\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse16.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle16.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse16.v\tv[0-9]+,0\([a-x0-9]+\)
** csrr\tt0,vlenb
** add\tsp,sp,t0
** ...
@@ -229,10 +229,10 @@ spill_10 (uint16_t *in, uint16_t *out)
** csrr\tt0,vlenb
** sub\tsp,sp,t0
** ...
-** vs1r.v\tv24,0\(sp\)
+** vs1r.v\tv[0-9]+,0\(sp\)
** ...
** vl1re16.v\tv2,0\(sp\)
-** vs1r.v\tv2,0\(a1\)
+** vs1r.v\tv2,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -253,10 +253,10 @@ spill_11 (uint16_t *in, uint16_t *out)
** slli\tt1,t0,1
** sub\tsp,sp,t1
** ...
-** vs2r.v\tv24,0\(sp\)
+** vs2r.v\tv[0-9]+,0\(sp\)
** ...
** vl2re16.v\tv4,0\(sp\)
-** vs2r.v\tv4,0\(a1\)
+** vs2r.v\tv4,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -277,10 +277,10 @@ spill_12 (uint16_t *in, uint16_t *out)
** slli\tt1,t0,2
** sub\tsp,sp,t1
** ...
-** vs4r.v\tv24,0\(sp\)
+** vs4r.v\tv[0-9]+,0\(sp\)
** ...
** vl4re16.v\tv8,0\(sp\)
-** vs4r.v\tv8,0\(a1\)
+** vs4r.v\tv8,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -301,10 +301,10 @@ spill_13 (uint16_t *in, uint16_t *out)
** slli\tt1,t0,3
** sub\tsp,sp,t1
** ...
-** vs8r.v\tv24,0\(sp\)
+** vs8r.v\tv[0-9]+,0\(sp\)
** ...
** vl8re16.v\tv16,0\(sp\)
-** vs8r.v\tv16,0\(a1\)
+** vs8r.v\tv16,0\([a-x0-9]+\)
** ...
** jr\tra
*/
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-3.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-3.c
index 2039ca3..82d685e 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-3.c
@@ -10,17 +10,17 @@
** csrr\tt0,vlenb
** sub\tsp,sp,t0
** vsetvli\ta5,zero,e32,mf2,ta,ma
-** vle32.v\tv24,0\(a0\)
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vse32.v\tv24,0\(a3\)
-** ...
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vle32.v\tv24,0\(a3\)
-** vse32.v\tv24,0\(a1\)
+** vle32.v\tv[0-9]+,0\(a0\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
** csrr\tt0,vlenb
** add\tsp,sp,t0
** ...
@@ -39,10 +39,10 @@ spill_3 (int32_t *in, int32_t *out)
** csrr\tt0,vlenb
** sub\tsp,sp,t0
** ...
-** vs1r.v\tv24,0\(sp\)
+** vs1r.v\tv[0-9]+,0\(sp\)
** ...
** vl1re32.v\tv2,0\(sp\)
-** vs1r.v\tv2,0\(a1\)
+** vs1r.v\tv2,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -63,10 +63,10 @@ spill_4 (int32_t *in, int32_t *out)
** slli\tt1,t0,1
** sub\tsp,sp,t1
** ...
-** vs2r.v\tv24,0\(sp\)
+** vs2r.v\tv[0-9]+,0\(sp\)
** ...
** vl2re32.v\tv4,0\(sp\)
-** vs2r.v\tv4,0\(a1\)
+** vs2r.v\tv4,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -87,10 +87,10 @@ spill_5 (int32_t *in, int32_t *out)
** slli\tt1,t0,2
** sub\tsp,sp,t1
** ...
-** vs4r.v\tv24,0\(sp\)
+** vs4r.v\tv[0-9]+,0\(sp\)
** ...
** vl4re32.v\tv8,0\(sp\)
-** vs4r.v\tv8,0\(a1\)
+** vs4r.v\tv8,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -111,10 +111,10 @@ spill_6 (int32_t *in, int32_t *out)
** slli\tt1,t0,3
** sub\tsp,sp,t1
** ...
-** vs8r.v\tv24,0\(sp\)
+** vs8r.v\tv[0-9]+,0\(sp\)
** ...
** vl8re32.v\tv16,0\(sp\)
-** vs8r.v\tv16,0\(a1\)
+** vs8r.v\tv16,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -134,17 +134,17 @@ spill_7 (int32_t *in, int32_t *out)
** csrr\tt0,vlenb
** sub\tsp,sp,t0
** vsetvli\ta5,zero,e32,mf2,ta,ma
-** vle32.v\tv24,0\(a0\)
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vse32.v\tv24,0\(a3\)
-** ...
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vle32.v\tv24,0\(a3\)
-** vse32.v\tv24,0\(a1\)
+** vle32.v\tv[0-9]+,0\(a0\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
+** ...
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
** csrr\tt0,vlenb
** add\tsp,sp,t0
** ...
@@ -163,10 +163,10 @@ spill_10 (uint32_t *in, uint32_t *out)
** csrr\tt0,vlenb
** sub\tsp,sp,t0
** ...
-** vs1r.v\tv24,0\(sp\)
+** vs1r.v\tv[0-9]+,0\(sp\)
** ...
** vl1re32.v\tv2,0\(sp\)
-** vs1r.v\tv2,0\(a1\)
+** vs1r.v\tv2,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -187,10 +187,10 @@ spill_11 (uint32_t *in, uint32_t *out)
** slli\tt1,t0,1
** sub\tsp,sp,t1
** ...
-** vs2r.v\tv24,0\(sp\)
+** vs2r.v\tv[0-9]+,0\(sp\)
** ...
** vl2re32.v\tv4,0\(sp\)
-** vs2r.v\tv4,0\(a1\)
+** vs2r.v\tv4,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -211,10 +211,10 @@ spill_12 (uint32_t *in, uint32_t *out)
** slli\tt1,t0,2
** sub\tsp,sp,t1
** ...
-** vs4r.v\tv24,0\(sp\)
+** vs4r.v\tv[0-9]+,0\(sp\)
** ...
** vl4re32.v\tv8,0\(sp\)
-** vs4r.v\tv8,0\(a1\)
+** vs4r.v\tv8,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -235,10 +235,10 @@ spill_13 (uint32_t *in, uint32_t *out)
** slli\tt1,t0,3
** sub\tsp,sp,t1
** ...
-** vs8r.v\tv24,0\(sp\)
+** vs8r.v\tv[0-9]+,0\(sp\)
** ...
** vl8re32.v\tv16,0\(sp\)
-** vs8r.v\tv16,0\(a1\)
+** vs8r.v\tv16,0\([a-x0-9]+\)
** ...
** jr\tra
*/
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-5.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-5.c
index 3c228a0..5b3f75f 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-5.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-5.c
@@ -10,17 +10,17 @@
** csrr\tt0,vlenb
** sub\tsp,sp,t0
** vsetvli\ta5,zero,e32,mf2,ta,ma
-** vle32.v\tv24,0\(a0\)
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vse32.v\tv24,0\(a3\)
+** vle32.v\tv[0-9]+,0\(a0\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
** ...
-** csrr\ta3,vlenb
-** srli\ta3,a3,1
-** add\ta3,a3,sp
-** vle32.v\tv24,0\(a3\)
-** vse32.v\tv24,0\(a1\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,sp
+** vle32.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
** csrr\tt0,vlenb
** add\tsp,sp,t0
** ...
@@ -39,10 +39,10 @@ spill_3 (float *in, float *out)
** csrr\tt0,vlenb
** sub\tsp,sp,t0
** ...
-** vs1r.v\tv24,0\(sp\)
+** vs1r.v\tv[0-9]+,0\(sp\)
** ...
** vl1re32.v\tv2,0\(sp\)
-** vs1r.v\tv2,0\(a1\)
+** vs1r.v\tv2,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -63,10 +63,10 @@ spill_4 (float *in, float *out)
** slli\tt1,t0,1
** sub\tsp,sp,t1
** ...
-** vs2r.v\tv24,0\(sp\)
+** vs2r.v\tv[0-9]+,0\(sp\)
** ...
** vl2re32.v\tv4,0\(sp\)
-** vs2r.v\tv4,0\(a1\)
+** vs2r.v\tv4,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -87,10 +87,10 @@ spill_5 (float *in, float *out)
** slli\tt1,t0,2
** sub\tsp,sp,t1
** ...
-** vs4r.v\tv24,0\(sp\)
+** vs4r.v\tv[0-9]+,0\(sp\)
** ...
** vl4re32.v\tv8,0\(sp\)
-** vs4r.v\tv8,0\(a1\)
+** vs4r.v\tv8,0\([a-x0-9]+\)
** ...
** jr\tra
*/
@@ -111,10 +111,10 @@ spill_6 (float *in, float *out)
** slli\tt1,t0,3
** sub\tsp,sp,t1
** ...
-** vs8r.v\tv24,0\(sp\)
+** vs8r.v\tv[0-9]+,0\(sp\)
** ...
** vl8re32.v\tv16,0\(sp\)
-** vs8r.v\tv16,0\(a1\)
+** vs8r.v\tv16,0\([a-x0-9]+\)
** ...
** jr\tra
*/
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-7.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-7.c
index cf1eea2..2bc5455 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-7.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-7.c
@@ -7,89 +7,92 @@
/*
** spill:
-** csrr\tt0,vlenb
-** slli\tt1,t0,4
-** sub\tsp,sp,t1
-** vsetvli\ta3,zero,e8,mf8,ta,ma
-** vle8.v\tv24,0\(a0\)
-** csrr\ta5,vlenb
-** srli\ta5,a5,3
-** add\ta5,a5,sp
-** vse8.v\tv24,0\(a5\)
-** addi\ta5,a0,1
-** vsetvli\ta4,zero,e8,mf4,ta,ma
-** vle8.v\tv24,0\(a5\)
-** csrr\ta5,vlenb
-** srli\ta5,a5,2
-** add\ta5,a5,sp
-** vse8.v\tv24,0\(a5\)
-** addi\ta2,a0,2
-** vsetvli\ta5,zero,e8,mf2,ta,ma
-** vle8.v\tv24,0\(a2\)
-** csrr\ta2,vlenb
-** srli\ta2,a2,1
-** add\ta2,a2,sp
-** vse8.v\tv24,0\(a2\)
-** addi\ta2,a0,3
-** vl1re8.v\tv24,0\(a2\)
-** csrr\ta2,vlenb
-** add\ta2,a2,sp
-** vs1r.v\tv24,0\(a2\)
-** addi\ta2,a0,4
-** vl2re8.v\tv24,0\(a2\)
-** csrr\tt3,vlenb
-** slli\ta2,t3,1
-** add\ta2,a2,sp
-** vs2r.v\tv24,0\(a2\)
-** addi\ta2,a0,5
-** vl4re8.v\tv24,0\(a2\)
-** mv\ta2,t3
-** slli\tt3,t3,2
-** add\tt3,t3,sp
-** vs4r.v\tv24,0\(t3\)
-** addi\ta0,a0,6
-** vl8re8.v\tv24,0\(a0\)
-** slli\ta0,a2,3
-** add\ta0,a0,sp
-** vs8r.v\tv24,0\(a0\)
+** csrr\t[a-x0-9]+,vlenb
+** slli\t[a-x0-9]+,[a-x0-9]+,4
+** sub\tsp,[a-x0-9]+,[a-x0-9]+
+** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,3
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** addi\t[a-x0-9]+,[a-x0-9]+,1
+** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,2
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** addi\t[a-x0-9]+,[a-x0-9]+,2
+** vsetvli\t[a-x0-9]+,zero,e8,mf2,ta,ma
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
+** csrr\t[a-x0-9]+,vlenb
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** addi\t[a-x0-9]+,[a-x0-9]+,3
+** vl1re8.v\tv[0-9]+,0\([a-x0-9]+\)
+** csrr\t[a-x0-9]+,vlenb
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** vs1r.v\tv[0-9]+,0\([a-x0-9]+\)
+** addi\t[a-x0-9]+,[a-x0-9]+,4
+** vl2re8.v\tv[0-9]+,0\([a-x0-9]+\)
+** csrr\t[a-x0-9]+,vlenb
+** slli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** vs2r.v\tv[0-9]+,0\([a-x0-9]+\)
+** addi\t[a-x0-9]+,[a-x0-9]+,5
+** vl4re8.v\tv[0-9]+,0\([a-x0-9]+\)
+** mv\t[a-x0-9]+,[a-x0-9]+
+** slli\t[a-x0-9]+,[a-x0-9]+,2
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** vs4r.v\tv[0-9]+,0\([a-x0-9]+\)
+** addi\t[a-x0-9]+,[a-x0-9]+,6
+** vl8re8.v\tv[0-9]+,0\([a-x0-9]+\)
** ...
-** srli\ta0,a2,3
-** add\ta0,a0,sp
+** slli\t[a-x0-9]+,[a-x0-9]+,3
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** vs8r.v\tv[0-9]+,0\([a-x0-9]+\)
** ...
-** vle8.v\tv27,0\(a0\)
-** vse8.v\tv27,0\(a1\)
-** addi\ta3,a1,1
-** srli\ta0,a2,2
-** add\ta0,a0,sp
+** srli\t[a-x0-9]+,[a-x0-9]+,3
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
** ...
-** vle8.v\tv27,0\(a0\)
-** vse8.v\tv27,0\(a3\)
-** addi\ta4,a1,2
-** srli\ta3,a2,1
-** add\ta3,a3,sp
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** addi\t[a-x0-9]+,[a-x0-9]+,1
+** srli\t[a-x0-9]+,[a-x0-9]+,2
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
** ...
-** vle8.v\tv27,0\(a3\)
-** vse8.v\tv27,0\(a4\)
-** addi\ta5,a1,3
-** add\ta4,a2,sp
-** vl1re8.v\tv25,0\(a4\)
-** vs1r.v\tv25,0\(a5\)
-** addi\ta5,a1,4
-** slli\ta4,a2,1
-** add\ta4,a4,sp
-** vl2re8.v\tv26,0\(a4\)
-** vs2r.v\tv26,0\(a5\)
-** addi\ta5,a1,5
-** vl4re8.v\tv28,0\(t3\)
-** vs4r.v\tv28,0\(a5\)
-** addi\ta1,a1,6
-** slli\ta5,a2,3
-** add\ta5,a5,sp
-** vl8re8.v\tv24,0\(a5\)
-** vs8r.v\tv24,0\(a1\)
-** csrr\tt0,vlenb
-** slli\tt1,t0,4
-** add\tsp,sp,t1
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** addi\t[a-x0-9]+,[a-x0-9]+,2
+** srli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** ...
+** vle8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
+** addi\t[a-x0-9]+,[a-x0-9]+,3
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** vl1re8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vs1r.v\tv[0-9]+,0\([a-x0-9]+\)
+** addi\t[a-x0-9]+,[a-x0-9]+,4
+** slli\t[a-x0-9]+,[a-x0-9]+,1
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** vl2re8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vs2r.v\tv[0-9]+,0\([a-x0-9]+\)
+** addi\t[a-x0-9]+,[a-x0-9]+,5
+** slli\t[a-x0-9]+,[a-x0-9]+,2
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** vl4re8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vs4r.v\tv[0-9]+,0\([a-x0-9]+\)
+** addi\t[a-x0-9]+,[a-x0-9]+,6
+** slli\t[a-x0-9]+,[a-x0-9]+,3
+** add\t[a-x0-9]+,[a-x0-9]+,[a-x0-9]+
+** vl8re8.v\tv[0-9]+,0\([a-x0-9]+\)
+** vs8r.v\tv[0-9]+,0\([a-x0-9]+\)
+** csrr\t[a-x0-9]+,vlenb
+** slli\t[a-x0-9]+,[a-x0-9]+,4
+** add\tsp,[a-x0-9]+,[a-x0-9]+
** ...
** jr\tra
*/