path: root/llvm/test/CodeGen/RISCV
Diffstat (limited to 'llvm/test/CodeGen/RISCV')
-rw-r--r--  llvm/test/CodeGen/RISCV/GlobalISel/div-by-constant.ll | 39
-rw-r--r--  llvm/test/CodeGen/RISCV/GlobalISel/double-convert.ll | 16
-rw-r--r--  llvm/test/CodeGen/RISCV/GlobalISel/float-convert.ll | 16
-rw-r--r--  llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-abs-rv64.mir | 3
-rw-r--r--  llvm/test/CodeGen/RISCV/GlobalISel/rotl-rotr.ll | 116
-rw-r--r--  llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb-zbkb.ll | 16
-rw-r--r--  llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll | 31
-rw-r--r--  llvm/test/CodeGen/RISCV/GlobalISel/rv64zbkb.ll | 6
-rw-r--r--  llvm/test/CodeGen/RISCV/GlobalISel/shifts.ll | 24
-rw-r--r--  llvm/test/CodeGen/RISCV/GlobalISel/wide-scalar-shift-by-byte-multiple-legalization.ll | 132
-rw-r--r--  llvm/test/CodeGen/RISCV/abds-neg.ll | 8
-rw-r--r--  llvm/test/CodeGen/RISCV/abds.ll | 94
-rw-r--r--  llvm/test/CodeGen/RISCV/addimm-mulimm.ll | 12
-rw-r--r--  llvm/test/CodeGen/RISCV/aext-to-sext.ll | 2
-rw-r--r--  llvm/test/CodeGen/RISCV/atomic-signext.ll | 4
-rw-r--r--  llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll | 4
-rw-r--r--  llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll | 4
-rw-r--r--  llvm/test/CodeGen/RISCV/bfloat-convert.ll | 4
-rw-r--r--  llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll | 40
-rw-r--r--  llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll | 96
-rw-r--r--  llvm/test/CodeGen/RISCV/div-by-constant.ll | 20
-rw-r--r--  llvm/test/CodeGen/RISCV/double-convert-strict.ll | 18
-rw-r--r--  llvm/test/CodeGen/RISCV/double-convert.ll | 18
-rw-r--r--  llvm/test/CodeGen/RISCV/float-convert-strict.ll | 32
-rw-r--r--  llvm/test/CodeGen/RISCV/float-convert.ll | 32
-rw-r--r--  llvm/test/CodeGen/RISCV/fpclamptosat.ll | 88
-rw-r--r--  llvm/test/CodeGen/RISCV/half-convert-strict.ll | 42
-rw-r--r--  llvm/test/CodeGen/RISCV/half-convert.ll | 60
-rw-r--r--  llvm/test/CodeGen/RISCV/iabs.ll | 2
-rw-r--r--  llvm/test/CodeGen/RISCV/interrupt-attr.ll | 5616
-rw-r--r--  llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll | 38
-rw-r--r--  llvm/test/CodeGen/RISCV/intrinsic-cttz-elts.ll | 2
-rw-r--r--  llvm/test/CodeGen/RISCV/machine-combiner.ll | 4
-rw-r--r--  llvm/test/CodeGen/RISCV/memcmp-optsize.ll | 8
-rw-r--r--  llvm/test/CodeGen/RISCV/memcmp.ll | 8
-rw-r--r--  llvm/test/CodeGen/RISCV/mul.ll | 12
-rw-r--r--  llvm/test/CodeGen/RISCV/neg-abs.ll | 2
-rw-r--r--  llvm/test/CodeGen/RISCV/overflow-intrinsics.ll | 8
-rw-r--r--  llvm/test/CodeGen/RISCV/pr145360.ll | 8
-rw-r--r--  llvm/test/CodeGen/RISCV/prefer-w-inst.mir | 4
-rw-r--r--  llvm/test/CodeGen/RISCV/rotl-rotr.ll | 96
-rw-r--r--  llvm/test/CodeGen/RISCV/rv64i-demanded-bits.ll | 2
-rw-r--r--  llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll | 36
-rw-r--r--  llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll | 6
-rw-r--r--  llvm/test/CodeGen/RISCV/rv64xtheadbb.ll | 48
-rw-r--r--  llvm/test/CodeGen/RISCV/rv64zba.ll | 6
-rw-r--r--  llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll | 16
-rw-r--r--  llvm/test/CodeGen/RISCV/rv64zbb.ll | 62
-rw-r--r--  llvm/test/CodeGen/RISCV/rv64zbkb.ll | 2
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll | 144
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/expand-no-v.ll | 8
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll | 2
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll | 4
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll | 2
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll | 2
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll | 7
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll | 2
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll | 80
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/interrupt-attr-nocall.ll | 344
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/known-never-zero.ll | 12
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir | 50
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/rvv-vmerge-to-vmv.ll | 35
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll | 429
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll | 2
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll | 6
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vec3-setcc-crash.ll | 6
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll | 83
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll | 23
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vrol-sdnode.ll | 8
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vror-sdnode.ll | 8
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/vscale-power-of-two.ll | 2
-rw-r--r--  llvm/test/CodeGen/RISCV/select.ll | 2
-rw-r--r--  llvm/test/CodeGen/RISCV/sextw-removal.ll | 12
-rw-r--r--  llvm/test/CodeGen/RISCV/shifts.ll | 2
-rw-r--r--  llvm/test/CodeGen/RISCV/shl-cttz.ll | 12
-rw-r--r--  llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll | 16
-rw-r--r--  llvm/test/CodeGen/RISCV/srem-vector-lkk.ll | 52
-rw-r--r--  llvm/test/CodeGen/RISCV/typepromotion-overflow.ll | 2
-rw-r--r--  llvm/test/CodeGen/RISCV/urem-lkk.ll | 4
-rw-r--r--  llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll | 44
-rw-r--r--  llvm/test/CodeGen/RISCV/urem-vector-lkk.ll | 32
-rw-r--r--  llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll | 1901
-rw-r--r--  llvm/test/CodeGen/RISCV/xqciac.ll | 38
-rw-r--r--  llvm/test/CodeGen/RISCV/xtheadfmemidx.ll | 128
-rw-r--r--  llvm/test/CodeGen/RISCV/xtheadmemidx.ll | 759
-rw-r--r--  llvm/test/CodeGen/RISCV/zdinx-boundary-check.ll | 6
86 files changed, 2923 insertions, 8309 deletions
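
Most of the churn below follows a single pattern: on RV64, a W-suffixed instruction such as subw or negw is replaced by its plain 64-bit form (sub, neg) when every consumer of the result reads only the low 32 bits. The two forms always agree on those bits and differ only in how bits 63:32 are filled, so the rewrite preserves behavior. A minimal C sketch of that invariant (the helper names are illustrative, not from the patch):

#include <assert.h>
#include <stdint.h>

/* subw: 32-bit subtract, result sign-extended to 64 bits. */
static uint64_t subw(uint64_t a, uint64_t b) {
    return (uint64_t)(int64_t)(int32_t)((uint32_t)a - (uint32_t)b);
}

/* sub: full 64-bit subtract. */
static uint64_t sub64(uint64_t a, uint64_t b) { return a - b; }

int main(void) {
    uint64_t a = 0x123456789abcdef0ull, b = 0x0fedcba987654321ull;
    /* The low 32 bits always agree; only bits 63:32 may differ. */
    assert((uint32_t)subw(a, b) == (uint32_t)sub64(a, b));
    return 0;
}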
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/div-by-constant.ll b/llvm/test/CodeGen/RISCV/GlobalISel/div-by-constant.ll
index 4b999b8..6864afe 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/div-by-constant.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/div-by-constant.ll
@@ -66,7 +66,7 @@ define i32 @udiv_constant_add(i32 %a) nounwind {
; RV64IM-NEXT: srli a2, a2, 32
; RV64IM-NEXT: mul a1, a2, a1
; RV64IM-NEXT: srli a1, a1, 32
-; RV64IM-NEXT: subw a0, a0, a1
+; RV64IM-NEXT: sub a0, a0, a1
; RV64IM-NEXT: srliw a0, a0, 1
; RV64IM-NEXT: add a0, a0, a1
; RV64IM-NEXT: srliw a0, a0, 2
@@ -79,7 +79,7 @@ define i32 @udiv_constant_add(i32 %a) nounwind {
; RV64IMZB-NEXT: zext.w a2, a0
; RV64IMZB-NEXT: mul a1, a2, a1
; RV64IMZB-NEXT: srli a1, a1, 32
-; RV64IMZB-NEXT: subw a0, a0, a1
+; RV64IMZB-NEXT: sub a0, a0, a1
; RV64IMZB-NEXT: srliw a0, a0, 1
; RV64IMZB-NEXT: add a0, a0, a1
; RV64IMZB-NEXT: srliw a0, a0, 2
@@ -250,7 +250,7 @@ define i8 @udiv8_constant_add(i8 %a) nounwind {
; RV64-NEXT: zext.b a2, a0
; RV64-NEXT: mul a1, a2, a1
; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: subw a0, a0, a1
+; RV64-NEXT: sub a0, a0, a1
; RV64-NEXT: zext.b a0, a0
; RV64-NEXT: srli a0, a0, 1
; RV64-NEXT: add a0, a0, a1
@@ -414,8 +414,7 @@ define i32 @sdiv_constant_srai(i32 %a) nounwind {
; RV64-NEXT: addi a1, a1, 1639
; RV64-NEXT: sext.w a0, a0
; RV64-NEXT: mul a0, a0, a1
-; RV64-NEXT: srai a0, a0, 32
-; RV64-NEXT: sraiw a0, a0, 1
+; RV64-NEXT: srai a0, a0, 33
; RV64-NEXT: srliw a1, a0, 31
; RV64-NEXT: addw a0, a0, a1
; RV64-NEXT: ret
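
The fold above is plain shift arithmetic: after srai a0, a0, 32 the value is a sign-extended 32-bit quantity, so the following sraiw behaves exactly like srai, and two consecutive arithmetic right shifts by 32 and 1 combine into one shift by 33. A quick C check of the identity (illustrative, assuming the usual arithmetic-shift behavior for signed types):

#include <assert.h>
#include <stdint.h>

int main(void) {
    int64_t x = -123456789012345678;
    /* Composing arithmetic right shifts adds the shift amounts. */
    assert(((x >> 32) >> 1) == (x >> 33));
    return 0;
}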
@@ -656,8 +655,6 @@ define i8 @sdiv8_constant_srai(i8 %a) nounwind {
; RV32IM-NEXT: srai a0, a0, 24
; RV32IM-NEXT: mul a0, a0, a1
; RV32IM-NEXT: slli a0, a0, 16
-; RV32IM-NEXT: srai a0, a0, 24
-; RV32IM-NEXT: slli a0, a0, 24
; RV32IM-NEXT: srai a0, a0, 25
; RV32IM-NEXT: zext.b a1, a0
; RV32IM-NEXT: srli a1, a1, 7
@@ -670,9 +667,7 @@ define i8 @sdiv8_constant_srai(i8 %a) nounwind {
; RV32IMZB-NEXT: sext.b a0, a0
; RV32IMZB-NEXT: mul a0, a0, a1
; RV32IMZB-NEXT: sext.h a0, a0
-; RV32IMZB-NEXT: srai a0, a0, 8
-; RV32IMZB-NEXT: sext.b a0, a0
-; RV32IMZB-NEXT: srai a0, a0, 1
+; RV32IMZB-NEXT: srai a0, a0, 9
; RV32IMZB-NEXT: zext.b a1, a0
; RV32IMZB-NEXT: srli a1, a1, 7
; RV32IMZB-NEXT: add a0, a0, a1
@@ -685,8 +680,6 @@ define i8 @sdiv8_constant_srai(i8 %a) nounwind {
; RV64IM-NEXT: srai a0, a0, 56
; RV64IM-NEXT: mul a0, a0, a1
; RV64IM-NEXT: slli a0, a0, 48
-; RV64IM-NEXT: srai a0, a0, 56
-; RV64IM-NEXT: slli a0, a0, 56
; RV64IM-NEXT: srai a0, a0, 57
; RV64IM-NEXT: zext.b a1, a0
; RV64IM-NEXT: srli a1, a1, 7
@@ -699,9 +692,7 @@ define i8 @sdiv8_constant_srai(i8 %a) nounwind {
; RV64IMZB-NEXT: sext.b a0, a0
; RV64IMZB-NEXT: mul a0, a0, a1
; RV64IMZB-NEXT: sext.h a0, a0
-; RV64IMZB-NEXT: srai a0, a0, 8
-; RV64IMZB-NEXT: sext.b a0, a0
-; RV64IMZB-NEXT: srai a0, a0, 1
+; RV64IMZB-NEXT: srai a0, a0, 9
; RV64IMZB-NEXT: zext.b a1, a0
; RV64IMZB-NEXT: srli a1, a1, 7
; RV64IMZB-NEXT: add a0, a0, a1
@@ -816,7 +807,7 @@ define i8 @sdiv8_constant_sub_srai(i8 %a) nounwind {
; RV64IM-NEXT: mul a1, a2, a1
; RV64IM-NEXT: slli a1, a1, 48
; RV64IM-NEXT: srai a1, a1, 56
-; RV64IM-NEXT: subw a1, a1, a0
+; RV64IM-NEXT: sub a1, a1, a0
; RV64IM-NEXT: slli a1, a1, 56
; RV64IM-NEXT: srai a0, a1, 58
; RV64IM-NEXT: zext.b a1, a0
@@ -906,8 +897,6 @@ define i16 @sdiv16_constant_srai(i16 %a) nounwind {
; RV32IM-NEXT: addi a1, a1, 1639
; RV32IM-NEXT: srai a0, a0, 16
; RV32IM-NEXT: mul a0, a0, a1
-; RV32IM-NEXT: srai a0, a0, 16
-; RV32IM-NEXT: slli a0, a0, 16
; RV32IM-NEXT: srai a0, a0, 17
; RV32IM-NEXT: slli a1, a0, 16
; RV32IM-NEXT: srli a1, a1, 16
@@ -921,9 +910,7 @@ define i16 @sdiv16_constant_srai(i16 %a) nounwind {
; RV32IMZB-NEXT: addi a1, a1, 1639
; RV32IMZB-NEXT: sext.h a0, a0
; RV32IMZB-NEXT: mul a0, a0, a1
-; RV32IMZB-NEXT: srai a0, a0, 16
-; RV32IMZB-NEXT: sext.h a0, a0
-; RV32IMZB-NEXT: srai a0, a0, 1
+; RV32IMZB-NEXT: srai a0, a0, 17
; RV32IMZB-NEXT: zext.h a1, a0
; RV32IMZB-NEXT: srli a1, a1, 15
; RV32IMZB-NEXT: add a0, a0, a1
@@ -936,9 +923,7 @@ define i16 @sdiv16_constant_srai(i16 %a) nounwind {
; RV64IM-NEXT: addi a1, a1, 1639
; RV64IM-NEXT: srai a0, a0, 48
; RV64IM-NEXT: mul a0, a0, a1
-; RV64IM-NEXT: sraiw a0, a0, 16
-; RV64IM-NEXT: slli a0, a0, 48
-; RV64IM-NEXT: srai a0, a0, 49
+; RV64IM-NEXT: sraiw a0, a0, 17
; RV64IM-NEXT: slli a1, a0, 48
; RV64IM-NEXT: srli a1, a1, 48
; RV64IM-NEXT: srli a1, a1, 15
@@ -951,9 +936,7 @@ define i16 @sdiv16_constant_srai(i16 %a) nounwind {
; RV64IMZB-NEXT: addi a1, a1, 1639
; RV64IMZB-NEXT: sext.h a0, a0
; RV64IMZB-NEXT: mul a0, a0, a1
-; RV64IMZB-NEXT: sraiw a0, a0, 16
-; RV64IMZB-NEXT: sext.h a0, a0
-; RV64IMZB-NEXT: srai a0, a0, 1
+; RV64IMZB-NEXT: sraiw a0, a0, 17
; RV64IMZB-NEXT: zext.h a1, a0
; RV64IMZB-NEXT: srli a1, a1, 15
; RV64IMZB-NEXT: add a0, a0, a1
@@ -1071,7 +1054,7 @@ define i16 @sdiv16_constant_sub_srai(i16 %a) nounwind {
; RV64IM-NEXT: srai a2, a2, 48
; RV64IM-NEXT: mul a1, a2, a1
; RV64IM-NEXT: sraiw a1, a1, 16
-; RV64IM-NEXT: subw a1, a1, a0
+; RV64IM-NEXT: sub a1, a1, a0
; RV64IM-NEXT: slli a1, a1, 48
; RV64IM-NEXT: srai a0, a1, 51
; RV64IM-NEXT: slli a1, a0, 48
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/double-convert.ll b/llvm/test/CodeGen/RISCV/GlobalISel/double-convert.ll
index a49e94f..620c5ec 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/double-convert.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/double-convert.ll
@@ -246,17 +246,11 @@ define double @fcvt_d_wu(i32 %a) nounwind {
}
define double @fcvt_d_wu_load(ptr %p) nounwind {
-; RV32IFD-LABEL: fcvt_d_wu_load:
-; RV32IFD: # %bb.0:
-; RV32IFD-NEXT: lw a0, 0(a0)
-; RV32IFD-NEXT: fcvt.d.wu fa0, a0
-; RV32IFD-NEXT: ret
-;
-; RV64IFD-LABEL: fcvt_d_wu_load:
-; RV64IFD: # %bb.0:
-; RV64IFD-NEXT: lwu a0, 0(a0)
-; RV64IFD-NEXT: fcvt.d.wu fa0, a0
-; RV64IFD-NEXT: ret
+; CHECKIFD-LABEL: fcvt_d_wu_load:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: lw a0, 0(a0)
+; CHECKIFD-NEXT: fcvt.d.wu fa0, a0
+; CHECKIFD-NEXT: ret
;
; RV32I-LABEL: fcvt_d_wu_load:
; RV32I: # %bb.0:
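
Merging the RV32IFD and RV64IFD blocks under one CHECKIFD prefix works because the two targets now emit identical code: fcvt.d.wu (and fcvt.s.wu in the next file) converts only the low 32 bits of its integer source, so whether the preceding load sign-extends (lw) or zero-extends (lwu) is unobservable. A C model of that invariant (the helper name is hypothetical):

#include <assert.h>
#include <stdint.h>

/* fcvt.d.wu converts only the low 32 bits of the source register. */
static double fcvt_d_wu(uint64_t rs1) { return (double)(uint32_t)rs1; }

int main(void) {
    int32_t word = -7;
    /* lw sign-extends, lwu zero-extends; fcvt.d.wu sees the same bits. */
    assert(fcvt_d_wu((uint64_t)(int64_t)word) ==
           fcvt_d_wu((uint64_t)(uint32_t)word));
    return 0;
}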
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/float-convert.ll b/llvm/test/CodeGen/RISCV/GlobalISel/float-convert.ll
index fa09362..bbea792 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/float-convert.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/float-convert.ll
@@ -232,17 +232,11 @@ define float @fcvt_s_wu(i32 %a) nounwind {
}
define float @fcvt_s_wu_load(ptr %p) nounwind {
-; RV32IF-LABEL: fcvt_s_wu_load:
-; RV32IF: # %bb.0:
-; RV32IF-NEXT: lw a0, 0(a0)
-; RV32IF-NEXT: fcvt.s.wu fa0, a0
-; RV32IF-NEXT: ret
-;
-; RV64IF-LABEL: fcvt_s_wu_load:
-; RV64IF: # %bb.0:
-; RV64IF-NEXT: lwu a0, 0(a0)
-; RV64IF-NEXT: fcvt.s.wu fa0, a0
-; RV64IF-NEXT: ret
+; CHECKIF-LABEL: fcvt_s_wu_load:
+; CHECKIF: # %bb.0:
+; CHECKIF-NEXT: lw a0, 0(a0)
+; CHECKIF-NEXT: fcvt.s.wu fa0, a0
+; CHECKIF-NEXT: ret
;
; RV32I-LABEL: fcvt_s_wu_load:
; RV32I: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-abs-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-abs-rv64.mir
index 78a2227b..a7c1c63 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-abs-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-abs-rv64.mir
@@ -88,8 +88,7 @@ body: |
; RV64I-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ASSERT_SEXT]], [[ASHR]]
; RV64I-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ADD]], 32
; RV64I-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[SEXT_INREG]], [[ASHR]]
- ; RV64I-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[XOR]], 32
- ; RV64I-NEXT: $x10 = COPY [[SEXT_INREG1]](s64)
+ ; RV64I-NEXT: $x10 = COPY [[XOR]](s64)
; RV64I-NEXT: PseudoRET implicit $x10
;
; RV64ZBB-LABEL: name: abs_i32
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rotl-rotr.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rotl-rotr.ll
index 8a786fc..46d1661 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/rotl-rotr.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rotl-rotr.ll
@@ -29,7 +29,7 @@ define i32 @rotl_32(i32 %x, i32 %y) nounwind {
;
; RV64I-LABEL: rotl_32:
; RV64I: # %bb.0:
-; RV64I-NEXT: negw a2, a1
+; RV64I-NEXT: neg a2, a1
; RV64I-NEXT: sllw a1, a0, a1
; RV64I-NEXT: srlw a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
@@ -55,7 +55,7 @@ define i32 @rotl_32(i32 %x, i32 %y) nounwind {
;
; RV64XTHEADBB-LABEL: rotl_32:
; RV64XTHEADBB: # %bb.0:
-; RV64XTHEADBB-NEXT: negw a2, a1
+; RV64XTHEADBB-NEXT: neg a2, a1
; RV64XTHEADBB-NEXT: sllw a1, a0, a1
; RV64XTHEADBB-NEXT: srlw a0, a0, a2
; RV64XTHEADBB-NEXT: or a0, a1, a0
@@ -78,7 +78,7 @@ define i32 @rotr_32(i32 %x, i32 %y) nounwind {
;
; RV64I-LABEL: rotr_32:
; RV64I: # %bb.0:
-; RV64I-NEXT: negw a2, a1
+; RV64I-NEXT: neg a2, a1
; RV64I-NEXT: srlw a1, a0, a1
; RV64I-NEXT: sllw a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
@@ -104,7 +104,7 @@ define i32 @rotr_32(i32 %x, i32 %y) nounwind {
;
; RV64XTHEADBB-LABEL: rotr_32:
; RV64XTHEADBB: # %bb.0:
-; RV64XTHEADBB-NEXT: negw a2, a1
+; RV64XTHEADBB-NEXT: neg a2, a1
; RV64XTHEADBB-NEXT: srlw a1, a0, a1
; RV64XTHEADBB-NEXT: sllw a0, a0, a2
; RV64XTHEADBB-NEXT: or a0, a1, a0
@@ -167,7 +167,7 @@ define i64 @rotl_64(i64 %x, i64 %y) nounwind {
;
; RV64I-LABEL: rotl_64:
; RV64I: # %bb.0:
-; RV64I-NEXT: negw a2, a1
+; RV64I-NEXT: neg a2, a1
; RV64I-NEXT: sll a1, a0, a1
; RV64I-NEXT: srl a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
@@ -276,7 +276,7 @@ define i64 @rotl_64(i64 %x, i64 %y) nounwind {
;
; RV64XTHEADBB-LABEL: rotl_64:
; RV64XTHEADBB: # %bb.0:
-; RV64XTHEADBB-NEXT: negw a2, a1
+; RV64XTHEADBB-NEXT: neg a2, a1
; RV64XTHEADBB-NEXT: sll a1, a0, a1
; RV64XTHEADBB-NEXT: srl a0, a0, a2
; RV64XTHEADBB-NEXT: or a0, a1, a0
@@ -340,7 +340,7 @@ define i64 @rotr_64(i64 %x, i64 %y) nounwind {
;
; RV64I-LABEL: rotr_64:
; RV64I: # %bb.0:
-; RV64I-NEXT: negw a2, a1
+; RV64I-NEXT: neg a2, a1
; RV64I-NEXT: srl a1, a0, a1
; RV64I-NEXT: sll a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
@@ -451,7 +451,7 @@ define i64 @rotr_64(i64 %x, i64 %y) nounwind {
;
; RV64XTHEADBB-LABEL: rotr_64:
; RV64XTHEADBB: # %bb.0:
-; RV64XTHEADBB-NEXT: negw a2, a1
+; RV64XTHEADBB-NEXT: neg a2, a1
; RV64XTHEADBB-NEXT: srl a1, a0, a1
; RV64XTHEADBB-NEXT: sll a0, a0, a2
; RV64XTHEADBB-NEXT: or a0, a1, a0
@@ -474,7 +474,7 @@ define i32 @rotl_32_mask(i32 %x, i32 %y) nounwind {
;
; RV64I-LABEL: rotl_32_mask:
; RV64I: # %bb.0:
-; RV64I-NEXT: negw a2, a1
+; RV64I-NEXT: neg a2, a1
; RV64I-NEXT: sllw a1, a0, a1
; RV64I-NEXT: srlw a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
@@ -490,7 +490,7 @@ define i32 @rotl_32_mask(i32 %x, i32 %y) nounwind {
;
; RV64ZBB-LABEL: rotl_32_mask:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: negw a2, a1
+; RV64ZBB-NEXT: neg a2, a1
; RV64ZBB-NEXT: sllw a1, a0, a1
; RV64ZBB-NEXT: srlw a0, a0, a2
; RV64ZBB-NEXT: or a0, a1, a0
@@ -506,7 +506,7 @@ define i32 @rotl_32_mask(i32 %x, i32 %y) nounwind {
;
; RV64XTHEADBB-LABEL: rotl_32_mask:
; RV64XTHEADBB: # %bb.0:
-; RV64XTHEADBB-NEXT: negw a2, a1
+; RV64XTHEADBB-NEXT: neg a2, a1
; RV64XTHEADBB-NEXT: sllw a1, a0, a1
; RV64XTHEADBB-NEXT: srlw a0, a0, a2
; RV64XTHEADBB-NEXT: or a0, a1, a0
@@ -531,7 +531,7 @@ define i32 @rotl_32_mask_and_63_and_31(i32 %x, i32 %y) nounwind {
; RV64I-LABEL: rotl_32_mask_and_63_and_31:
; RV64I: # %bb.0:
; RV64I-NEXT: sllw a2, a0, a1
-; RV64I-NEXT: negw a1, a1
+; RV64I-NEXT: neg a1, a1
; RV64I-NEXT: srlw a0, a0, a1
; RV64I-NEXT: or a0, a2, a0
; RV64I-NEXT: ret
@@ -547,7 +547,7 @@ define i32 @rotl_32_mask_and_63_and_31(i32 %x, i32 %y) nounwind {
; RV64ZBB-LABEL: rotl_32_mask_and_63_and_31:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: sllw a2, a0, a1
-; RV64ZBB-NEXT: negw a1, a1
+; RV64ZBB-NEXT: neg a1, a1
; RV64ZBB-NEXT: srlw a0, a0, a1
; RV64ZBB-NEXT: or a0, a2, a0
; RV64ZBB-NEXT: ret
@@ -563,7 +563,7 @@ define i32 @rotl_32_mask_and_63_and_31(i32 %x, i32 %y) nounwind {
; RV64XTHEADBB-LABEL: rotl_32_mask_and_63_and_31:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: sllw a2, a0, a1
-; RV64XTHEADBB-NEXT: negw a1, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
; RV64XTHEADBB-NEXT: srlw a0, a0, a1
; RV64XTHEADBB-NEXT: or a0, a2, a0
; RV64XTHEADBB-NEXT: ret
@@ -632,7 +632,7 @@ define i32 @rotr_32_mask(i32 %x, i32 %y) nounwind {
;
; RV64I-LABEL: rotr_32_mask:
; RV64I: # %bb.0:
-; RV64I-NEXT: negw a2, a1
+; RV64I-NEXT: neg a2, a1
; RV64I-NEXT: srlw a1, a0, a1
; RV64I-NEXT: sllw a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
@@ -648,7 +648,7 @@ define i32 @rotr_32_mask(i32 %x, i32 %y) nounwind {
;
; RV64ZBB-LABEL: rotr_32_mask:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: negw a2, a1
+; RV64ZBB-NEXT: neg a2, a1
; RV64ZBB-NEXT: srlw a1, a0, a1
; RV64ZBB-NEXT: sllw a0, a0, a2
; RV64ZBB-NEXT: or a0, a1, a0
@@ -664,7 +664,7 @@ define i32 @rotr_32_mask(i32 %x, i32 %y) nounwind {
;
; RV64XTHEADBB-LABEL: rotr_32_mask:
; RV64XTHEADBB: # %bb.0:
-; RV64XTHEADBB-NEXT: negw a2, a1
+; RV64XTHEADBB-NEXT: neg a2, a1
; RV64XTHEADBB-NEXT: srlw a1, a0, a1
; RV64XTHEADBB-NEXT: sllw a0, a0, a2
; RV64XTHEADBB-NEXT: or a0, a1, a0
@@ -689,7 +689,7 @@ define i32 @rotr_32_mask_and_63_and_31(i32 %x, i32 %y) nounwind {
; RV64I-LABEL: rotr_32_mask_and_63_and_31:
; RV64I: # %bb.0:
; RV64I-NEXT: srlw a2, a0, a1
-; RV64I-NEXT: negw a1, a1
+; RV64I-NEXT: neg a1, a1
; RV64I-NEXT: sllw a0, a0, a1
; RV64I-NEXT: or a0, a2, a0
; RV64I-NEXT: ret
@@ -705,7 +705,7 @@ define i32 @rotr_32_mask_and_63_and_31(i32 %x, i32 %y) nounwind {
; RV64ZBB-LABEL: rotr_32_mask_and_63_and_31:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: srlw a2, a0, a1
-; RV64ZBB-NEXT: negw a1, a1
+; RV64ZBB-NEXT: neg a1, a1
; RV64ZBB-NEXT: sllw a0, a0, a1
; RV64ZBB-NEXT: or a0, a2, a0
; RV64ZBB-NEXT: ret
@@ -721,7 +721,7 @@ define i32 @rotr_32_mask_and_63_and_31(i32 %x, i32 %y) nounwind {
; RV64XTHEADBB-LABEL: rotr_32_mask_and_63_and_31:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: srlw a2, a0, a1
-; RV64XTHEADBB-NEXT: negw a1, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
; RV64XTHEADBB-NEXT: sllw a0, a0, a1
; RV64XTHEADBB-NEXT: or a0, a2, a0
; RV64XTHEADBB-NEXT: ret
@@ -829,7 +829,7 @@ define i64 @rotl_64_mask(i64 %x, i64 %y) nounwind {
;
; RV64I-LABEL: rotl_64_mask:
; RV64I: # %bb.0:
-; RV64I-NEXT: negw a2, a1
+; RV64I-NEXT: neg a2, a1
; RV64I-NEXT: sll a1, a0, a1
; RV64I-NEXT: srl a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
@@ -884,7 +884,7 @@ define i64 @rotl_64_mask(i64 %x, i64 %y) nounwind {
;
; RV64ZBB-LABEL: rotl_64_mask:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: negw a2, a1
+; RV64ZBB-NEXT: neg a2, a1
; RV64ZBB-NEXT: sll a1, a0, a1
; RV64ZBB-NEXT: srl a0, a0, a2
; RV64ZBB-NEXT: or a0, a1, a0
@@ -939,7 +939,7 @@ define i64 @rotl_64_mask(i64 %x, i64 %y) nounwind {
;
; RV64XTHEADBB-LABEL: rotl_64_mask:
; RV64XTHEADBB: # %bb.0:
-; RV64XTHEADBB-NEXT: negw a2, a1
+; RV64XTHEADBB-NEXT: neg a2, a1
; RV64XTHEADBB-NEXT: sll a1, a0, a1
; RV64XTHEADBB-NEXT: srl a0, a0, a2
; RV64XTHEADBB-NEXT: or a0, a1, a0
@@ -1005,7 +1005,7 @@ define i64 @rotl_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
; RV64I-LABEL: rotl_64_mask_and_127_and_63:
; RV64I: # %bb.0:
; RV64I-NEXT: sll a2, a0, a1
-; RV64I-NEXT: negw a1, a1
+; RV64I-NEXT: neg a1, a1
; RV64I-NEXT: srl a0, a0, a1
; RV64I-NEXT: or a0, a2, a0
; RV64I-NEXT: ret
@@ -1062,7 +1062,7 @@ define i64 @rotl_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
; RV64ZBB-LABEL: rotl_64_mask_and_127_and_63:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: sll a2, a0, a1
-; RV64ZBB-NEXT: negw a1, a1
+; RV64ZBB-NEXT: neg a1, a1
; RV64ZBB-NEXT: srl a0, a0, a1
; RV64ZBB-NEXT: or a0, a2, a0
; RV64ZBB-NEXT: ret
@@ -1119,7 +1119,7 @@ define i64 @rotl_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
; RV64XTHEADBB-LABEL: rotl_64_mask_and_127_and_63:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: sll a2, a0, a1
-; RV64XTHEADBB-NEXT: negw a1, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
; RV64XTHEADBB-NEXT: srl a0, a0, a1
; RV64XTHEADBB-NEXT: or a0, a2, a0
; RV64XTHEADBB-NEXT: ret
@@ -1277,7 +1277,7 @@ define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind {
;
; RV64I-LABEL: rotr_64_mask:
; RV64I: # %bb.0:
-; RV64I-NEXT: negw a2, a1
+; RV64I-NEXT: neg a2, a1
; RV64I-NEXT: srl a1, a0, a1
; RV64I-NEXT: sll a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
@@ -1331,7 +1331,7 @@ define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind {
;
; RV64ZBB-LABEL: rotr_64_mask:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: negw a2, a1
+; RV64ZBB-NEXT: neg a2, a1
; RV64ZBB-NEXT: srl a1, a0, a1
; RV64ZBB-NEXT: sll a0, a0, a2
; RV64ZBB-NEXT: or a0, a1, a0
@@ -1385,7 +1385,7 @@ define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind {
;
; RV64XTHEADBB-LABEL: rotr_64_mask:
; RV64XTHEADBB: # %bb.0:
-; RV64XTHEADBB-NEXT: negw a2, a1
+; RV64XTHEADBB-NEXT: neg a2, a1
; RV64XTHEADBB-NEXT: srl a1, a0, a1
; RV64XTHEADBB-NEXT: sll a0, a0, a2
; RV64XTHEADBB-NEXT: or a0, a1, a0
@@ -1451,7 +1451,7 @@ define i64 @rotr_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
; RV64I-LABEL: rotr_64_mask_and_127_and_63:
; RV64I: # %bb.0:
; RV64I-NEXT: srl a2, a0, a1
-; RV64I-NEXT: negw a1, a1
+; RV64I-NEXT: neg a1, a1
; RV64I-NEXT: sll a0, a0, a1
; RV64I-NEXT: or a0, a2, a0
; RV64I-NEXT: ret
@@ -1508,7 +1508,7 @@ define i64 @rotr_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
; RV64ZBB-LABEL: rotr_64_mask_and_127_and_63:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: srl a2, a0, a1
-; RV64ZBB-NEXT: negw a1, a1
+; RV64ZBB-NEXT: neg a1, a1
; RV64ZBB-NEXT: sll a0, a0, a1
; RV64ZBB-NEXT: or a0, a2, a0
; RV64ZBB-NEXT: ret
@@ -1565,7 +1565,7 @@ define i64 @rotr_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
; RV64XTHEADBB-LABEL: rotr_64_mask_and_127_and_63:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: srl a2, a0, a1
-; RV64XTHEADBB-NEXT: negw a1, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
; RV64XTHEADBB-NEXT: sll a0, a0, a1
; RV64XTHEADBB-NEXT: or a0, a2, a0
; RV64XTHEADBB-NEXT: ret
@@ -1701,7 +1701,7 @@ define signext i32 @rotl_32_mask_shared(i32 signext %a, i32 signext %b, i32 sign
; RV64I: # %bb.0:
; RV64I-NEXT: andi a3, a2, 31
; RV64I-NEXT: sllw a4, a0, a2
-; RV64I-NEXT: negw a3, a3
+; RV64I-NEXT: neg a3, a3
; RV64I-NEXT: srlw a0, a0, a3
; RV64I-NEXT: or a0, a4, a0
; RV64I-NEXT: sllw a1, a1, a2
@@ -1737,7 +1737,7 @@ define signext i32 @rotl_32_mask_shared(i32 signext %a, i32 signext %b, i32 sign
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: andi a3, a2, 31
; RV64XTHEADBB-NEXT: sllw a4, a0, a2
-; RV64XTHEADBB-NEXT: negw a3, a3
+; RV64XTHEADBB-NEXT: neg a3, a3
; RV64XTHEADBB-NEXT: srlw a0, a0, a3
; RV64XTHEADBB-NEXT: or a0, a4, a0
; RV64XTHEADBB-NEXT: sllw a1, a1, a2
@@ -1822,7 +1822,7 @@ define signext i64 @rotl_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign
; RV64I: # %bb.0:
; RV64I-NEXT: andi a3, a2, 63
; RV64I-NEXT: sll a4, a0, a2
-; RV64I-NEXT: negw a3, a3
+; RV64I-NEXT: neg a3, a3
; RV64I-NEXT: srl a0, a0, a3
; RV64I-NEXT: or a0, a4, a0
; RV64I-NEXT: sll a1, a1, a2
@@ -1972,7 +1972,7 @@ define signext i64 @rotl_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: andi a3, a2, 63
; RV64XTHEADBB-NEXT: sll a4, a0, a2
-; RV64XTHEADBB-NEXT: negw a3, a3
+; RV64XTHEADBB-NEXT: neg a3, a3
; RV64XTHEADBB-NEXT: srl a0, a0, a3
; RV64XTHEADBB-NEXT: or a0, a4, a0
; RV64XTHEADBB-NEXT: sll a1, a1, a2
@@ -2002,7 +2002,7 @@ define signext i32 @rotr_32_mask_shared(i32 signext %a, i32 signext %b, i32 sign
; RV64I: # %bb.0:
; RV64I-NEXT: andi a3, a2, 31
; RV64I-NEXT: srlw a4, a0, a2
-; RV64I-NEXT: negw a3, a3
+; RV64I-NEXT: neg a3, a3
; RV64I-NEXT: sllw a0, a0, a3
; RV64I-NEXT: or a0, a4, a0
; RV64I-NEXT: sllw a1, a1, a2
@@ -2038,7 +2038,7 @@ define signext i32 @rotr_32_mask_shared(i32 signext %a, i32 signext %b, i32 sign
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: andi a3, a2, 31
; RV64XTHEADBB-NEXT: srlw a4, a0, a2
-; RV64XTHEADBB-NEXT: negw a3, a3
+; RV64XTHEADBB-NEXT: neg a3, a3
; RV64XTHEADBB-NEXT: sllw a0, a0, a3
; RV64XTHEADBB-NEXT: or a0, a4, a0
; RV64XTHEADBB-NEXT: sllw a1, a1, a2
@@ -2125,7 +2125,7 @@ define signext i64 @rotr_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign
; RV64I: # %bb.0:
; RV64I-NEXT: andi a3, a2, 63
; RV64I-NEXT: srl a4, a0, a2
-; RV64I-NEXT: negw a3, a3
+; RV64I-NEXT: neg a3, a3
; RV64I-NEXT: sll a0, a0, a3
; RV64I-NEXT: or a0, a4, a0
; RV64I-NEXT: sll a1, a1, a2
@@ -2279,7 +2279,7 @@ define signext i64 @rotr_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: andi a3, a2, 63
; RV64XTHEADBB-NEXT: srl a4, a0, a2
-; RV64XTHEADBB-NEXT: negw a3, a3
+; RV64XTHEADBB-NEXT: neg a3, a3
; RV64XTHEADBB-NEXT: sll a0, a0, a3
; RV64XTHEADBB-NEXT: or a0, a4, a0
; RV64XTHEADBB-NEXT: sll a1, a1, a2
@@ -2312,8 +2312,8 @@ define signext i32 @rotl_32_mask_multiple(i32 signext %a, i32 signext %b, i32 si
; RV64I-NEXT: andi a3, a2, 31
; RV64I-NEXT: sllw a4, a0, a2
; RV64I-NEXT: sllw a2, a1, a2
-; RV64I-NEXT: negw a5, a3
-; RV64I-NEXT: negw a3, a3
+; RV64I-NEXT: neg a5, a3
+; RV64I-NEXT: neg a3, a3
; RV64I-NEXT: srlw a0, a0, a5
; RV64I-NEXT: srlw a1, a1, a3
; RV64I-NEXT: or a0, a4, a0
@@ -2353,8 +2353,8 @@ define signext i32 @rotl_32_mask_multiple(i32 signext %a, i32 signext %b, i32 si
; RV64XTHEADBB-NEXT: andi a3, a2, 31
; RV64XTHEADBB-NEXT: sllw a4, a0, a2
; RV64XTHEADBB-NEXT: sllw a2, a1, a2
-; RV64XTHEADBB-NEXT: negw a5, a3
-; RV64XTHEADBB-NEXT: negw a3, a3
+; RV64XTHEADBB-NEXT: neg a5, a3
+; RV64XTHEADBB-NEXT: neg a3, a3
; RV64XTHEADBB-NEXT: srlw a0, a0, a5
; RV64XTHEADBB-NEXT: srlw a1, a1, a3
; RV64XTHEADBB-NEXT: or a0, a4, a0
@@ -2464,7 +2464,7 @@ define i64 @rotl_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind {
; RV64I-NEXT: andi a3, a2, 63
; RV64I-NEXT: sll a4, a0, a2
; RV64I-NEXT: sll a2, a1, a2
-; RV64I-NEXT: negw a3, a3
+; RV64I-NEXT: neg a3, a3
; RV64I-NEXT: srl a0, a0, a3
; RV64I-NEXT: srl a1, a1, a3
; RV64I-NEXT: or a0, a4, a0
@@ -2664,7 +2664,7 @@ define i64 @rotl_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind {
; RV64XTHEADBB-NEXT: andi a3, a2, 63
; RV64XTHEADBB-NEXT: sll a4, a0, a2
; RV64XTHEADBB-NEXT: sll a2, a1, a2
-; RV64XTHEADBB-NEXT: negw a3, a3
+; RV64XTHEADBB-NEXT: neg a3, a3
; RV64XTHEADBB-NEXT: srl a0, a0, a3
; RV64XTHEADBB-NEXT: srl a1, a1, a3
; RV64XTHEADBB-NEXT: or a0, a4, a0
@@ -2697,8 +2697,8 @@ define signext i32 @rotr_32_mask_multiple(i32 signext %a, i32 signext %b, i32 si
; RV64I-NEXT: andi a3, a2, 31
; RV64I-NEXT: srlw a4, a0, a2
; RV64I-NEXT: srlw a2, a1, a2
-; RV64I-NEXT: negw a5, a3
-; RV64I-NEXT: negw a3, a3
+; RV64I-NEXT: neg a5, a3
+; RV64I-NEXT: neg a3, a3
; RV64I-NEXT: sllw a0, a0, a5
; RV64I-NEXT: sllw a1, a1, a3
; RV64I-NEXT: or a0, a4, a0
@@ -2738,8 +2738,8 @@ define signext i32 @rotr_32_mask_multiple(i32 signext %a, i32 signext %b, i32 si
; RV64XTHEADBB-NEXT: andi a3, a2, 31
; RV64XTHEADBB-NEXT: srlw a4, a0, a2
; RV64XTHEADBB-NEXT: srlw a2, a1, a2
-; RV64XTHEADBB-NEXT: negw a5, a3
-; RV64XTHEADBB-NEXT: negw a3, a3
+; RV64XTHEADBB-NEXT: neg a5, a3
+; RV64XTHEADBB-NEXT: neg a3, a3
; RV64XTHEADBB-NEXT: sllw a0, a0, a5
; RV64XTHEADBB-NEXT: sllw a1, a1, a3
; RV64XTHEADBB-NEXT: or a0, a4, a0
@@ -2850,7 +2850,7 @@ define i64 @rotr_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind {
; RV64I-NEXT: andi a3, a2, 63
; RV64I-NEXT: srl a4, a0, a2
; RV64I-NEXT: srl a2, a1, a2
-; RV64I-NEXT: negw a3, a3
+; RV64I-NEXT: neg a3, a3
; RV64I-NEXT: sll a0, a0, a3
; RV64I-NEXT: sll a1, a1, a3
; RV64I-NEXT: or a0, a4, a0
@@ -3052,7 +3052,7 @@ define i64 @rotr_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind {
; RV64XTHEADBB-NEXT: andi a3, a2, 63
; RV64XTHEADBB-NEXT: srl a4, a0, a2
; RV64XTHEADBB-NEXT: srl a2, a1, a2
-; RV64XTHEADBB-NEXT: negw a3, a3
+; RV64XTHEADBB-NEXT: neg a3, a3
; RV64XTHEADBB-NEXT: sll a0, a0, a3
; RV64XTHEADBB-NEXT: sll a1, a1, a3
; RV64XTHEADBB-NEXT: or a0, a4, a0
@@ -3116,7 +3116,7 @@ define i64 @rotl_64_zext(i64 %x, i32 %y) nounwind {
; RV64I-LABEL: rotl_64_zext:
; RV64I: # %bb.0:
; RV64I-NEXT: li a2, 64
-; RV64I-NEXT: subw a2, a2, a1
+; RV64I-NEXT: sub a2, a2, a1
; RV64I-NEXT: sll a1, a0, a1
; RV64I-NEXT: srl a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
@@ -3171,7 +3171,7 @@ define i64 @rotl_64_zext(i64 %x, i32 %y) nounwind {
; RV64ZBB-LABEL: rotl_64_zext:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: li a2, 64
-; RV64ZBB-NEXT: subw a2, a2, a1
+; RV64ZBB-NEXT: sub a2, a2, a1
; RV64ZBB-NEXT: sll a1, a0, a1
; RV64ZBB-NEXT: srl a0, a0, a2
; RV64ZBB-NEXT: or a0, a1, a0
@@ -3226,7 +3226,7 @@ define i64 @rotl_64_zext(i64 %x, i32 %y) nounwind {
; RV64XTHEADBB-LABEL: rotl_64_zext:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: li a2, 64
-; RV64XTHEADBB-NEXT: subw a2, a2, a1
+; RV64XTHEADBB-NEXT: sub a2, a2, a1
; RV64XTHEADBB-NEXT: sll a1, a0, a1
; RV64XTHEADBB-NEXT: srl a0, a0, a2
; RV64XTHEADBB-NEXT: or a0, a1, a0
@@ -3289,7 +3289,7 @@ define i64 @rotr_64_zext(i64 %x, i32 %y) nounwind {
; RV64I-LABEL: rotr_64_zext:
; RV64I: # %bb.0:
; RV64I-NEXT: li a2, 64
-; RV64I-NEXT: subw a2, a2, a1
+; RV64I-NEXT: sub a2, a2, a1
; RV64I-NEXT: srl a1, a0, a1
; RV64I-NEXT: sll a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
@@ -3343,7 +3343,7 @@ define i64 @rotr_64_zext(i64 %x, i32 %y) nounwind {
; RV64ZBB-LABEL: rotr_64_zext:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: li a2, 64
-; RV64ZBB-NEXT: subw a2, a2, a1
+; RV64ZBB-NEXT: sub a2, a2, a1
; RV64ZBB-NEXT: srl a1, a0, a1
; RV64ZBB-NEXT: sll a0, a0, a2
; RV64ZBB-NEXT: or a0, a1, a0
@@ -3397,7 +3397,7 @@ define i64 @rotr_64_zext(i64 %x, i32 %y) nounwind {
; RV64XTHEADBB-LABEL: rotr_64_zext:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: li a2, 64
-; RV64XTHEADBB-NEXT: subw a2, a2, a1
+; RV64XTHEADBB-NEXT: sub a2, a2, a1
; RV64XTHEADBB-NEXT: srl a1, a0, a1
; RV64XTHEADBB-NEXT: sll a0, a0, a2
; RV64XTHEADBB-NEXT: or a0, a1, a0
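
The negw to neg rewrites threaded through these rotate patterns are safe for a second reason as well: RISC-V register-amount shifts ignore the upper bits of the amount register (sll/srl/sra on RV64 read only the low 6 bits of rs2; sllw/srlw/sraw read only the low 5), and neg and negw always agree on those bits. A small C model (simplified srlw, names illustrative):

#include <assert.h>
#include <stdint.h>

/* Simplified srlw: only the low 5 bits of the amount are used. */
static uint32_t srlw(uint32_t x, uint64_t amt) { return x >> (amt & 31); }

int main(void) {
    uint64_t y = 0x100000007ull;  /* upper bits set on purpose */
    uint64_t neg64 = 0 - y;                                          /* neg  */
    uint64_t neg32 = (uint64_t)(int64_t)(int32_t)(0u - (uint32_t)y); /* negw */
    assert(neg64 != neg32);  /* the full registers differ...            */
    assert(srlw(0xdeadbeefu, neg64) == srlw(0xdeadbeefu, neg32));
    return 0;                /* ...but the shifted result cannot differ */
}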
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb-zbkb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb-zbkb.ll
index 1eddb8f..b7f84ba 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb-zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb-zbkb.ll
@@ -107,7 +107,7 @@ declare i32 @llvm.fshl.i32(i32, i32, i32)
define signext i32 @rol_i32(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: rol_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: negw a2, a1
+; RV64I-NEXT: neg a2, a1
; RV64I-NEXT: sllw a1, a0, a1
; RV64I-NEXT: srlw a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
@@ -125,7 +125,7 @@ define signext i32 @rol_i32(i32 signext %a, i32 signext %b) nounwind {
define void @rol_i32_nosext(i32 signext %a, i32 signext %b, ptr %x) nounwind {
; RV64I-LABEL: rol_i32_nosext:
; RV64I: # %bb.0:
-; RV64I-NEXT: negw a3, a1
+; RV64I-NEXT: neg a3, a1
; RV64I-NEXT: sllw a1, a0, a1
; RV64I-NEXT: srlw a0, a0, a3
; RV64I-NEXT: or a0, a1, a0
@@ -146,7 +146,7 @@ define signext i32 @rol_i32_neg_constant_rhs(i32 signext %a) nounwind {
; RV64I-LABEL: rol_i32_neg_constant_rhs:
; RV64I: # %bb.0:
; RV64I-NEXT: li a1, -2
-; RV64I-NEXT: negw a2, a0
+; RV64I-NEXT: neg a2, a0
; RV64I-NEXT: sllw a0, a1, a0
; RV64I-NEXT: srlw a1, a1, a2
; RV64I-NEXT: or a0, a0, a1
@@ -166,7 +166,7 @@ declare i64 @llvm.fshl.i64(i64, i64, i64)
define i64 @rol_i64(i64 %a, i64 %b) nounwind {
; RV64I-LABEL: rol_i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: negw a2, a1
+; RV64I-NEXT: neg a2, a1
; RV64I-NEXT: sll a1, a0, a1
; RV64I-NEXT: srl a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
@@ -185,7 +185,7 @@ declare i32 @llvm.fshr.i32(i32, i32, i32)
define signext i32 @ror_i32(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: ror_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: negw a2, a1
+; RV64I-NEXT: neg a2, a1
; RV64I-NEXT: srlw a1, a0, a1
; RV64I-NEXT: sllw a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
@@ -203,7 +203,7 @@ define signext i32 @ror_i32(i32 signext %a, i32 signext %b) nounwind {
define void @ror_i32_nosext(i32 signext %a, i32 signext %b, ptr %x) nounwind {
; RV64I-LABEL: ror_i32_nosext:
; RV64I: # %bb.0:
-; RV64I-NEXT: negw a3, a1
+; RV64I-NEXT: neg a3, a1
; RV64I-NEXT: srlw a1, a0, a1
; RV64I-NEXT: sllw a0, a0, a3
; RV64I-NEXT: or a0, a1, a0
@@ -224,7 +224,7 @@ define signext i32 @ror_i32_neg_constant_rhs(i32 signext %a) nounwind {
; RV64I-LABEL: ror_i32_neg_constant_rhs:
; RV64I: # %bb.0:
; RV64I-NEXT: li a1, -2
-; RV64I-NEXT: negw a2, a0
+; RV64I-NEXT: neg a2, a0
; RV64I-NEXT: srlw a0, a1, a0
; RV64I-NEXT: sllw a1, a1, a2
; RV64I-NEXT: or a0, a0, a1
@@ -244,7 +244,7 @@ declare i64 @llvm.fshr.i64(i64, i64, i64)
define i64 @ror_i64(i64 %a, i64 %b) nounwind {
; RV64I-LABEL: ror_i64:
; RV64I: # %bb.0:
-; RV64I-NEXT: negw a2, a1
+; RV64I-NEXT: neg a2, a1
; RV64I-NEXT: srl a1, a0, a1
; RV64I-NEXT: sll a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll
index 9690302..2dd3bb3 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll
@@ -31,7 +31,7 @@ define signext i32 @ctlz_i32(i32 signext %a) nounwind {
; RV64I-NEXT: and a1, a2, a1
; RV64I-NEXT: lui a2, 209715
; RV64I-NEXT: addi a2, a2, 819
-; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: and a1, a1, a2
@@ -88,7 +88,7 @@ define signext i32 @log2_i32(i32 signext %a) nounwind {
; RV64I-NEXT: and a1, a2, a1
; RV64I-NEXT: lui a2, 209715
; RV64I-NEXT: addi a2, a2, 819
-; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: and a1, a1, a2
@@ -103,7 +103,7 @@ define signext i32 @log2_i32(i32 signext %a) nounwind {
; RV64I-NEXT: call __muldi3
; RV64I-NEXT: srliw a0, a0, 24
; RV64I-NEXT: li a1, 32
-; RV64I-NEXT: subw a0, a1, a0
+; RV64I-NEXT: sub a0, a1, a0
; RV64I-NEXT: j .LBB1_3
; RV64I-NEXT: .LBB1_2:
; RV64I-NEXT: li a0, 32
@@ -153,7 +153,7 @@ define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
; RV64I-NEXT: and a1, a2, a1
; RV64I-NEXT: lui a2, 209715
; RV64I-NEXT: addi a2, a2, 819
-; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: and a1, a1, a2
@@ -168,7 +168,7 @@ define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
; RV64I-NEXT: call __muldi3
; RV64I-NEXT: srliw a0, a0, 24
; RV64I-NEXT: li a1, 32
-; RV64I-NEXT: subw a1, a1, a0
+; RV64I-NEXT: sub a1, a1, a0
; RV64I-NEXT: .LBB2_2: # %cond.end
; RV64I-NEXT: subw a0, s0, a1
; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
@@ -212,7 +212,7 @@ define signext i32 @findLastSet_i32(i32 signext %a) nounwind {
; RV64I-NEXT: and a1, a2, a1
; RV64I-NEXT: lui a2, 209715
; RV64I-NEXT: addi a2, a2, 819
-; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: and a1, a1, a2
@@ -283,7 +283,7 @@ define i32 @ctlz_lshr_i32(i32 signext %a) {
; RV64I-NEXT: and a1, a2, a1
; RV64I-NEXT: lui a2, 209715
; RV64I-NEXT: addi a2, a2, 819
-; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: and a1, a1, a2
@@ -412,7 +412,7 @@ define signext i32 @cttz_i32(i32 signext %a) nounwind {
; RV64I-NEXT: and a1, a2, a1
; RV64I-NEXT: lui a2, 209715
; RV64I-NEXT: addi a2, a2, 819
-; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: and a1, a1, a2
@@ -455,7 +455,7 @@ define signext i32 @cttz_zero_undef_i32(i32 signext %a) nounwind {
; RV64I-NEXT: and a1, a2, a1
; RV64I-NEXT: lui a2, 209715
; RV64I-NEXT: addi a2, a2, 819
-; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: and a1, a1, a2
@@ -497,7 +497,7 @@ define signext i32 @findFirstSet_i32(i32 signext %a) nounwind {
; RV64I-NEXT: and a1, a2, a1
; RV64I-NEXT: lui a2, 209715
; RV64I-NEXT: addi a2, a2, 819
-; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: and a1, a1, a2
@@ -553,7 +553,7 @@ define signext i32 @ffs_i32(i32 signext %a) nounwind {
; RV64I-NEXT: and a1, a2, a1
; RV64I-NEXT: lui a2, 209715
; RV64I-NEXT: addi a2, a2, 819
-; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: and a1, a1, a2
@@ -672,7 +672,7 @@ define signext i32 @ctpop_i32(i32 signext %a) nounwind {
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: lui a2, 209715
; RV64I-NEXT: addi a2, a2, 819
-; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: and a1, a1, a2
@@ -728,7 +728,7 @@ define signext i32 @ctpop_i32_load(ptr %p) nounwind {
; RV64I-NEXT: and a1, a2, a1
; RV64I-NEXT: lui a2, 209715
; RV64I-NEXT: addi a2, a2, 819
-; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: srliw a1, a0, 2
; RV64I-NEXT: and a0, a0, a2
; RV64I-NEXT: and a1, a1, a2
@@ -748,7 +748,7 @@ define signext i32 @ctpop_i32_load(ptr %p) nounwind {
;
; RV64ZBB-LABEL: ctpop_i32_load:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: lwu a0, 0(a0)
+; RV64ZBB-NEXT: lw a0, 0(a0)
; RV64ZBB-NEXT: cpopw a0, a0
; RV64ZBB-NEXT: ret
%a = load i32, ptr %p
@@ -1053,9 +1053,8 @@ define signext i32 @abs_i32_sext(i32 signext %x) {
; RV64I-LABEL: abs_i32_sext:
; RV64I: # %bb.0:
; RV64I-NEXT: srai a1, a0, 31
-; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: addw a0, a0, a1
; RV64I-NEXT: xor a0, a0, a1
-; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: abs_i32_sext:
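
The abs_i32_sext change leans on the standard sign-mask identity abs(x) = (x + (x >> 31)) ^ (x >> 31): once the add becomes addw, its result is already sign-extended, and xoring a sign-extended value with a mask of all zeros or all ones stays sign-extended, so the trailing sext.w is dead. A short C sketch of the assumed equivalence (not taken from the patch):

#include <assert.h>
#include <stdint.h>

int main(void) {
    int32_t x = -42;
    int32_t mask = x >> 31;             /* srai: 0 or -1               */
    int64_t sum  = (int64_t)(x + mask); /* addw sign-extends the sum   */
    int64_t r    = sum ^ (int64_t)mask; /* xor preserves the extension */
    assert(r == 42 && r == (int64_t)(int32_t)r); /* no sext.w needed   */
    return 0;
}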
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbkb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbkb.ll
index cd59c9e..ba058ca 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbkb.ll
@@ -114,7 +114,7 @@ define i64 @pack_i64_2(i32 signext %a, i32 signext %b) nounwind {
define i64 @pack_i64_3(ptr %0, ptr %1) {
; RV64I-LABEL: pack_i64_3:
; RV64I: # %bb.0:
-; RV64I-NEXT: lwu a0, 0(a0)
+; RV64I-NEXT: lw a0, 0(a0)
; RV64I-NEXT: lwu a1, 0(a1)
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: or a0, a0, a1
@@ -122,8 +122,8 @@ define i64 @pack_i64_3(ptr %0, ptr %1) {
;
; RV64ZBKB-LABEL: pack_i64_3:
; RV64ZBKB: # %bb.0:
-; RV64ZBKB-NEXT: lwu a0, 0(a0)
-; RV64ZBKB-NEXT: lwu a1, 0(a1)
+; RV64ZBKB-NEXT: lw a0, 0(a0)
+; RV64ZBKB-NEXT: lw a1, 0(a1)
; RV64ZBKB-NEXT: pack a0, a1, a0
; RV64ZBKB-NEXT: ret
%3 = load i32, ptr %0, align 4
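
The lwu to lw flips in pack_i64_3 are legal because the loaded word's upper 32 bits are dead at every use: the RV64I sequence immediately shifts by slli a0, a0, 32, which discards whatever extension the load produced (note the second load stays lwu there, since it is or-ed in unmasked), and Zbkb's pack concatenates only the low 32 bits of each source. A C model of the slli-32 case (illustrative only):

#include <assert.h>
#include <stdint.h>

int main(void) {
    int32_t word = -5;
    uint64_t sext = (uint64_t)(int64_t)word;  /* lw  */
    uint64_t zext = (uint64_t)(uint32_t)word; /* lwu */
    /* slli 32 throws away bits 63:32, so the extension is irrelevant. */
    assert((sext << 32) == (zext << 32));
    return 0;
}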
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/shifts.ll b/llvm/test/CodeGen/RISCV/GlobalISel/shifts.ll
index 8b262db..d634cc9 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/shifts.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/shifts.ll
@@ -330,13 +330,13 @@ define i128 @lshr128(i128 %a, i128 %b) nounwind {
; RV64I-NEXT: li a3, 64
; RV64I-NEXT: bltu a2, a3, .LBB6_2
; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: subw a4, a2, a3
+; RV64I-NEXT: sub a4, a2, a3
; RV64I-NEXT: srl a4, a1, a4
; RV64I-NEXT: bnez a2, .LBB6_3
; RV64I-NEXT: j .LBB6_4
; RV64I-NEXT: .LBB6_2:
; RV64I-NEXT: srl a4, a0, a2
-; RV64I-NEXT: negw a5, a2
+; RV64I-NEXT: neg a5, a2
; RV64I-NEXT: sll a5, a1, a5
; RV64I-NEXT: or a4, a4, a5
; RV64I-NEXT: beqz a2, .LBB6_4
@@ -476,13 +476,13 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind {
; RV64I-NEXT: li a3, 64
; RV64I-NEXT: bltu a2, a3, .LBB7_2
; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: subw a4, a2, a3
+; RV64I-NEXT: sub a4, a2, a3
; RV64I-NEXT: sra a4, a1, a4
; RV64I-NEXT: bnez a2, .LBB7_3
; RV64I-NEXT: j .LBB7_4
; RV64I-NEXT: .LBB7_2:
; RV64I-NEXT: srl a4, a0, a2
-; RV64I-NEXT: negw a5, a2
+; RV64I-NEXT: neg a5, a2
; RV64I-NEXT: sll a5, a1, a5
; RV64I-NEXT: or a4, a4, a5
; RV64I-NEXT: beqz a2, .LBB7_4
@@ -615,13 +615,13 @@ define i128 @shl128(i128 %a, i128 %b) nounwind {
; RV64I-NEXT: bltu a2, a4, .LBB8_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: li a0, 0
-; RV64I-NEXT: subw a4, a2, a4
+; RV64I-NEXT: sub a4, a2, a4
; RV64I-NEXT: sll a3, a3, a4
; RV64I-NEXT: bnez a2, .LBB8_3
; RV64I-NEXT: j .LBB8_4
; RV64I-NEXT: .LBB8_2:
; RV64I-NEXT: sll a0, a3, a2
-; RV64I-NEXT: negw a4, a2
+; RV64I-NEXT: neg a4, a2
; RV64I-NEXT: srl a3, a3, a4
; RV64I-NEXT: sll a4, a1, a2
; RV64I-NEXT: or a3, a3, a4
@@ -685,7 +685,7 @@ define i64 @fshr64_minsize(i64 %a, i64 %b) minsize nounwind {
;
; RV64I-LABEL: fshr64_minsize:
; RV64I: # %bb.0:
-; RV64I-NEXT: negw a2, a1
+; RV64I-NEXT: neg a2, a1
; RV64I-NEXT: srl a1, a0, a1
; RV64I-NEXT: sll a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
@@ -914,12 +914,12 @@ define i128 @fshr128_minsize(i128 %a, i128 %b) minsize nounwind {
; RV64I-NEXT: li a4, 64
; RV64I-NEXT: bltu a5, a4, .LBB10_2
; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: subw a3, a5, a4
+; RV64I-NEXT: sub a3, a5, a4
; RV64I-NEXT: srl a6, a1, a3
; RV64I-NEXT: j .LBB10_3
; RV64I-NEXT: .LBB10_2:
; RV64I-NEXT: srl a3, a0, a2
-; RV64I-NEXT: negw a6, a5
+; RV64I-NEXT: neg a6, a5
; RV64I-NEXT: sll a6, a1, a6
; RV64I-NEXT: or a6, a3, a6
; RV64I-NEXT: .LBB10_3:
@@ -928,7 +928,7 @@ define i128 @fshr128_minsize(i128 %a, i128 %b) minsize nounwind {
; RV64I-NEXT: # %bb.4:
; RV64I-NEXT: mv a3, a6
; RV64I-NEXT: .LBB10_5:
-; RV64I-NEXT: negw a7, a2
+; RV64I-NEXT: neg a7, a2
; RV64I-NEXT: bltu a5, a4, .LBB10_7
; RV64I-NEXT: # %bb.6:
; RV64I-NEXT: li a2, 0
@@ -940,13 +940,13 @@ define i128 @fshr128_minsize(i128 %a, i128 %b) minsize nounwind {
; RV64I-NEXT: bltu a6, a4, .LBB10_10
; RV64I-NEXT: # %bb.9:
; RV64I-NEXT: li a5, 0
-; RV64I-NEXT: subw a4, a6, a4
+; RV64I-NEXT: sub a4, a6, a4
; RV64I-NEXT: sll a0, a0, a4
; RV64I-NEXT: bnez a6, .LBB10_11
; RV64I-NEXT: j .LBB10_12
; RV64I-NEXT: .LBB10_10:
; RV64I-NEXT: sll a5, a0, a7
-; RV64I-NEXT: negw a4, a6
+; RV64I-NEXT: neg a4, a6
; RV64I-NEXT: srl a0, a0, a4
; RV64I-NEXT: sll a4, a1, a7
; RV64I-NEXT: or a0, a0, a4
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/wide-scalar-shift-by-byte-multiple-legalization.ll b/llvm/test/CodeGen/RISCV/GlobalISel/wide-scalar-shift-by-byte-multiple-legalization.ll
index 69519c0..014b1c1 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/wide-scalar-shift-by-byte-multiple-legalization.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/wide-scalar-shift-by-byte-multiple-legalization.ll
@@ -758,13 +758,13 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: or a3, a6, a7
; RV64I-NEXT: bltu a1, a4, .LBB6_2
; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: subw a5, a1, a4
+; RV64I-NEXT: sub a5, a1, a4
; RV64I-NEXT: srl a5, a3, a5
; RV64I-NEXT: bnez a1, .LBB6_3
; RV64I-NEXT: j .LBB6_4
; RV64I-NEXT: .LBB6_2:
; RV64I-NEXT: srl a5, a0, a1
-; RV64I-NEXT: negw a6, a1
+; RV64I-NEXT: neg a6, a1
; RV64I-NEXT: sll a6, a3, a6
; RV64I-NEXT: or a5, a5, a6
; RV64I-NEXT: beqz a1, .LBB6_4
@@ -1091,13 +1091,13 @@ define void @lshr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV64I-NEXT: or a3, a6, a7
; RV64I-NEXT: bltu a1, a4, .LBB7_2
; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: subw a5, a1, a4
+; RV64I-NEXT: sub a5, a1, a4
; RV64I-NEXT: srl a5, a3, a5
; RV64I-NEXT: bnez a1, .LBB7_3
; RV64I-NEXT: j .LBB7_4
; RV64I-NEXT: .LBB7_2:
; RV64I-NEXT: srl a5, a0, a1
-; RV64I-NEXT: negw a6, a1
+; RV64I-NEXT: neg a6, a1
; RV64I-NEXT: sll a6, a3, a6
; RV64I-NEXT: or a5, a5, a6
; RV64I-NEXT: beqz a1, .LBB7_4
@@ -1425,13 +1425,13 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: bltu a3, a5, .LBB8_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: subw a5, a3, a5
+; RV64I-NEXT: sub a5, a3, a5
; RV64I-NEXT: sll a4, a4, a5
; RV64I-NEXT: bnez a3, .LBB8_3
; RV64I-NEXT: j .LBB8_4
; RV64I-NEXT: .LBB8_2:
; RV64I-NEXT: sll a1, a4, a3
-; RV64I-NEXT: negw a5, a3
+; RV64I-NEXT: neg a5, a3
; RV64I-NEXT: srl a4, a4, a5
; RV64I-NEXT: sll a5, a0, a3
; RV64I-NEXT: or a4, a4, a5
@@ -1754,13 +1754,13 @@ define void @shl_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw
; RV64I-NEXT: bltu a3, a5, .LBB9_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: li a1, 0
-; RV64I-NEXT: subw a5, a3, a5
+; RV64I-NEXT: sub a5, a3, a5
; RV64I-NEXT: sll a4, a4, a5
; RV64I-NEXT: bnez a3, .LBB9_3
; RV64I-NEXT: j .LBB9_4
; RV64I-NEXT: .LBB9_2:
; RV64I-NEXT: sll a1, a4, a3
-; RV64I-NEXT: negw a5, a3
+; RV64I-NEXT: neg a5, a3
; RV64I-NEXT: srl a4, a4, a5
; RV64I-NEXT: sll a5, a0, a3
; RV64I-NEXT: or a4, a4, a5
@@ -2083,13 +2083,13 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: or a3, a6, a7
; RV64I-NEXT: bltu a1, a4, .LBB10_2
; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: subw a5, a1, a4
+; RV64I-NEXT: sub a5, a1, a4
; RV64I-NEXT: sra a5, a3, a5
; RV64I-NEXT: bnez a1, .LBB10_3
; RV64I-NEXT: j .LBB10_4
; RV64I-NEXT: .LBB10_2:
; RV64I-NEXT: srl a5, a0, a1
-; RV64I-NEXT: negw a6, a1
+; RV64I-NEXT: neg a6, a1
; RV64I-NEXT: sll a6, a3, a6
; RV64I-NEXT: or a5, a5, a6
; RV64I-NEXT: beqz a1, .LBB10_4
@@ -2416,13 +2416,13 @@ define void @ashr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV64I-NEXT: or a3, a6, a7
; RV64I-NEXT: bltu a1, a4, .LBB11_2
; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: subw a5, a1, a4
+; RV64I-NEXT: sub a5, a1, a4
; RV64I-NEXT: sra a5, a3, a5
; RV64I-NEXT: bnez a1, .LBB11_3
; RV64I-NEXT: j .LBB11_4
; RV64I-NEXT: .LBB11_2:
; RV64I-NEXT: srl a5, a0, a1
-; RV64I-NEXT: negw a6, a1
+; RV64I-NEXT: neg a6, a1
; RV64I-NEXT: sll a6, a3, a6
; RV64I-NEXT: or a5, a5, a6
; RV64I-NEXT: beqz a1, .LBB11_4
@@ -2796,8 +2796,8 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: or t0, t5, t3
; RV64I-NEXT: or a5, s0, t6
; RV64I-NEXT: slli a5, a5, 3
-; RV64I-NEXT: subw t1, a5, a7
-; RV64I-NEXT: negw t5, a5
+; RV64I-NEXT: sub t1, a5, a7
+; RV64I-NEXT: neg t5, a5
; RV64I-NEXT: sll t3, t0, t5
; RV64I-NEXT: bltu a5, a7, .LBB12_2
; RV64I-NEXT: # %bb.1:
@@ -2842,7 +2842,7 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: bgeu t6, a7, .LBB12_14
; RV64I-NEXT: .LBB12_12:
; RV64I-NEXT: sll t5, a6, t5
-; RV64I-NEXT: negw s0, t6
+; RV64I-NEXT: neg s0, t6
; RV64I-NEXT: srl s0, a6, s0
; RV64I-NEXT: or s1, s0, t3
; RV64I-NEXT: j .LBB12_15
@@ -2851,7 +2851,7 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: bltu t6, a7, .LBB12_12
; RV64I-NEXT: .LBB12_14:
; RV64I-NEXT: li t5, 0
-; RV64I-NEXT: subw t3, t6, a7
+; RV64I-NEXT: sub t3, t6, a7
; RV64I-NEXT: sll s1, a6, t3
; RV64I-NEXT: .LBB12_15:
; RV64I-NEXT: sub s0, a5, t1
@@ -2862,13 +2862,13 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: .LBB12_17:
; RV64I-NEXT: bltu s0, a7, .LBB12_19
; RV64I-NEXT: # %bb.18:
-; RV64I-NEXT: subw t6, s0, a7
+; RV64I-NEXT: sub t6, s0, a7
; RV64I-NEXT: srl t6, t0, t6
; RV64I-NEXT: bnez s0, .LBB12_20
; RV64I-NEXT: j .LBB12_21
; RV64I-NEXT: .LBB12_19:
; RV64I-NEXT: srl t6, a6, s0
-; RV64I-NEXT: negw s1, s0
+; RV64I-NEXT: neg s1, s0
; RV64I-NEXT: sll s1, t0, s1
; RV64I-NEXT: or t6, t6, s1
; RV64I-NEXT: beqz s0, .LBB12_21
@@ -3720,8 +3720,8 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV64I-NEXT: or t0, t5, t3
; RV64I-NEXT: or a5, s0, t6
; RV64I-NEXT: slli a5, a5, 5
-; RV64I-NEXT: subw t1, a5, a7
-; RV64I-NEXT: negw t5, a5
+; RV64I-NEXT: sub t1, a5, a7
+; RV64I-NEXT: neg t5, a5
; RV64I-NEXT: sll t3, t0, t5
; RV64I-NEXT: bltu a5, a7, .LBB13_2
; RV64I-NEXT: # %bb.1:
@@ -3766,7 +3766,7 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV64I-NEXT: bgeu t6, a7, .LBB13_14
; RV64I-NEXT: .LBB13_12:
; RV64I-NEXT: sll t5, a6, t5
-; RV64I-NEXT: negw s0, t6
+; RV64I-NEXT: neg s0, t6
; RV64I-NEXT: srl s0, a6, s0
; RV64I-NEXT: or s1, s0, t3
; RV64I-NEXT: j .LBB13_15
@@ -3775,7 +3775,7 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV64I-NEXT: bltu t6, a7, .LBB13_12
; RV64I-NEXT: .LBB13_14:
; RV64I-NEXT: li t5, 0
-; RV64I-NEXT: subw t3, t6, a7
+; RV64I-NEXT: sub t3, t6, a7
; RV64I-NEXT: sll s1, a6, t3
; RV64I-NEXT: .LBB13_15:
; RV64I-NEXT: sub s0, a5, t1
@@ -3786,13 +3786,13 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV64I-NEXT: .LBB13_17:
; RV64I-NEXT: bltu s0, a7, .LBB13_19
; RV64I-NEXT: # %bb.18:
-; RV64I-NEXT: subw t6, s0, a7
+; RV64I-NEXT: sub t6, s0, a7
; RV64I-NEXT: srl t6, t0, t6
; RV64I-NEXT: bnez s0, .LBB13_20
; RV64I-NEXT: j .LBB13_21
; RV64I-NEXT: .LBB13_19:
; RV64I-NEXT: srl t6, a6, s0
-; RV64I-NEXT: negw s1, s0
+; RV64I-NEXT: neg s1, s0
; RV64I-NEXT: sll s1, t0, s1
; RV64I-NEXT: or t6, t6, s1
; RV64I-NEXT: beqz s0, .LBB13_21
@@ -4644,8 +4644,8 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
; RV64I-NEXT: or t0, t5, t3
; RV64I-NEXT: or a5, s0, t6
; RV64I-NEXT: slli a5, a5, 6
-; RV64I-NEXT: subw t1, a5, a7
-; RV64I-NEXT: negw t5, a5
+; RV64I-NEXT: sub t1, a5, a7
+; RV64I-NEXT: neg t5, a5
; RV64I-NEXT: sll t3, t0, t5
; RV64I-NEXT: bltu a5, a7, .LBB14_2
; RV64I-NEXT: # %bb.1:
@@ -4690,7 +4690,7 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
; RV64I-NEXT: bgeu t6, a7, .LBB14_14
; RV64I-NEXT: .LBB14_12:
; RV64I-NEXT: sll t5, a6, t5
-; RV64I-NEXT: negw s0, t6
+; RV64I-NEXT: neg s0, t6
; RV64I-NEXT: srl s0, a6, s0
; RV64I-NEXT: or s1, s0, t3
; RV64I-NEXT: j .LBB14_15
@@ -4699,7 +4699,7 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
; RV64I-NEXT: bltu t6, a7, .LBB14_12
; RV64I-NEXT: .LBB14_14:
; RV64I-NEXT: li t5, 0
-; RV64I-NEXT: subw t3, t6, a7
+; RV64I-NEXT: sub t3, t6, a7
; RV64I-NEXT: sll s1, a6, t3
; RV64I-NEXT: .LBB14_15:
; RV64I-NEXT: sub s0, a5, t1
@@ -4710,13 +4710,13 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
; RV64I-NEXT: .LBB14_17:
; RV64I-NEXT: bltu s0, a7, .LBB14_19
; RV64I-NEXT: # %bb.18:
-; RV64I-NEXT: subw t6, s0, a7
+; RV64I-NEXT: sub t6, s0, a7
; RV64I-NEXT: srl t6, t0, t6
; RV64I-NEXT: bnez s0, .LBB14_20
; RV64I-NEXT: j .LBB14_21
; RV64I-NEXT: .LBB14_19:
; RV64I-NEXT: srl t6, a6, s0
-; RV64I-NEXT: negw s1, s0
+; RV64I-NEXT: neg s1, s0
; RV64I-NEXT: sll s1, t0, s1
; RV64I-NEXT: or t6, t6, s1
; RV64I-NEXT: beqz s0, .LBB14_21
@@ -5542,8 +5542,8 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: or a5, s0, a6
; RV64I-NEXT: or a6, a1, s5
; RV64I-NEXT: slli a6, a6, 3
-; RV64I-NEXT: subw t2, a6, t0
-; RV64I-NEXT: negw t3, a6
+; RV64I-NEXT: sub t2, a6, t0
+; RV64I-NEXT: neg t3, a6
; RV64I-NEXT: srl s0, t1, t3
; RV64I-NEXT: bltu a6, t0, .LBB15_2
; RV64I-NEXT: # %bb.1:
@@ -5585,11 +5585,11 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: slli s4, s9, 16
; RV64I-NEXT: bltu a4, t0, .LBB15_7
; RV64I-NEXT: # %bb.6:
-; RV64I-NEXT: subw s0, a4, t0
+; RV64I-NEXT: sub s0, a4, t0
; RV64I-NEXT: srl s0, a5, s0
; RV64I-NEXT: j .LBB15_8
; RV64I-NEXT: .LBB15_7:
-; RV64I-NEXT: negw s6, a4
+; RV64I-NEXT: neg s6, a4
; RV64I-NEXT: sll s6, a5, s6
; RV64I-NEXT: or s0, s0, s6
; RV64I-NEXT: .LBB15_8:
@@ -5637,13 +5637,13 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: bltu s0, t0, .LBB15_20
; RV64I-NEXT: # %bb.19:
; RV64I-NEXT: li t2, 0
-; RV64I-NEXT: subw t0, s0, t0
+; RV64I-NEXT: sub t0, s0, t0
; RV64I-NEXT: sll t0, t1, t0
; RV64I-NEXT: bnez s0, .LBB15_21
; RV64I-NEXT: j .LBB15_22
; RV64I-NEXT: .LBB15_20:
; RV64I-NEXT: sll t2, t1, s0
-; RV64I-NEXT: negw t0, s0
+; RV64I-NEXT: neg t0, s0
; RV64I-NEXT: srl t0, t1, t0
; RV64I-NEXT: sll t1, a5, s0
; RV64I-NEXT: or t0, t0, t1
@@ -6456,8 +6456,8 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw
; RV64I-NEXT: or a5, s0, a6
; RV64I-NEXT: or a6, a1, s5
; RV64I-NEXT: slli a6, a6, 5
-; RV64I-NEXT: subw t2, a6, t0
-; RV64I-NEXT: negw t3, a6
+; RV64I-NEXT: sub t2, a6, t0
+; RV64I-NEXT: neg t3, a6
; RV64I-NEXT: srl s0, t1, t3
; RV64I-NEXT: bltu a6, t0, .LBB16_2
; RV64I-NEXT: # %bb.1:
@@ -6499,11 +6499,11 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw
; RV64I-NEXT: slli s4, s9, 16
; RV64I-NEXT: bltu a4, t0, .LBB16_7
; RV64I-NEXT: # %bb.6:
-; RV64I-NEXT: subw s0, a4, t0
+; RV64I-NEXT: sub s0, a4, t0
; RV64I-NEXT: srl s0, a5, s0
; RV64I-NEXT: j .LBB16_8
; RV64I-NEXT: .LBB16_7:
-; RV64I-NEXT: negw s6, a4
+; RV64I-NEXT: neg s6, a4
; RV64I-NEXT: sll s6, a5, s6
; RV64I-NEXT: or s0, s0, s6
; RV64I-NEXT: .LBB16_8:
@@ -6551,13 +6551,13 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw
; RV64I-NEXT: bltu s0, t0, .LBB16_20
; RV64I-NEXT: # %bb.19:
; RV64I-NEXT: li t2, 0
-; RV64I-NEXT: subw t0, s0, t0
+; RV64I-NEXT: sub t0, s0, t0
; RV64I-NEXT: sll t0, t1, t0
; RV64I-NEXT: bnez s0, .LBB16_21
; RV64I-NEXT: j .LBB16_22
; RV64I-NEXT: .LBB16_20:
; RV64I-NEXT: sll t2, t1, s0
-; RV64I-NEXT: negw t0, s0
+; RV64I-NEXT: neg t0, s0
; RV64I-NEXT: srl t0, t1, t0
; RV64I-NEXT: sll t1, a5, s0
; RV64I-NEXT: or t0, t0, t1
@@ -7370,8 +7370,8 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou
; RV64I-NEXT: or a5, s0, a6
; RV64I-NEXT: or a6, a1, s5
; RV64I-NEXT: slli a6, a6, 6
-; RV64I-NEXT: subw t2, a6, t0
-; RV64I-NEXT: negw t3, a6
+; RV64I-NEXT: sub t2, a6, t0
+; RV64I-NEXT: neg t3, a6
; RV64I-NEXT: srl s0, t1, t3
; RV64I-NEXT: bltu a6, t0, .LBB17_2
; RV64I-NEXT: # %bb.1:
@@ -7413,11 +7413,11 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou
; RV64I-NEXT: slli s4, s9, 16
; RV64I-NEXT: bltu a4, t0, .LBB17_7
; RV64I-NEXT: # %bb.6:
-; RV64I-NEXT: subw s0, a4, t0
+; RV64I-NEXT: sub s0, a4, t0
; RV64I-NEXT: srl s0, a5, s0
; RV64I-NEXT: j .LBB17_8
; RV64I-NEXT: .LBB17_7:
-; RV64I-NEXT: negw s6, a4
+; RV64I-NEXT: neg s6, a4
; RV64I-NEXT: sll s6, a5, s6
; RV64I-NEXT: or s0, s0, s6
; RV64I-NEXT: .LBB17_8:
@@ -7465,13 +7465,13 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou
; RV64I-NEXT: bltu s0, t0, .LBB17_20
; RV64I-NEXT: # %bb.19:
; RV64I-NEXT: li t2, 0
-; RV64I-NEXT: subw t0, s0, t0
+; RV64I-NEXT: sub t0, s0, t0
; RV64I-NEXT: sll t0, t1, t0
; RV64I-NEXT: bnez s0, .LBB17_21
; RV64I-NEXT: j .LBB17_22
; RV64I-NEXT: .LBB17_20:
; RV64I-NEXT: sll t2, t1, s0
-; RV64I-NEXT: negw t0, s0
+; RV64I-NEXT: neg t0, s0
; RV64I-NEXT: srl t0, t1, t0
; RV64I-NEXT: sll t1, a5, s0
; RV64I-NEXT: or t0, t0, t1
@@ -8310,8 +8310,8 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: or a5, t5, t4
; RV64I-NEXT: or a6, s0, t6
; RV64I-NEXT: slli a6, a6, 3
-; RV64I-NEXT: subw t1, a6, t0
-; RV64I-NEXT: negw t5, a6
+; RV64I-NEXT: sub t1, a6, t0
+; RV64I-NEXT: neg t5, a6
; RV64I-NEXT: sll t4, a5, t5
; RV64I-NEXT: bltu a6, t0, .LBB18_2
; RV64I-NEXT: # %bb.1:
@@ -8356,7 +8356,7 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: bgeu t6, t0, .LBB18_14
; RV64I-NEXT: .LBB18_12:
; RV64I-NEXT: sll t5, a7, t5
-; RV64I-NEXT: negw s0, t6
+; RV64I-NEXT: neg s0, t6
; RV64I-NEXT: srl s0, a7, s0
; RV64I-NEXT: or s1, s0, t4
; RV64I-NEXT: j .LBB18_15
@@ -8365,7 +8365,7 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: bltu t6, t0, .LBB18_12
; RV64I-NEXT: .LBB18_14:
; RV64I-NEXT: li t5, 0
-; RV64I-NEXT: subw t4, t6, t0
+; RV64I-NEXT: sub t4, t6, t0
; RV64I-NEXT: sll s1, a7, t4
; RV64I-NEXT: .LBB18_15:
; RV64I-NEXT: sub s0, a6, t1
@@ -8376,13 +8376,13 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: .LBB18_17:
; RV64I-NEXT: bltu s0, t0, .LBB18_19
; RV64I-NEXT: # %bb.18:
-; RV64I-NEXT: subw t6, s0, t0
+; RV64I-NEXT: sub t6, s0, t0
; RV64I-NEXT: sra t6, a5, t6
; RV64I-NEXT: bnez s0, .LBB18_20
; RV64I-NEXT: j .LBB18_21
; RV64I-NEXT: .LBB18_19:
; RV64I-NEXT: srl t6, a7, s0
-; RV64I-NEXT: negw s1, s0
+; RV64I-NEXT: neg s1, s0
; RV64I-NEXT: sll s1, a5, s1
; RV64I-NEXT: or t6, t6, s1
; RV64I-NEXT: beqz s0, .LBB18_21
@@ -9241,8 +9241,8 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV64I-NEXT: or a5, t5, t4
; RV64I-NEXT: or a6, s0, t6
; RV64I-NEXT: slli a6, a6, 5
-; RV64I-NEXT: subw t1, a6, t0
-; RV64I-NEXT: negw t5, a6
+; RV64I-NEXT: sub t1, a6, t0
+; RV64I-NEXT: neg t5, a6
; RV64I-NEXT: sll t4, a5, t5
; RV64I-NEXT: bltu a6, t0, .LBB19_2
; RV64I-NEXT: # %bb.1:
@@ -9287,7 +9287,7 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV64I-NEXT: bgeu t6, t0, .LBB19_14
; RV64I-NEXT: .LBB19_12:
; RV64I-NEXT: sll t5, a7, t5
-; RV64I-NEXT: negw s0, t6
+; RV64I-NEXT: neg s0, t6
; RV64I-NEXT: srl s0, a7, s0
; RV64I-NEXT: or s1, s0, t4
; RV64I-NEXT: j .LBB19_15
@@ -9296,7 +9296,7 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV64I-NEXT: bltu t6, t0, .LBB19_12
; RV64I-NEXT: .LBB19_14:
; RV64I-NEXT: li t5, 0
-; RV64I-NEXT: subw t4, t6, t0
+; RV64I-NEXT: sub t4, t6, t0
; RV64I-NEXT: sll s1, a7, t4
; RV64I-NEXT: .LBB19_15:
; RV64I-NEXT: sub s0, a6, t1
@@ -9307,13 +9307,13 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun
; RV64I-NEXT: .LBB19_17:
; RV64I-NEXT: bltu s0, t0, .LBB19_19
; RV64I-NEXT: # %bb.18:
-; RV64I-NEXT: subw t6, s0, t0
+; RV64I-NEXT: sub t6, s0, t0
; RV64I-NEXT: sra t6, a5, t6
; RV64I-NEXT: bnez s0, .LBB19_20
; RV64I-NEXT: j .LBB19_21
; RV64I-NEXT: .LBB19_19:
; RV64I-NEXT: srl t6, a7, s0
-; RV64I-NEXT: negw s1, s0
+; RV64I-NEXT: neg s1, s0
; RV64I-NEXT: sll s1, a5, s1
; RV64I-NEXT: or t6, t6, s1
; RV64I-NEXT: beqz s0, .LBB19_21
@@ -10172,8 +10172,8 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
; RV64I-NEXT: or a5, t5, t4
; RV64I-NEXT: or a6, s0, t6
; RV64I-NEXT: slli a6, a6, 6
-; RV64I-NEXT: subw t1, a6, t0
-; RV64I-NEXT: negw t5, a6
+; RV64I-NEXT: sub t1, a6, t0
+; RV64I-NEXT: neg t5, a6
; RV64I-NEXT: sll t4, a5, t5
; RV64I-NEXT: bltu a6, t0, .LBB20_2
; RV64I-NEXT: # %bb.1:
@@ -10218,7 +10218,7 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
; RV64I-NEXT: bgeu t6, t0, .LBB20_14
; RV64I-NEXT: .LBB20_12:
; RV64I-NEXT: sll t5, a7, t5
-; RV64I-NEXT: negw s0, t6
+; RV64I-NEXT: neg s0, t6
; RV64I-NEXT: srl s0, a7, s0
; RV64I-NEXT: or s1, s0, t4
; RV64I-NEXT: j .LBB20_15
@@ -10227,7 +10227,7 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
; RV64I-NEXT: bltu t6, t0, .LBB20_12
; RV64I-NEXT: .LBB20_14:
; RV64I-NEXT: li t5, 0
-; RV64I-NEXT: subw t4, t6, t0
+; RV64I-NEXT: sub t4, t6, t0
; RV64I-NEXT: sll s1, a7, t4
; RV64I-NEXT: .LBB20_15:
; RV64I-NEXT: sub s0, a6, t1
@@ -10238,13 +10238,13 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no
; RV64I-NEXT: .LBB20_17:
; RV64I-NEXT: bltu s0, t0, .LBB20_19
; RV64I-NEXT: # %bb.18:
-; RV64I-NEXT: subw t6, s0, t0
+; RV64I-NEXT: sub t6, s0, t0
; RV64I-NEXT: sra t6, a5, t6
; RV64I-NEXT: bnez s0, .LBB20_20
; RV64I-NEXT: j .LBB20_21
; RV64I-NEXT: .LBB20_19:
; RV64I-NEXT: srl t6, a7, s0
-; RV64I-NEXT: negw s1, s0
+; RV64I-NEXT: neg s1, s0
; RV64I-NEXT: sll s1, a5, s1
; RV64I-NEXT: or t6, t6, s1
; RV64I-NEXT: beqz s0, .LBB20_21
diff --git a/llvm/test/CodeGen/RISCV/abds-neg.ll b/llvm/test/CodeGen/RISCV/abds-neg.ll
index 3fb0f2c..41f73f5 100644
--- a/llvm/test/CodeGen/RISCV/abds-neg.ll
+++ b/llvm/test/CodeGen/RISCV/abds-neg.ll
@@ -2221,7 +2221,7 @@ define i32 @abd_subnsw_i32(i32 %a, i32 %b) nounwind {
;
; RV64I-LABEL: abd_subnsw_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: sraiw a1, a0, 31
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: subw a0, a1, a0
@@ -2236,7 +2236,7 @@ define i32 @abd_subnsw_i32(i32 %a, i32 %b) nounwind {
;
; RV64ZBB-LABEL: abd_subnsw_i32:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: subw a0, a0, a1
+; RV64ZBB-NEXT: sub a0, a0, a1
; RV64ZBB-NEXT: sraiw a1, a0, 31
; RV64ZBB-NEXT: xor a0, a0, a1
; RV64ZBB-NEXT: subw a0, a1, a0
@@ -2258,7 +2258,7 @@ define i32 @abd_subnsw_i32_undef(i32 %a, i32 %b) nounwind {
;
; RV64I-LABEL: abd_subnsw_i32_undef:
; RV64I: # %bb.0:
-; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: sraiw a1, a0, 31
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: subw a0, a1, a0
@@ -2273,7 +2273,7 @@ define i32 @abd_subnsw_i32_undef(i32 %a, i32 %b) nounwind {
;
; RV64ZBB-LABEL: abd_subnsw_i32_undef:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: subw a0, a0, a1
+; RV64ZBB-NEXT: sub a0, a0, a1
; RV64ZBB-NEXT: sraiw a1, a0, 31
; RV64ZBB-NEXT: xor a0, a0, a1
; RV64ZBB-NEXT: subw a0, a1, a0
diff --git a/llvm/test/CodeGen/RISCV/abds.ll b/llvm/test/CodeGen/RISCV/abds.ll
index efb4e1a..28a95ef 100644
--- a/llvm/test/CodeGen/RISCV/abds.ll
+++ b/llvm/test/CodeGen/RISCV/abds.ll
@@ -1733,21 +1733,13 @@ define i8 @abd_subnsw_i8(i8 %a, i8 %b) nounwind {
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: ret
;
-; RV32ZBB-LABEL: abd_subnsw_i8:
-; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: sub a0, a0, a1
-; RV32ZBB-NEXT: sext.b a0, a0
-; RV32ZBB-NEXT: neg a1, a0
-; RV32ZBB-NEXT: max a0, a0, a1
-; RV32ZBB-NEXT: ret
-;
-; RV64ZBB-LABEL: abd_subnsw_i8:
-; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: subw a0, a0, a1
-; RV64ZBB-NEXT: sext.b a0, a0
-; RV64ZBB-NEXT: neg a1, a0
-; RV64ZBB-NEXT: max a0, a0, a1
-; RV64ZBB-NEXT: ret
+; ZBB-LABEL: abd_subnsw_i8:
+; ZBB: # %bb.0:
+; ZBB-NEXT: sub a0, a0, a1
+; ZBB-NEXT: sext.b a0, a0
+; ZBB-NEXT: neg a1, a0
+; ZBB-NEXT: max a0, a0, a1
+; ZBB-NEXT: ret
%sub = sub nsw i8 %a, %b
%abs = call i8 @llvm.abs.i8(i8 %sub, i1 false)
ret i8 %abs
@@ -1772,21 +1764,13 @@ define i8 @abd_subnsw_i8_undef(i8 %a, i8 %b) nounwind {
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: ret
;
-; RV32ZBB-LABEL: abd_subnsw_i8_undef:
-; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: sub a0, a0, a1
-; RV32ZBB-NEXT: sext.b a0, a0
-; RV32ZBB-NEXT: neg a1, a0
-; RV32ZBB-NEXT: max a0, a0, a1
-; RV32ZBB-NEXT: ret
-;
-; RV64ZBB-LABEL: abd_subnsw_i8_undef:
-; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: subw a0, a0, a1
-; RV64ZBB-NEXT: sext.b a0, a0
-; RV64ZBB-NEXT: neg a1, a0
-; RV64ZBB-NEXT: max a0, a0, a1
-; RV64ZBB-NEXT: ret
+; ZBB-LABEL: abd_subnsw_i8_undef:
+; ZBB: # %bb.0:
+; ZBB-NEXT: sub a0, a0, a1
+; ZBB-NEXT: sext.b a0, a0
+; ZBB-NEXT: neg a1, a0
+; ZBB-NEXT: max a0, a0, a1
+; ZBB-NEXT: ret
%sub = sub nsw i8 %a, %b
%abs = call i8 @llvm.abs.i8(i8 %sub, i1 true)
ret i8 %abs
@@ -1811,21 +1795,13 @@ define i16 @abd_subnsw_i16(i16 %a, i16 %b) nounwind {
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: ret
;
-; RV32ZBB-LABEL: abd_subnsw_i16:
-; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: sub a0, a0, a1
-; RV32ZBB-NEXT: sext.h a0, a0
-; RV32ZBB-NEXT: neg a1, a0
-; RV32ZBB-NEXT: max a0, a0, a1
-; RV32ZBB-NEXT: ret
-;
-; RV64ZBB-LABEL: abd_subnsw_i16:
-; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: subw a0, a0, a1
-; RV64ZBB-NEXT: sext.h a0, a0
-; RV64ZBB-NEXT: neg a1, a0
-; RV64ZBB-NEXT: max a0, a0, a1
-; RV64ZBB-NEXT: ret
+; ZBB-LABEL: abd_subnsw_i16:
+; ZBB: # %bb.0:
+; ZBB-NEXT: sub a0, a0, a1
+; ZBB-NEXT: sext.h a0, a0
+; ZBB-NEXT: neg a1, a0
+; ZBB-NEXT: max a0, a0, a1
+; ZBB-NEXT: ret
%sub = sub nsw i16 %a, %b
%abs = call i16 @llvm.abs.i16(i16 %sub, i1 false)
ret i16 %abs
@@ -1850,21 +1826,13 @@ define i16 @abd_subnsw_i16_undef(i16 %a, i16 %b) nounwind {
; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: ret
;
-; RV32ZBB-LABEL: abd_subnsw_i16_undef:
-; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: sub a0, a0, a1
-; RV32ZBB-NEXT: sext.h a0, a0
-; RV32ZBB-NEXT: neg a1, a0
-; RV32ZBB-NEXT: max a0, a0, a1
-; RV32ZBB-NEXT: ret
-;
-; RV64ZBB-LABEL: abd_subnsw_i16_undef:
-; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: subw a0, a0, a1
-; RV64ZBB-NEXT: sext.h a0, a0
-; RV64ZBB-NEXT: neg a1, a0
-; RV64ZBB-NEXT: max a0, a0, a1
-; RV64ZBB-NEXT: ret
+; ZBB-LABEL: abd_subnsw_i16_undef:
+; ZBB: # %bb.0:
+; ZBB-NEXT: sub a0, a0, a1
+; ZBB-NEXT: sext.h a0, a0
+; ZBB-NEXT: neg a1, a0
+; ZBB-NEXT: max a0, a0, a1
+; ZBB-NEXT: ret
%sub = sub nsw i16 %a, %b
%abs = call i16 @llvm.abs.i16(i16 %sub, i1 true)
ret i16 %abs
@@ -1881,7 +1849,7 @@ define i32 @abd_subnsw_i32(i32 %a, i32 %b) nounwind {
;
; RV64I-LABEL: abd_subnsw_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: sraiw a1, a0, 31
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: subw a0, a0, a1
@@ -1916,7 +1884,7 @@ define i32 @abd_subnsw_i32_undef(i32 %a, i32 %b) nounwind {
;
; RV64I-LABEL: abd_subnsw_i32_undef:
; RV64I: # %bb.0:
-; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: sraiw a1, a0, 31
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: subw a0, a0, a1
@@ -2317,7 +2285,7 @@ define i32 @abd_sub_i32(i32 %a, i32 %b) nounwind {
;
; RV64I-LABEL: abd_sub_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: sraiw a1, a0, 31
; RV64I-NEXT: xor a0, a0, a1
; RV64I-NEXT: subw a0, a0, a1
diff --git a/llvm/test/CodeGen/RISCV/addimm-mulimm.ll b/llvm/test/CodeGen/RISCV/addimm-mulimm.ll
index aac355e..3b2cab2 100644
--- a/llvm/test/CodeGen/RISCV/addimm-mulimm.ll
+++ b/llvm/test/CodeGen/RISCV/addimm-mulimm.ll
@@ -20,7 +20,7 @@ define i32 @add_mul_combine_accept_a1(i32 %x) {
; RV64IMB: # %bb.0:
; RV64IMB-NEXT: sh1add a1, a0, a0
; RV64IMB-NEXT: slli a0, a0, 5
-; RV64IMB-NEXT: subw a0, a0, a1
+; RV64IMB-NEXT: sub a0, a0, a1
; RV64IMB-NEXT: addiw a0, a0, 1073
; RV64IMB-NEXT: ret
%tmp0 = add i32 %x, 37
@@ -41,7 +41,7 @@ define signext i32 @add_mul_combine_accept_a2(i32 signext %x) {
; RV64IMB: # %bb.0:
; RV64IMB-NEXT: sh1add a1, a0, a0
; RV64IMB-NEXT: slli a0, a0, 5
-; RV64IMB-NEXT: subw a0, a0, a1
+; RV64IMB-NEXT: sub a0, a0, a1
; RV64IMB-NEXT: addiw a0, a0, 1073
; RV64IMB-NEXT: ret
%tmp0 = add i32 %x, 37
@@ -93,7 +93,7 @@ define i32 @add_mul_combine_accept_b1(i32 %x) {
; RV64IMB: # %bb.0:
; RV64IMB-NEXT: sh3add a1, a0, a0
; RV64IMB-NEXT: slli a0, a0, 5
-; RV64IMB-NEXT: subw a0, a0, a1
+; RV64IMB-NEXT: sub a0, a0, a1
; RV64IMB-NEXT: lui a1, 50
; RV64IMB-NEXT: addi a1, a1, 1119
; RV64IMB-NEXT: addw a0, a0, a1
@@ -118,7 +118,7 @@ define signext i32 @add_mul_combine_accept_b2(i32 signext %x) {
; RV64IMB: # %bb.0:
; RV64IMB-NEXT: sh3add a1, a0, a0
; RV64IMB-NEXT: slli a0, a0, 5
-; RV64IMB-NEXT: subw a0, a0, a1
+; RV64IMB-NEXT: sub a0, a0, a1
; RV64IMB-NEXT: lui a1, 50
; RV64IMB-NEXT: addi a1, a1, 1119
; RV64IMB-NEXT: addw a0, a0, a1
@@ -456,7 +456,7 @@ define i32 @add_mul_combine_reject_f1(i32 %x) {
; RV64IMB-NEXT: addi a0, a0, 1972
; RV64IMB-NEXT: sh1add a1, a0, a0
; RV64IMB-NEXT: slli a0, a0, 5
-; RV64IMB-NEXT: subw a0, a0, a1
+; RV64IMB-NEXT: sub a0, a0, a1
; RV64IMB-NEXT: addiw a0, a0, 11
; RV64IMB-NEXT: ret
%tmp0 = mul i32 %x, 29
@@ -479,7 +479,7 @@ define signext i32 @add_mul_combine_reject_f2(i32 signext %x) {
; RV64IMB-NEXT: addi a0, a0, 1972
; RV64IMB-NEXT: sh1add a1, a0, a0
; RV64IMB-NEXT: slli a0, a0, 5
-; RV64IMB-NEXT: subw a0, a0, a1
+; RV64IMB-NEXT: sub a0, a0, a1
; RV64IMB-NEXT: addiw a0, a0, 11
; RV64IMB-NEXT: ret
%tmp0 = mul i32 %x, 29
diff --git a/llvm/test/CodeGen/RISCV/aext-to-sext.ll b/llvm/test/CodeGen/RISCV/aext-to-sext.ll
index f3f71a9..34549a0 100644
--- a/llvm/test/CodeGen/RISCV/aext-to-sext.ll
+++ b/llvm/test/CodeGen/RISCV/aext-to-sext.ll
@@ -16,7 +16,7 @@ define void @quux(i32 signext %arg, i32 signext %arg1) nounwind {
; RV64I-NEXT: addi sp, sp, -16
; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT: subw s0, a1, a0
+; RV64I-NEXT: sub s0, a1, a0
; RV64I-NEXT: .LBB0_2: # %bb2
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: call hoge
diff --git a/llvm/test/CodeGen/RISCV/atomic-signext.ll b/llvm/test/CodeGen/RISCV/atomic-signext.ll
index bebc097..7d29ac9 100644
--- a/llvm/test/CodeGen/RISCV/atomic-signext.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-signext.ll
@@ -4582,7 +4582,7 @@ define signext i32 @atomicrmw_and_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB56_2: # %else
-; RV64I-NEXT: lwu a1, 0(a0)
+; RV64I-NEXT: lw a1, 0(a0)
; RV64I-NEXT: andi a2, a1, 1
; RV64I-NEXT: sw a2, 0(a0)
; RV64I-NEXT: sext.w a0, a1
@@ -4700,7 +4700,7 @@ define signext i32 @atomicrmw_nand_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: ret
; RV64I-NEXT: .LBB57_2: # %else
-; RV64I-NEXT: lwu a1, 0(a0)
+; RV64I-NEXT: lw a1, 0(a0)
; RV64I-NEXT: andi a2, a1, 1
; RV64I-NEXT: sw a2, 0(a0)
; RV64I-NEXT: sext.w a0, a1
diff --git a/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll b/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll
index 27704d1..ea9786d 100644
--- a/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll
+++ b/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll
@@ -161,7 +161,7 @@ define i8 @atomicrmw_usub_cond_i8(ptr %ptr, i8 %val) {
; RV64IA-NEXT: sltu t0, t0, a5
; RV64IA-NEXT: addi t0, t0, -1
; RV64IA-NEXT: and t0, t0, a1
-; RV64IA-NEXT: subw a6, a6, t0
+; RV64IA-NEXT: sub a6, a6, t0
; RV64IA-NEXT: zext.b a6, a6
; RV64IA-NEXT: sllw a6, a6, a0
; RV64IA-NEXT: and a3, a3, a4
@@ -345,7 +345,7 @@ define i16 @atomicrmw_usub_cond_i16(ptr %ptr, i16 %val) {
; RV64IA-NEXT: sltu t1, t1, a6
; RV64IA-NEXT: addi t1, t1, -1
; RV64IA-NEXT: and t1, t1, a1
-; RV64IA-NEXT: subw a7, a7, t1
+; RV64IA-NEXT: sub a7, a7, t1
; RV64IA-NEXT: and a7, a7, a3
; RV64IA-NEXT: sllw a7, a7, a0
; RV64IA-NEXT: and a4, a4, a5
diff --git a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
index ada1933..4e04f38 100644
--- a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
@@ -150,7 +150,7 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
; RV64IA-NEXT: zext.b a7, a5
; RV64IA-NEXT: addi a5, a5, 1
; RV64IA-NEXT: sltu a7, a7, a1
-; RV64IA-NEXT: negw a7, a7
+; RV64IA-NEXT: neg a7, a7
; RV64IA-NEXT: and a5, a7, a5
; RV64IA-NEXT: zext.b a5, a5
; RV64IA-NEXT: sllw a5, a5, a0
@@ -325,7 +325,7 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
; RV64IA-NEXT: addi a6, a6, 1
; RV64IA-NEXT: sltu t0, t0, a1
; RV64IA-NEXT: and a6, a6, a3
-; RV64IA-NEXT: negw t0, t0
+; RV64IA-NEXT: neg t0, t0
; RV64IA-NEXT: and a6, t0, a6
; RV64IA-NEXT: sllw a6, a6, a0
; RV64IA-NEXT: and a4, a4, a5
diff --git a/llvm/test/CodeGen/RISCV/bfloat-convert.ll b/llvm/test/CodeGen/RISCV/bfloat-convert.ll
index 3422ea6..6207a17 100644
--- a/llvm/test/CodeGen/RISCV/bfloat-convert.ll
+++ b/llvm/test/CodeGen/RISCV/bfloat-convert.ll
@@ -1074,7 +1074,7 @@ define bfloat @fcvt_bf16_wu_load(ptr %p) nounwind {
;
; CHECK64ZFBFMIN-LABEL: fcvt_bf16_wu_load:
; CHECK64ZFBFMIN: # %bb.0:
-; CHECK64ZFBFMIN-NEXT: lwu a0, 0(a0)
+; CHECK64ZFBFMIN-NEXT: lw a0, 0(a0)
; CHECK64ZFBFMIN-NEXT: fcvt.s.wu fa5, a0
; CHECK64ZFBFMIN-NEXT: fcvt.bf16.s fa0, fa5
; CHECK64ZFBFMIN-NEXT: ret
@@ -1083,7 +1083,7 @@ define bfloat @fcvt_bf16_wu_load(ptr %p) nounwind {
; RV64ID: # %bb.0:
; RV64ID-NEXT: addi sp, sp, -16
; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64ID-NEXT: lwu a0, 0(a0)
+; RV64ID-NEXT: lw a0, 0(a0)
; RV64ID-NEXT: fcvt.s.wu fa0, a0
; RV64ID-NEXT: call __truncsfbf2
; RV64ID-NEXT: fmv.x.w a0, fa0
diff --git a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
index 72489185..530980c 100644
--- a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
@@ -63,7 +63,7 @@ define i8 @test_cttz_i8(i8 %a) nounwind {
; RV64NOZBB-NEXT: and a0, a0, a1
; RV64NOZBB-NEXT: srli a1, a0, 1
; RV64NOZBB-NEXT: andi a1, a1, 85
-; RV64NOZBB-NEXT: subw a0, a0, a1
+; RV64NOZBB-NEXT: sub a0, a0, a1
; RV64NOZBB-NEXT: andi a1, a0, 51
; RV64NOZBB-NEXT: srli a0, a0, 2
; RV64NOZBB-NEXT: andi a0, a0, 51
@@ -262,7 +262,7 @@ define i32 @test_cttz_i32(i32 %a) nounwind {
; RV64I-NEXT: sext.w a1, a0
; RV64I-NEXT: beqz a1, .LBB2_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: negw a1, a0
+; RV64I-NEXT: neg a1, a0
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: slli a1, a0, 6
; RV64I-NEXT: slli a2, a0, 8
@@ -270,16 +270,16 @@ define i32 @test_cttz_i32(i32 %a) nounwind {
; RV64I-NEXT: slli a4, a0, 12
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: slli a2, a0, 16
-; RV64I-NEXT: subw a3, a3, a4
+; RV64I-NEXT: sub a3, a3, a4
; RV64I-NEXT: slli a4, a0, 18
-; RV64I-NEXT: subw a2, a2, a4
+; RV64I-NEXT: sub a2, a2, a4
; RV64I-NEXT: slli a4, a0, 4
-; RV64I-NEXT: subw a4, a0, a4
+; RV64I-NEXT: sub a4, a0, a4
; RV64I-NEXT: add a1, a4, a1
; RV64I-NEXT: slli a4, a0, 14
-; RV64I-NEXT: subw a3, a3, a4
+; RV64I-NEXT: sub a3, a3, a4
; RV64I-NEXT: slli a4, a0, 23
-; RV64I-NEXT: subw a2, a2, a4
+; RV64I-NEXT: sub a2, a2, a4
; RV64I-NEXT: slli a0, a0, 27
; RV64I-NEXT: add a1, a1, a3
; RV64I-NEXT: add a0, a2, a0
@@ -318,7 +318,7 @@ define i32 @test_cttz_i32(i32 %a) nounwind {
; RV64M-NEXT: sext.w a1, a0
; RV64M-NEXT: beqz a1, .LBB2_2
; RV64M-NEXT: # %bb.1: # %cond.false
-; RV64M-NEXT: negw a1, a0
+; RV64M-NEXT: neg a1, a0
; RV64M-NEXT: and a0, a0, a1
; RV64M-NEXT: lui a1, 30667
; RV64M-NEXT: addi a1, a1, 1329
@@ -597,7 +597,7 @@ define i8 @test_cttz_i8_zero_undef(i8 %a) nounwind {
; RV64NOZBB-NEXT: and a0, a0, a1
; RV64NOZBB-NEXT: srli a1, a0, 1
; RV64NOZBB-NEXT: andi a1, a1, 85
-; RV64NOZBB-NEXT: subw a0, a0, a1
+; RV64NOZBB-NEXT: sub a0, a0, a1
; RV64NOZBB-NEXT: andi a1, a0, 51
; RV64NOZBB-NEXT: srli a0, a0, 2
; RV64NOZBB-NEXT: andi a0, a0, 51
@@ -743,7 +743,7 @@ define i32 @test_cttz_i32_zero_undef(i32 %a) nounwind {
;
; RV64I-LABEL: test_cttz_i32_zero_undef:
; RV64I: # %bb.0:
-; RV64I-NEXT: negw a1, a0
+; RV64I-NEXT: neg a1, a0
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: slli a1, a0, 6
; RV64I-NEXT: slli a2, a0, 8
@@ -751,16 +751,16 @@ define i32 @test_cttz_i32_zero_undef(i32 %a) nounwind {
; RV64I-NEXT: slli a4, a0, 12
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: slli a2, a0, 16
-; RV64I-NEXT: subw a3, a3, a4
+; RV64I-NEXT: sub a3, a3, a4
; RV64I-NEXT: slli a4, a0, 18
-; RV64I-NEXT: subw a2, a2, a4
+; RV64I-NEXT: sub a2, a2, a4
; RV64I-NEXT: slli a4, a0, 4
-; RV64I-NEXT: subw a4, a0, a4
+; RV64I-NEXT: sub a4, a0, a4
; RV64I-NEXT: add a1, a4, a1
; RV64I-NEXT: slli a4, a0, 14
-; RV64I-NEXT: subw a3, a3, a4
+; RV64I-NEXT: sub a3, a3, a4
; RV64I-NEXT: slli a4, a0, 23
-; RV64I-NEXT: subw a2, a2, a4
+; RV64I-NEXT: sub a2, a2, a4
; RV64I-NEXT: slli a0, a0, 27
; RV64I-NEXT: add a1, a1, a3
; RV64I-NEXT: add a0, a2, a0
@@ -788,7 +788,7 @@ define i32 @test_cttz_i32_zero_undef(i32 %a) nounwind {
;
; RV64M-LABEL: test_cttz_i32_zero_undef:
; RV64M: # %bb.0:
-; RV64M-NEXT: negw a1, a0
+; RV64M-NEXT: neg a1, a0
; RV64M-NEXT: and a0, a0, a1
; RV64M-NEXT: lui a1, 30667
; RV64M-NEXT: addi a1, a1, 1329
@@ -1039,7 +1039,7 @@ define i8 @test_ctlz_i8(i8 %a) nounwind {
; RV64NOZBB-NEXT: not a0, a0
; RV64NOZBB-NEXT: srli a1, a0, 1
; RV64NOZBB-NEXT: andi a1, a1, 85
-; RV64NOZBB-NEXT: subw a0, a0, a1
+; RV64NOZBB-NEXT: sub a0, a0, a1
; RV64NOZBB-NEXT: andi a1, a0, 51
; RV64NOZBB-NEXT: srli a0, a0, 2
; RV64NOZBB-NEXT: andi a0, a0, 51
@@ -1711,7 +1711,7 @@ define i8 @test_ctlz_i8_zero_undef(i8 %a) nounwind {
; RV64NOZBB-NEXT: not a0, a0
; RV64NOZBB-NEXT: srli a1, a0, 1
; RV64NOZBB-NEXT: andi a1, a1, 85
-; RV64NOZBB-NEXT: subw a0, a0, a1
+; RV64NOZBB-NEXT: sub a0, a0, a1
; RV64NOZBB-NEXT: andi a1, a0, 51
; RV64NOZBB-NEXT: srli a0, a0, 2
; RV64NOZBB-NEXT: andi a0, a0, 51
@@ -2296,7 +2296,7 @@ define i8 @test_ctpop_i8(i8 %a) nounwind {
; RV64NOZBB: # %bb.0:
; RV64NOZBB-NEXT: srli a1, a0, 1
; RV64NOZBB-NEXT: andi a1, a1, 85
-; RV64NOZBB-NEXT: subw a0, a0, a1
+; RV64NOZBB-NEXT: sub a0, a0, a1
; RV64NOZBB-NEXT: andi a1, a0, 51
; RV64NOZBB-NEXT: srli a0, a0, 2
; RV64NOZBB-NEXT: andi a0, a0, 51
@@ -2336,7 +2336,7 @@ define i8 @test_ctpop_i8(i8 %a) nounwind {
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: srli a1, a0, 1
; RV64XTHEADBB-NEXT: andi a1, a1, 85
-; RV64XTHEADBB-NEXT: subw a0, a0, a1
+; RV64XTHEADBB-NEXT: sub a0, a0, a1
; RV64XTHEADBB-NEXT: andi a1, a0, 51
; RV64XTHEADBB-NEXT: srli a0, a0, 2
; RV64XTHEADBB-NEXT: andi a0, a0, 51
diff --git a/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll b/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll
index 637fb31..a1061fbb 100644
--- a/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll
+++ b/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll
@@ -163,7 +163,7 @@ define i64 @ctz_dereferencing_pointer_zext(ptr %b) nounwind {
; RV64I-LABEL: ctz_dereferencing_pointer_zext:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: lw a0, 0(a0)
-; RV64I-NEXT: negw a1, a0
+; RV64I-NEXT: neg a1, a0
; RV64I-NEXT: and a1, a0, a1
; RV64I-NEXT: slli a2, a1, 6
; RV64I-NEXT: slli a3, a1, 8
@@ -171,16 +171,16 @@ define i64 @ctz_dereferencing_pointer_zext(ptr %b) nounwind {
; RV64I-NEXT: slli a5, a1, 12
; RV64I-NEXT: add a2, a2, a3
; RV64I-NEXT: slli a3, a1, 16
-; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: sub a4, a4, a5
; RV64I-NEXT: slli a5, a1, 18
-; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: sub a3, a3, a5
; RV64I-NEXT: slli a5, a1, 4
-; RV64I-NEXT: subw a5, a1, a5
+; RV64I-NEXT: sub a5, a1, a5
; RV64I-NEXT: add a2, a5, a2
; RV64I-NEXT: slli a5, a1, 14
-; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: sub a4, a4, a5
; RV64I-NEXT: slli a5, a1, 23
-; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: sub a3, a3, a5
; RV64I-NEXT: slli a1, a1, 27
; RV64I-NEXT: add a2, a2, a4
; RV64I-NEXT: add a1, a3, a1
@@ -248,7 +248,7 @@ define signext i32 @ctz1(i32 signext %x) nounwind {
;
; RV64I-LABEL: ctz1:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: negw a1, a0
+; RV64I-NEXT: neg a1, a0
; RV64I-NEXT: and a1, a0, a1
; RV64I-NEXT: slli a2, a1, 6
; RV64I-NEXT: slli a3, a1, 8
@@ -256,16 +256,16 @@ define signext i32 @ctz1(i32 signext %x) nounwind {
; RV64I-NEXT: slli a5, a1, 12
; RV64I-NEXT: add a2, a2, a3
; RV64I-NEXT: slli a3, a1, 16
-; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: sub a4, a4, a5
; RV64I-NEXT: slli a5, a1, 18
-; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: sub a3, a3, a5
; RV64I-NEXT: slli a5, a1, 4
-; RV64I-NEXT: subw a5, a1, a5
+; RV64I-NEXT: sub a5, a1, a5
; RV64I-NEXT: add a2, a5, a2
; RV64I-NEXT: slli a5, a1, 14
-; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: sub a4, a4, a5
; RV64I-NEXT: slli a5, a1, 23
-; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: sub a3, a3, a5
; RV64I-NEXT: slli a1, a1, 27
; RV64I-NEXT: add a2, a2, a4
; RV64I-NEXT: add a1, a3, a1
@@ -331,7 +331,7 @@ define signext i32 @ctz1_flipped(i32 signext %x) nounwind {
;
; RV64I-LABEL: ctz1_flipped:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: negw a1, a0
+; RV64I-NEXT: neg a1, a0
; RV64I-NEXT: and a1, a0, a1
; RV64I-NEXT: slli a2, a1, 6
; RV64I-NEXT: slli a3, a1, 8
@@ -339,16 +339,16 @@ define signext i32 @ctz1_flipped(i32 signext %x) nounwind {
; RV64I-NEXT: slli a5, a1, 12
; RV64I-NEXT: add a2, a2, a3
; RV64I-NEXT: slli a3, a1, 16
-; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: sub a4, a4, a5
; RV64I-NEXT: slli a5, a1, 18
-; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: sub a3, a3, a5
; RV64I-NEXT: slli a5, a1, 4
-; RV64I-NEXT: subw a5, a1, a5
+; RV64I-NEXT: sub a5, a1, a5
; RV64I-NEXT: add a2, a5, a2
; RV64I-NEXT: slli a5, a1, 14
-; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: sub a4, a4, a5
; RV64I-NEXT: slli a5, a1, 23
-; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: sub a3, a3, a5
; RV64I-NEXT: slli a1, a1, 27
; RV64I-NEXT: add a2, a2, a4
; RV64I-NEXT: add a1, a3, a1
@@ -412,7 +412,7 @@ define signext i32 @ctz2(i32 signext %x) nounwind {
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: beqz a0, .LBB4_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: negw a1, a0
+; RV64I-NEXT: neg a1, a0
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: slli a1, a0, 6
; RV64I-NEXT: slli a2, a0, 8
@@ -420,16 +420,16 @@ define signext i32 @ctz2(i32 signext %x) nounwind {
; RV64I-NEXT: slli a4, a0, 12
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: slli a2, a0, 16
-; RV64I-NEXT: subw a3, a3, a4
+; RV64I-NEXT: sub a3, a3, a4
; RV64I-NEXT: slli a4, a0, 18
-; RV64I-NEXT: subw a2, a2, a4
+; RV64I-NEXT: sub a2, a2, a4
; RV64I-NEXT: slli a4, a0, 4
-; RV64I-NEXT: subw a4, a0, a4
+; RV64I-NEXT: sub a4, a0, a4
; RV64I-NEXT: add a1, a4, a1
; RV64I-NEXT: slli a4, a0, 14
-; RV64I-NEXT: subw a3, a3, a4
+; RV64I-NEXT: sub a3, a3, a4
; RV64I-NEXT: slli a4, a0, 23
-; RV64I-NEXT: subw a2, a2, a4
+; RV64I-NEXT: sub a2, a2, a4
; RV64I-NEXT: slli a0, a0, 27
; RV64I-NEXT: add a1, a1, a3
; RV64I-NEXT: add a0, a2, a0
@@ -490,7 +490,7 @@ define signext i32 @ctz3(i32 signext %x) nounwind {
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: beqz a0, .LBB5_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: negw a1, a0
+; RV64I-NEXT: neg a1, a0
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: slli a1, a0, 6
; RV64I-NEXT: slli a2, a0, 8
@@ -498,16 +498,16 @@ define signext i32 @ctz3(i32 signext %x) nounwind {
; RV64I-NEXT: slli a4, a0, 12
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: slli a2, a0, 16
-; RV64I-NEXT: subw a3, a3, a4
+; RV64I-NEXT: sub a3, a3, a4
; RV64I-NEXT: slli a4, a0, 18
-; RV64I-NEXT: subw a2, a2, a4
+; RV64I-NEXT: sub a2, a2, a4
; RV64I-NEXT: slli a4, a0, 4
-; RV64I-NEXT: subw a4, a0, a4
+; RV64I-NEXT: sub a4, a0, a4
; RV64I-NEXT: add a1, a4, a1
; RV64I-NEXT: slli a4, a0, 14
-; RV64I-NEXT: subw a3, a3, a4
+; RV64I-NEXT: sub a3, a3, a4
; RV64I-NEXT: slli a4, a0, 23
-; RV64I-NEXT: subw a2, a2, a4
+; RV64I-NEXT: sub a2, a2, a4
; RV64I-NEXT: slli a0, a0, 27
; RV64I-NEXT: add a1, a1, a3
; RV64I-NEXT: add a0, a2, a0
@@ -824,7 +824,7 @@ define signext i32 @ctz5(i32 signext %x) nounwind {
;
; RV64I-LABEL: ctz5:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: negw a1, a0
+; RV64I-NEXT: neg a1, a0
; RV64I-NEXT: and a1, a0, a1
; RV64I-NEXT: slli a2, a1, 6
; RV64I-NEXT: slli a3, a1, 8
@@ -832,16 +832,16 @@ define signext i32 @ctz5(i32 signext %x) nounwind {
; RV64I-NEXT: slli a5, a1, 12
; RV64I-NEXT: add a2, a2, a3
; RV64I-NEXT: slli a3, a1, 16
-; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: sub a4, a4, a5
; RV64I-NEXT: slli a5, a1, 18
-; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: sub a3, a3, a5
; RV64I-NEXT: slli a5, a1, 4
-; RV64I-NEXT: subw a5, a1, a5
+; RV64I-NEXT: sub a5, a1, a5
; RV64I-NEXT: add a2, a5, a2
; RV64I-NEXT: slli a5, a1, 14
-; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: sub a4, a4, a5
; RV64I-NEXT: slli a5, a1, 23
-; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: sub a3, a3, a5
; RV64I-NEXT: slli a1, a1, 27
; RV64I-NEXT: add a2, a2, a4
; RV64I-NEXT: add a1, a3, a1
@@ -907,7 +907,7 @@ define signext i32 @ctz6(i32 signext %x) nounwind {
;
; RV64I-LABEL: ctz6:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: negw a1, a0
+; RV64I-NEXT: neg a1, a0
; RV64I-NEXT: and a1, a0, a1
; RV64I-NEXT: slli a2, a1, 6
; RV64I-NEXT: slli a3, a1, 8
@@ -915,16 +915,16 @@ define signext i32 @ctz6(i32 signext %x) nounwind {
; RV64I-NEXT: slli a5, a1, 12
; RV64I-NEXT: add a2, a2, a3
; RV64I-NEXT: slli a3, a1, 16
-; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: sub a4, a4, a5
; RV64I-NEXT: slli a5, a1, 18
-; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: sub a3, a3, a5
; RV64I-NEXT: slli a5, a1, 4
-; RV64I-NEXT: subw a5, a1, a5
+; RV64I-NEXT: sub a5, a1, a5
; RV64I-NEXT: add a2, a5, a2
; RV64I-NEXT: slli a5, a1, 14
-; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: sub a4, a4, a5
; RV64I-NEXT: slli a5, a1, 23
-; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: sub a3, a3, a5
; RV64I-NEXT: slli a1, a1, 27
; RV64I-NEXT: add a2, a2, a4
; RV64I-NEXT: add a1, a3, a1
@@ -997,7 +997,7 @@ define signext i32 @globalVar() nounwind {
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: lui a0, %hi(global_x)
; RV64I-NEXT: lw a0, %lo(global_x)(a0)
-; RV64I-NEXT: negw a1, a0
+; RV64I-NEXT: neg a1, a0
; RV64I-NEXT: and a1, a0, a1
; RV64I-NEXT: slli a2, a1, 6
; RV64I-NEXT: slli a3, a1, 8
@@ -1005,16 +1005,16 @@ define signext i32 @globalVar() nounwind {
; RV64I-NEXT: slli a5, a1, 12
; RV64I-NEXT: add a2, a2, a3
; RV64I-NEXT: slli a3, a1, 16
-; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: sub a4, a4, a5
; RV64I-NEXT: slli a5, a1, 18
-; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: sub a3, a3, a5
; RV64I-NEXT: slli a5, a1, 4
-; RV64I-NEXT: subw a5, a1, a5
+; RV64I-NEXT: sub a5, a1, a5
; RV64I-NEXT: add a2, a5, a2
; RV64I-NEXT: slli a5, a1, 14
-; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: sub a4, a4, a5
; RV64I-NEXT: slli a5, a1, 23
-; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: sub a3, a3, a5
; RV64I-NEXT: slli a1, a1, 27
; RV64I-NEXT: add a2, a2, a4
; RV64I-NEXT: add a1, a3, a1
diff --git a/llvm/test/CodeGen/RISCV/div-by-constant.ll b/llvm/test/CodeGen/RISCV/div-by-constant.ll
index ea8b04d..53c3f58 100644
--- a/llvm/test/CodeGen/RISCV/div-by-constant.ll
+++ b/llvm/test/CodeGen/RISCV/div-by-constant.ll
@@ -54,7 +54,7 @@ define i32 @udiv_constant_add(i32 %a) nounwind {
; RV64IM-NEXT: slli a2, a2, 32
; RV64IM-NEXT: mulhu a1, a1, a2
; RV64IM-NEXT: srli a1, a1, 32
-; RV64IM-NEXT: subw a0, a0, a1
+; RV64IM-NEXT: sub a0, a0, a1
; RV64IM-NEXT: srliw a0, a0, 1
; RV64IM-NEXT: add a0, a0, a1
; RV64IM-NEXT: srli a0, a0, 2
@@ -67,7 +67,7 @@ define i32 @udiv_constant_add(i32 %a) nounwind {
; RV64IMZB-NEXT: addi a2, a2, -1755
; RV64IMZB-NEXT: mul a1, a1, a2
; RV64IMZB-NEXT: srli a1, a1, 32
-; RV64IMZB-NEXT: subw a0, a0, a1
+; RV64IMZB-NEXT: sub a0, a0, a1
; RV64IMZB-NEXT: srliw a0, a0, 1
; RV64IMZB-NEXT: add a0, a0, a1
; RV64IMZB-NEXT: srli a0, a0, 2
@@ -193,7 +193,7 @@ define i8 @udiv8_constant_add(i8 %a) nounwind {
; RV64IM-NEXT: li a2, 37
; RV64IM-NEXT: mul a1, a1, a2
; RV64IM-NEXT: srli a1, a1, 8
-; RV64IM-NEXT: subw a0, a0, a1
+; RV64IM-NEXT: sub a0, a0, a1
; RV64IM-NEXT: slli a0, a0, 56
; RV64IM-NEXT: srli a0, a0, 57
; RV64IM-NEXT: add a0, a0, a1
@@ -206,7 +206,7 @@ define i8 @udiv8_constant_add(i8 %a) nounwind {
; RV64IMZB-NEXT: sh3add a2, a1, a1
; RV64IMZB-NEXT: sh2add a1, a2, a1
; RV64IMZB-NEXT: srli a1, a1, 8
-; RV64IMZB-NEXT: subw a0, a0, a1
+; RV64IMZB-NEXT: sub a0, a0, a1
; RV64IMZB-NEXT: slli a0, a0, 56
; RV64IMZB-NEXT: srli a0, a0, 57
; RV64IMZB-NEXT: add a0, a0, a1
@@ -257,7 +257,7 @@ define i16 @udiv16_constant_add(i16 %a) nounwind {
; RV64-NEXT: lui a2, 149808
; RV64-NEXT: mulhu a1, a1, a2
; RV64-NEXT: srli a1, a1, 16
-; RV64-NEXT: subw a0, a0, a1
+; RV64-NEXT: sub a0, a0, a1
; RV64-NEXT: slli a0, a0, 48
; RV64-NEXT: srli a0, a0, 49
; RV64-NEXT: add a0, a0, a1
@@ -367,7 +367,7 @@ define i32 @sdiv_constant_sub_srai(i32 %a) nounwind {
; RV64-NEXT: addi a2, a2, -1171
; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: srli a1, a1, 32
-; RV64-NEXT: subw a1, a1, a0
+; RV64-NEXT: sub a1, a1, a0
; RV64-NEXT: srliw a0, a1, 31
; RV64-NEXT: sraiw a1, a1, 2
; RV64-NEXT: add a0, a1, a0
@@ -666,7 +666,7 @@ define i8 @sdiv8_constant_sub_srai(i8 %a) nounwind {
; RV64IM-NEXT: srai a1, a1, 56
; RV64IM-NEXT: mul a1, a1, a2
; RV64IM-NEXT: srli a1, a1, 8
-; RV64IM-NEXT: subw a1, a1, a0
+; RV64IM-NEXT: sub a1, a1, a0
; RV64IM-NEXT: slli a1, a1, 56
; RV64IM-NEXT: srli a0, a1, 63
; RV64IM-NEXT: srai a1, a1, 58
@@ -679,7 +679,7 @@ define i8 @sdiv8_constant_sub_srai(i8 %a) nounwind {
; RV64IMZB-NEXT: li a2, 109
; RV64IMZB-NEXT: mul a1, a1, a2
; RV64IMZB-NEXT: srli a1, a1, 8
-; RV64IMZB-NEXT: subw a1, a1, a0
+; RV64IMZB-NEXT: sub a1, a1, a0
; RV64IMZB-NEXT: slli a1, a1, 56
; RV64IMZB-NEXT: srli a0, a1, 63
; RV64IMZB-NEXT: srai a1, a1, 58
@@ -889,7 +889,7 @@ define i16 @sdiv16_constant_sub_srai(i16 %a) nounwind {
; RV64IM-NEXT: addi a2, a2, 1911
; RV64IM-NEXT: mul a1, a1, a2
; RV64IM-NEXT: srli a1, a1, 16
-; RV64IM-NEXT: subw a1, a1, a0
+; RV64IM-NEXT: sub a1, a1, a0
; RV64IM-NEXT: slli a1, a1, 48
; RV64IM-NEXT: srli a0, a1, 63
; RV64IM-NEXT: srai a1, a1, 51
@@ -903,7 +903,7 @@ define i16 @sdiv16_constant_sub_srai(i16 %a) nounwind {
; RV64IMZB-NEXT: addi a2, a2, 1911
; RV64IMZB-NEXT: mul a1, a1, a2
; RV64IMZB-NEXT: srli a1, a1, 16
-; RV64IMZB-NEXT: subw a1, a1, a0
+; RV64IMZB-NEXT: sub a1, a1, a0
; RV64IMZB-NEXT: slli a1, a1, 48
; RV64IMZB-NEXT: srli a0, a1, 63
; RV64IMZB-NEXT: srai a1, a1, 51
diff --git a/llvm/test/CodeGen/RISCV/double-convert-strict.ll b/llvm/test/CodeGen/RISCV/double-convert-strict.ll
index 2b1ec10..9a5e357 100644
--- a/llvm/test/CodeGen/RISCV/double-convert-strict.ll
+++ b/llvm/test/CodeGen/RISCV/double-convert-strict.ll
@@ -347,17 +347,11 @@ define double @fcvt_d_wu(i32 %a) nounwind strictfp {
declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata)
define double @fcvt_d_wu_load(ptr %p) nounwind strictfp {
-; RV32IFD-LABEL: fcvt_d_wu_load:
-; RV32IFD: # %bb.0:
-; RV32IFD-NEXT: lw a0, 0(a0)
-; RV32IFD-NEXT: fcvt.d.wu fa0, a0
-; RV32IFD-NEXT: ret
-;
-; RV64IFD-LABEL: fcvt_d_wu_load:
-; RV64IFD: # %bb.0:
-; RV64IFD-NEXT: lwu a0, 0(a0)
-; RV64IFD-NEXT: fcvt.d.wu fa0, a0
-; RV64IFD-NEXT: ret
+; CHECKIFD-LABEL: fcvt_d_wu_load:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: lw a0, 0(a0)
+; CHECKIFD-NEXT: fcvt.d.wu fa0, a0
+; CHECKIFD-NEXT: ret
;
; RV32IZFINXZDINX-LABEL: fcvt_d_wu_load:
; RV32IZFINXZDINX: # %bb.0:
@@ -367,7 +361,7 @@ define double @fcvt_d_wu_load(ptr %p) nounwind strictfp {
;
; RV64IZFINXZDINX-LABEL: fcvt_d_wu_load:
; RV64IZFINXZDINX: # %bb.0:
-; RV64IZFINXZDINX-NEXT: lwu a0, 0(a0)
+; RV64IZFINXZDINX-NEXT: lw a0, 0(a0)
; RV64IZFINXZDINX-NEXT: fcvt.d.wu a0, a0
; RV64IZFINXZDINX-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll
index fad9e21..a2e6186 100644
--- a/llvm/test/CodeGen/RISCV/double-convert.ll
+++ b/llvm/test/CodeGen/RISCV/double-convert.ll
@@ -582,17 +582,11 @@ define double @fcvt_d_wu(i32 %a) nounwind {
}
define double @fcvt_d_wu_load(ptr %p) nounwind {
-; RV32IFD-LABEL: fcvt_d_wu_load:
-; RV32IFD: # %bb.0:
-; RV32IFD-NEXT: lw a0, 0(a0)
-; RV32IFD-NEXT: fcvt.d.wu fa0, a0
-; RV32IFD-NEXT: ret
-;
-; RV64IFD-LABEL: fcvt_d_wu_load:
-; RV64IFD: # %bb.0:
-; RV64IFD-NEXT: lwu a0, 0(a0)
-; RV64IFD-NEXT: fcvt.d.wu fa0, a0
-; RV64IFD-NEXT: ret
+; CHECKIFD-LABEL: fcvt_d_wu_load:
+; CHECKIFD: # %bb.0:
+; CHECKIFD-NEXT: lw a0, 0(a0)
+; CHECKIFD-NEXT: fcvt.d.wu fa0, a0
+; CHECKIFD-NEXT: ret
;
; RV32IZFINXZDINX-LABEL: fcvt_d_wu_load:
; RV32IZFINXZDINX: # %bb.0:
@@ -602,7 +596,7 @@ define double @fcvt_d_wu_load(ptr %p) nounwind {
;
; RV64IZFINXZDINX-LABEL: fcvt_d_wu_load:
; RV64IZFINXZDINX: # %bb.0:
-; RV64IZFINXZDINX-NEXT: lwu a0, 0(a0)
+; RV64IZFINXZDINX-NEXT: lw a0, 0(a0)
; RV64IZFINXZDINX-NEXT: fcvt.d.wu a0, a0
; RV64IZFINXZDINX-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/float-convert-strict.ll b/llvm/test/CodeGen/RISCV/float-convert-strict.ll
index 0c265e1..1b25a2b 100644
--- a/llvm/test/CodeGen/RISCV/float-convert-strict.ll
+++ b/llvm/test/CodeGen/RISCV/float-convert-strict.ll
@@ -236,29 +236,17 @@ define float @fcvt_s_wu(i32 %a) nounwind strictfp {
declare float @llvm.experimental.constrained.uitofp.f32.i32(i32 %a, metadata, metadata)
define float @fcvt_s_wu_load(ptr %p) nounwind strictfp {
-; RV32IF-LABEL: fcvt_s_wu_load:
-; RV32IF: # %bb.0:
-; RV32IF-NEXT: lw a0, 0(a0)
-; RV32IF-NEXT: fcvt.s.wu fa0, a0
-; RV32IF-NEXT: ret
-;
-; RV64IF-LABEL: fcvt_s_wu_load:
-; RV64IF: # %bb.0:
-; RV64IF-NEXT: lwu a0, 0(a0)
-; RV64IF-NEXT: fcvt.s.wu fa0, a0
-; RV64IF-NEXT: ret
-;
-; RV32IZFINX-LABEL: fcvt_s_wu_load:
-; RV32IZFINX: # %bb.0:
-; RV32IZFINX-NEXT: lw a0, 0(a0)
-; RV32IZFINX-NEXT: fcvt.s.wu a0, a0
-; RV32IZFINX-NEXT: ret
+; CHECKIF-LABEL: fcvt_s_wu_load:
+; CHECKIF: # %bb.0:
+; CHECKIF-NEXT: lw a0, 0(a0)
+; CHECKIF-NEXT: fcvt.s.wu fa0, a0
+; CHECKIF-NEXT: ret
;
-; RV64IZFINX-LABEL: fcvt_s_wu_load:
-; RV64IZFINX: # %bb.0:
-; RV64IZFINX-NEXT: lwu a0, 0(a0)
-; RV64IZFINX-NEXT: fcvt.s.wu a0, a0
-; RV64IZFINX-NEXT: ret
+; CHECKIZFINX-LABEL: fcvt_s_wu_load:
+; CHECKIZFINX: # %bb.0:
+; CHECKIZFINX-NEXT: lw a0, 0(a0)
+; CHECKIZFINX-NEXT: fcvt.s.wu a0, a0
+; CHECKIZFINX-NEXT: ret
;
; RV32I-LABEL: fcvt_s_wu_load:
; RV32I: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/float-convert.ll b/llvm/test/CodeGen/RISCV/float-convert.ll
index 1cb7b27..60349a0 100644
--- a/llvm/test/CodeGen/RISCV/float-convert.ll
+++ b/llvm/test/CodeGen/RISCV/float-convert.ll
@@ -482,29 +482,17 @@ define float @fcvt_s_wu(i32 %a) nounwind {
}
define float @fcvt_s_wu_load(ptr %p) nounwind {
-; RV32IF-LABEL: fcvt_s_wu_load:
-; RV32IF: # %bb.0:
-; RV32IF-NEXT: lw a0, 0(a0)
-; RV32IF-NEXT: fcvt.s.wu fa0, a0
-; RV32IF-NEXT: ret
-;
-; RV64IF-LABEL: fcvt_s_wu_load:
-; RV64IF: # %bb.0:
-; RV64IF-NEXT: lwu a0, 0(a0)
-; RV64IF-NEXT: fcvt.s.wu fa0, a0
-; RV64IF-NEXT: ret
-;
-; RV32IZFINX-LABEL: fcvt_s_wu_load:
-; RV32IZFINX: # %bb.0:
-; RV32IZFINX-NEXT: lw a0, 0(a0)
-; RV32IZFINX-NEXT: fcvt.s.wu a0, a0
-; RV32IZFINX-NEXT: ret
+; CHECKIF-LABEL: fcvt_s_wu_load:
+; CHECKIF: # %bb.0:
+; CHECKIF-NEXT: lw a0, 0(a0)
+; CHECKIF-NEXT: fcvt.s.wu fa0, a0
+; CHECKIF-NEXT: ret
;
-; RV64IZFINX-LABEL: fcvt_s_wu_load:
-; RV64IZFINX: # %bb.0:
-; RV64IZFINX-NEXT: lwu a0, 0(a0)
-; RV64IZFINX-NEXT: fcvt.s.wu a0, a0
-; RV64IZFINX-NEXT: ret
+; CHECKIZFINX-LABEL: fcvt_s_wu_load:
+; CHECKIZFINX: # %bb.0:
+; CHECKIZFINX-NEXT: lw a0, 0(a0)
+; CHECKIZFINX-NEXT: fcvt.s.wu a0, a0
+; CHECKIZFINX-NEXT: ret
;
; RV32I-LABEL: fcvt_s_wu_load:
; RV32I: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
index 246e6a6..117e3e4 100644
--- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll
+++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
@@ -3292,30 +3292,30 @@ define i64 @ustest_f64i64_mm(double %x) {
; RV32IF-NEXT: mv a1, a0
; RV32IF-NEXT: addi a0, sp, 8
; RV32IF-NEXT: call __fixdfti
-; RV32IF-NEXT: lw a0, 8(sp)
-; RV32IF-NEXT: lw a1, 12(sp)
-; RV32IF-NEXT: lw a2, 20(sp)
+; RV32IF-NEXT: lw a0, 20(sp)
+; RV32IF-NEXT: lw a1, 8(sp)
+; RV32IF-NEXT: lw a2, 12(sp)
; RV32IF-NEXT: lw a3, 16(sp)
-; RV32IF-NEXT: beqz a2, .LBB47_2
+; RV32IF-NEXT: beqz a0, .LBB47_2
; RV32IF-NEXT: # %bb.1: # %entry
-; RV32IF-NEXT: slti a4, a2, 0
+; RV32IF-NEXT: slti a4, a0, 0
; RV32IF-NEXT: j .LBB47_3
; RV32IF-NEXT: .LBB47_2:
; RV32IF-NEXT: seqz a4, a3
; RV32IF-NEXT: .LBB47_3: # %entry
; RV32IF-NEXT: xori a3, a3, 1
-; RV32IF-NEXT: or a3, a3, a2
+; RV32IF-NEXT: or a3, a3, a0
; RV32IF-NEXT: seqz a3, a3
; RV32IF-NEXT: addi a3, a3, -1
; RV32IF-NEXT: and a3, a3, a4
; RV32IF-NEXT: neg a3, a3
+; RV32IF-NEXT: and a2, a3, a2
; RV32IF-NEXT: and a1, a3, a1
; RV32IF-NEXT: and a0, a3, a0
-; RV32IF-NEXT: and a2, a3, a2
-; RV32IF-NEXT: slti a2, a2, 0
-; RV32IF-NEXT: addi a2, a2, -1
-; RV32IF-NEXT: and a0, a2, a0
-; RV32IF-NEXT: and a1, a2, a1
+; RV32IF-NEXT: slti a0, a0, 0
+; RV32IF-NEXT: addi a3, a0, -1
+; RV32IF-NEXT: and a0, a3, a1
+; RV32IF-NEXT: and a1, a3, a2
; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IF-NEXT: .cfi_restore ra
; RV32IF-NEXT: addi sp, sp, 32
@@ -3354,30 +3354,30 @@ define i64 @ustest_f64i64_mm(double %x) {
; RV32IFD-NEXT: .cfi_offset ra, -4
; RV32IFD-NEXT: addi a0, sp, 8
; RV32IFD-NEXT: call __fixdfti
-; RV32IFD-NEXT: lw a0, 8(sp)
-; RV32IFD-NEXT: lw a1, 12(sp)
-; RV32IFD-NEXT: lw a2, 20(sp)
+; RV32IFD-NEXT: lw a0, 20(sp)
+; RV32IFD-NEXT: lw a1, 8(sp)
+; RV32IFD-NEXT: lw a2, 12(sp)
; RV32IFD-NEXT: lw a3, 16(sp)
-; RV32IFD-NEXT: beqz a2, .LBB47_2
+; RV32IFD-NEXT: beqz a0, .LBB47_2
; RV32IFD-NEXT: # %bb.1: # %entry
-; RV32IFD-NEXT: slti a4, a2, 0
+; RV32IFD-NEXT: slti a4, a0, 0
; RV32IFD-NEXT: j .LBB47_3
; RV32IFD-NEXT: .LBB47_2:
; RV32IFD-NEXT: seqz a4, a3
; RV32IFD-NEXT: .LBB47_3: # %entry
; RV32IFD-NEXT: xori a3, a3, 1
-; RV32IFD-NEXT: or a3, a3, a2
+; RV32IFD-NEXT: or a3, a3, a0
; RV32IFD-NEXT: seqz a3, a3
; RV32IFD-NEXT: addi a3, a3, -1
; RV32IFD-NEXT: and a3, a3, a4
; RV32IFD-NEXT: neg a3, a3
+; RV32IFD-NEXT: and a2, a3, a2
; RV32IFD-NEXT: and a1, a3, a1
; RV32IFD-NEXT: and a0, a3, a0
-; RV32IFD-NEXT: and a2, a3, a2
-; RV32IFD-NEXT: slti a2, a2, 0
-; RV32IFD-NEXT: addi a2, a2, -1
-; RV32IFD-NEXT: and a0, a2, a0
-; RV32IFD-NEXT: and a1, a2, a1
+; RV32IFD-NEXT: slti a0, a0, 0
+; RV32IFD-NEXT: addi a3, a0, -1
+; RV32IFD-NEXT: and a0, a3, a1
+; RV32IFD-NEXT: and a1, a3, a2
; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: .cfi_restore ra
; RV32IFD-NEXT: addi sp, sp, 32
@@ -3530,30 +3530,30 @@ define i64 @ustest_f32i64_mm(float %x) {
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: call __fixsfti
-; RV32-NEXT: lw a0, 8(sp)
-; RV32-NEXT: lw a1, 12(sp)
-; RV32-NEXT: lw a2, 20(sp)
+; RV32-NEXT: lw a0, 20(sp)
+; RV32-NEXT: lw a1, 8(sp)
+; RV32-NEXT: lw a2, 12(sp)
; RV32-NEXT: lw a3, 16(sp)
-; RV32-NEXT: beqz a2, .LBB50_2
+; RV32-NEXT: beqz a0, .LBB50_2
; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: slti a4, a2, 0
+; RV32-NEXT: slti a4, a0, 0
; RV32-NEXT: j .LBB50_3
; RV32-NEXT: .LBB50_2:
; RV32-NEXT: seqz a4, a3
; RV32-NEXT: .LBB50_3: # %entry
; RV32-NEXT: xori a3, a3, 1
-; RV32-NEXT: or a3, a3, a2
+; RV32-NEXT: or a3, a3, a0
; RV32-NEXT: seqz a3, a3
; RV32-NEXT: addi a3, a3, -1
; RV32-NEXT: and a3, a3, a4
; RV32-NEXT: neg a3, a3
+; RV32-NEXT: and a2, a3, a2
; RV32-NEXT: and a1, a3, a1
; RV32-NEXT: and a0, a3, a0
-; RV32-NEXT: and a2, a3, a2
-; RV32-NEXT: slti a2, a2, 0
-; RV32-NEXT: addi a2, a2, -1
-; RV32-NEXT: and a0, a2, a0
-; RV32-NEXT: and a1, a2, a1
+; RV32-NEXT: slti a0, a0, 0
+; RV32-NEXT: addi a3, a0, -1
+; RV32-NEXT: and a0, a3, a1
+; RV32-NEXT: and a1, a3, a2
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: .cfi_restore ra
; RV32-NEXT: addi sp, sp, 32
@@ -3767,30 +3767,30 @@ define i64 @ustest_f16i64_mm(half %x) {
; RV32-NEXT: call __extendhfsf2
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: call __fixsfti
-; RV32-NEXT: lw a0, 8(sp)
-; RV32-NEXT: lw a1, 12(sp)
-; RV32-NEXT: lw a2, 20(sp)
+; RV32-NEXT: lw a0, 20(sp)
+; RV32-NEXT: lw a1, 8(sp)
+; RV32-NEXT: lw a2, 12(sp)
; RV32-NEXT: lw a3, 16(sp)
-; RV32-NEXT: beqz a2, .LBB53_2
+; RV32-NEXT: beqz a0, .LBB53_2
; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: slti a4, a2, 0
+; RV32-NEXT: slti a4, a0, 0
; RV32-NEXT: j .LBB53_3
; RV32-NEXT: .LBB53_2:
; RV32-NEXT: seqz a4, a3
; RV32-NEXT: .LBB53_3: # %entry
; RV32-NEXT: xori a3, a3, 1
-; RV32-NEXT: or a3, a3, a2
+; RV32-NEXT: or a3, a3, a0
; RV32-NEXT: seqz a3, a3
; RV32-NEXT: addi a3, a3, -1
; RV32-NEXT: and a3, a3, a4
; RV32-NEXT: neg a3, a3
+; RV32-NEXT: and a2, a3, a2
; RV32-NEXT: and a1, a3, a1
; RV32-NEXT: and a0, a3, a0
-; RV32-NEXT: and a2, a3, a2
-; RV32-NEXT: slti a2, a2, 0
-; RV32-NEXT: addi a2, a2, -1
-; RV32-NEXT: and a0, a2, a0
-; RV32-NEXT: and a1, a2, a1
+; RV32-NEXT: slti a0, a0, 0
+; RV32-NEXT: addi a3, a0, -1
+; RV32-NEXT: and a0, a3, a1
+; RV32-NEXT: and a1, a3, a2
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: .cfi_restore ra
; RV32-NEXT: addi sp, sp, 32
diff --git a/llvm/test/CodeGen/RISCV/half-convert-strict.ll b/llvm/test/CodeGen/RISCV/half-convert-strict.ll
index 0a04d44..675e230 100644
--- a/llvm/test/CodeGen/RISCV/half-convert-strict.ll
+++ b/llvm/test/CodeGen/RISCV/half-convert-strict.ll
@@ -1461,29 +1461,17 @@ define half @fcvt_h_wu(i32 %a) nounwind strictfp {
declare half @llvm.experimental.constrained.uitofp.f16.i32(i32, metadata, metadata)
define half @fcvt_h_wu_load(ptr %p) nounwind strictfp {
-; RV32IZFH-LABEL: fcvt_h_wu_load:
-; RV32IZFH: # %bb.0:
-; RV32IZFH-NEXT: lw a0, 0(a0)
-; RV32IZFH-NEXT: fcvt.h.wu fa0, a0
-; RV32IZFH-NEXT: ret
-;
-; RV64IZFH-LABEL: fcvt_h_wu_load:
-; RV64IZFH: # %bb.0:
-; RV64IZFH-NEXT: lwu a0, 0(a0)
-; RV64IZFH-NEXT: fcvt.h.wu fa0, a0
-; RV64IZFH-NEXT: ret
-;
-; RV32IZHINX-LABEL: fcvt_h_wu_load:
-; RV32IZHINX: # %bb.0:
-; RV32IZHINX-NEXT: lw a0, 0(a0)
-; RV32IZHINX-NEXT: fcvt.h.wu a0, a0
-; RV32IZHINX-NEXT: ret
+; CHECKIZFH-LABEL: fcvt_h_wu_load:
+; CHECKIZFH: # %bb.0:
+; CHECKIZFH-NEXT: lw a0, 0(a0)
+; CHECKIZFH-NEXT: fcvt.h.wu fa0, a0
+; CHECKIZFH-NEXT: ret
;
-; RV64IZHINX-LABEL: fcvt_h_wu_load:
-; RV64IZHINX: # %bb.0:
-; RV64IZHINX-NEXT: lwu a0, 0(a0)
-; RV64IZHINX-NEXT: fcvt.h.wu a0, a0
-; RV64IZHINX-NEXT: ret
+; CHECKIZHINX-LABEL: fcvt_h_wu_load:
+; CHECKIZHINX: # %bb.0:
+; CHECKIZHINX-NEXT: lw a0, 0(a0)
+; CHECKIZHINX-NEXT: fcvt.h.wu a0, a0
+; CHECKIZHINX-NEXT: ret
;
; RV32IDZFH-LABEL: fcvt_h_wu_load:
; RV32IDZFH: # %bb.0:
@@ -1493,7 +1481,7 @@ define half @fcvt_h_wu_load(ptr %p) nounwind strictfp {
;
; RV64IDZFH-LABEL: fcvt_h_wu_load:
; RV64IDZFH: # %bb.0:
-; RV64IDZFH-NEXT: lwu a0, 0(a0)
+; RV64IDZFH-NEXT: lw a0, 0(a0)
; RV64IDZFH-NEXT: fcvt.h.wu fa0, a0
; RV64IDZFH-NEXT: ret
;
@@ -1505,7 +1493,7 @@ define half @fcvt_h_wu_load(ptr %p) nounwind strictfp {
;
; RV64IZDINXZHINX-LABEL: fcvt_h_wu_load:
; RV64IZDINXZHINX: # %bb.0:
-; RV64IZDINXZHINX-NEXT: lwu a0, 0(a0)
+; RV64IZDINXZHINX-NEXT: lw a0, 0(a0)
; RV64IZDINXZHINX-NEXT: fcvt.h.wu a0, a0
; RV64IZDINXZHINX-NEXT: ret
;
@@ -1518,7 +1506,7 @@ define half @fcvt_h_wu_load(ptr %p) nounwind strictfp {
;
; CHECK64-IZFHMIN-LABEL: fcvt_h_wu_load:
; CHECK64-IZFHMIN: # %bb.0:
-; CHECK64-IZFHMIN-NEXT: lwu a0, 0(a0)
+; CHECK64-IZFHMIN-NEXT: lw a0, 0(a0)
; CHECK64-IZFHMIN-NEXT: fcvt.s.wu fa5, a0
; CHECK64-IZFHMIN-NEXT: fcvt.h.s fa0, fa5
; CHECK64-IZFHMIN-NEXT: ret
@@ -1532,7 +1520,7 @@ define half @fcvt_h_wu_load(ptr %p) nounwind strictfp {
;
; CHECK64-IZHINXMIN-LABEL: fcvt_h_wu_load:
; CHECK64-IZHINXMIN: # %bb.0:
-; CHECK64-IZHINXMIN-NEXT: lwu a0, 0(a0)
+; CHECK64-IZHINXMIN-NEXT: lw a0, 0(a0)
; CHECK64-IZHINXMIN-NEXT: fcvt.s.wu a0, a0
; CHECK64-IZHINXMIN-NEXT: fcvt.h.s a0, a0
; CHECK64-IZHINXMIN-NEXT: ret
@@ -1546,7 +1534,7 @@ define half @fcvt_h_wu_load(ptr %p) nounwind strictfp {
;
; CHECK64-IZDINXZHINXMIN-LABEL: fcvt_h_wu_load:
; CHECK64-IZDINXZHINXMIN: # %bb.0:
-; CHECK64-IZDINXZHINXMIN-NEXT: lwu a0, 0(a0)
+; CHECK64-IZDINXZHINXMIN-NEXT: lw a0, 0(a0)
; CHECK64-IZDINXZHINXMIN-NEXT: fcvt.s.wu a0, a0
; CHECK64-IZDINXZHINXMIN-NEXT: fcvt.h.s a0, a0
; CHECK64-IZDINXZHINXMIN-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll
index c53237e..facb544 100644
--- a/llvm/test/CodeGen/RISCV/half-convert.ll
+++ b/llvm/test/CodeGen/RISCV/half-convert.ll
@@ -4388,17 +4388,11 @@ define half @fcvt_h_wu(i32 %a) nounwind {
}
define half @fcvt_h_wu_load(ptr %p) nounwind {
-; RV32IZFH-LABEL: fcvt_h_wu_load:
-; RV32IZFH: # %bb.0:
-; RV32IZFH-NEXT: lw a0, 0(a0)
-; RV32IZFH-NEXT: fcvt.h.wu fa0, a0
-; RV32IZFH-NEXT: ret
-;
-; RV64IZFH-LABEL: fcvt_h_wu_load:
-; RV64IZFH: # %bb.0:
-; RV64IZFH-NEXT: lwu a0, 0(a0)
-; RV64IZFH-NEXT: fcvt.h.wu fa0, a0
-; RV64IZFH-NEXT: ret
+; CHECKIZFH-LABEL: fcvt_h_wu_load:
+; CHECKIZFH: # %bb.0:
+; CHECKIZFH-NEXT: lw a0, 0(a0)
+; CHECKIZFH-NEXT: fcvt.h.wu fa0, a0
+; CHECKIZFH-NEXT: ret
;
; RV32IDZFH-LABEL: fcvt_h_wu_load:
; RV32IDZFH: # %bb.0:
@@ -4408,33 +4402,21 @@ define half @fcvt_h_wu_load(ptr %p) nounwind {
;
; RV64IDZFH-LABEL: fcvt_h_wu_load:
; RV64IDZFH: # %bb.0:
-; RV64IDZFH-NEXT: lwu a0, 0(a0)
+; RV64IDZFH-NEXT: lw a0, 0(a0)
; RV64IDZFH-NEXT: fcvt.h.wu fa0, a0
; RV64IDZFH-NEXT: ret
;
-; RV32IZHINX-LABEL: fcvt_h_wu_load:
-; RV32IZHINX: # %bb.0:
-; RV32IZHINX-NEXT: lw a0, 0(a0)
-; RV32IZHINX-NEXT: fcvt.h.wu a0, a0
-; RV32IZHINX-NEXT: ret
-;
-; RV64IZHINX-LABEL: fcvt_h_wu_load:
-; RV64IZHINX: # %bb.0:
-; RV64IZHINX-NEXT: lwu a0, 0(a0)
-; RV64IZHINX-NEXT: fcvt.h.wu a0, a0
-; RV64IZHINX-NEXT: ret
-;
-; RV32IZDINXZHINX-LABEL: fcvt_h_wu_load:
-; RV32IZDINXZHINX: # %bb.0:
-; RV32IZDINXZHINX-NEXT: lw a0, 0(a0)
-; RV32IZDINXZHINX-NEXT: fcvt.h.wu a0, a0
-; RV32IZDINXZHINX-NEXT: ret
+; CHECKIZHINX-LABEL: fcvt_h_wu_load:
+; CHECKIZHINX: # %bb.0:
+; CHECKIZHINX-NEXT: lw a0, 0(a0)
+; CHECKIZHINX-NEXT: fcvt.h.wu a0, a0
+; CHECKIZHINX-NEXT: ret
;
-; RV64IZDINXZHINX-LABEL: fcvt_h_wu_load:
-; RV64IZDINXZHINX: # %bb.0:
-; RV64IZDINXZHINX-NEXT: lwu a0, 0(a0)
-; RV64IZDINXZHINX-NEXT: fcvt.h.wu a0, a0
-; RV64IZDINXZHINX-NEXT: ret
+; CHECKIZDINXZHINX-LABEL: fcvt_h_wu_load:
+; CHECKIZDINXZHINX: # %bb.0:
+; CHECKIZDINXZHINX-NEXT: lw a0, 0(a0)
+; CHECKIZDINXZHINX-NEXT: fcvt.h.wu a0, a0
+; CHECKIZDINXZHINX-NEXT: ret
;
; RV32I-LABEL: fcvt_h_wu_load:
; RV32I: # %bb.0:
@@ -4476,7 +4458,7 @@ define half @fcvt_h_wu_load(ptr %p) nounwind {
; RV64ID-LP64: # %bb.0:
; RV64ID-LP64-NEXT: addi sp, sp, -16
; RV64ID-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64ID-LP64-NEXT: lwu a0, 0(a0)
+; RV64ID-LP64-NEXT: lw a0, 0(a0)
; RV64ID-LP64-NEXT: fcvt.s.wu fa5, a0
; RV64ID-LP64-NEXT: fmv.x.w a0, fa5
; RV64ID-LP64-NEXT: call __truncsfhf2
@@ -4505,7 +4487,7 @@ define half @fcvt_h_wu_load(ptr %p) nounwind {
; RV64ID: # %bb.0:
; RV64ID-NEXT: addi sp, sp, -16
; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64ID-NEXT: lwu a0, 0(a0)
+; RV64ID-NEXT: lw a0, 0(a0)
; RV64ID-NEXT: fcvt.s.wu fa0, a0
; RV64ID-NEXT: call __truncsfhf2
; RV64ID-NEXT: fmv.x.w a0, fa0
@@ -4525,7 +4507,7 @@ define half @fcvt_h_wu_load(ptr %p) nounwind {
;
; CHECK64-IZFHMIN-LABEL: fcvt_h_wu_load:
; CHECK64-IZFHMIN: # %bb.0:
-; CHECK64-IZFHMIN-NEXT: lwu a0, 0(a0)
+; CHECK64-IZFHMIN-NEXT: lw a0, 0(a0)
; CHECK64-IZFHMIN-NEXT: fcvt.s.wu fa5, a0
; CHECK64-IZFHMIN-NEXT: fcvt.h.s fa0, fa5
; CHECK64-IZFHMIN-NEXT: ret
@@ -4539,7 +4521,7 @@ define half @fcvt_h_wu_load(ptr %p) nounwind {
;
; CHECK64-IZHINXMIN-LABEL: fcvt_h_wu_load:
; CHECK64-IZHINXMIN: # %bb.0:
-; CHECK64-IZHINXMIN-NEXT: lwu a0, 0(a0)
+; CHECK64-IZHINXMIN-NEXT: lw a0, 0(a0)
; CHECK64-IZHINXMIN-NEXT: fcvt.s.wu a0, a0
; CHECK64-IZHINXMIN-NEXT: fcvt.h.s a0, a0
; CHECK64-IZHINXMIN-NEXT: ret
@@ -4553,7 +4535,7 @@ define half @fcvt_h_wu_load(ptr %p) nounwind {
;
; CHECK64-IZDINXZHINXMIN-LABEL: fcvt_h_wu_load:
; CHECK64-IZDINXZHINXMIN: # %bb.0:
-; CHECK64-IZDINXZHINXMIN-NEXT: lwu a0, 0(a0)
+; CHECK64-IZDINXZHINXMIN-NEXT: lw a0, 0(a0)
; CHECK64-IZDINXZHINXMIN-NEXT: fcvt.s.wu a0, a0
; CHECK64-IZDINXZHINXMIN-NEXT: fcvt.h.s a0, a0
; CHECK64-IZDINXZHINXMIN-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/iabs.ll b/llvm/test/CodeGen/RISCV/iabs.ll
index 66cde32..774f1a1 100644
--- a/llvm/test/CodeGen/RISCV/iabs.ll
+++ b/llvm/test/CodeGen/RISCV/iabs.ll
@@ -651,7 +651,7 @@ define void @zext16_abs8(i8 %x, ptr %p) {
; RV64I-NEXT: srai a2, a0, 63
; RV64I-NEXT: srai a0, a0, 56
; RV64I-NEXT: xor a0, a0, a2
-; RV64I-NEXT: subw a0, a0, a2
+; RV64I-NEXT: sub a0, a0, a2
; RV64I-NEXT: sh a0, 0(a1)
; RV64I-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/interrupt-attr.ll b/llvm/test/CodeGen/RISCV/interrupt-attr.ll
index e278b8d..472b903 100644
--- a/llvm/test/CodeGen/RISCV/interrupt-attr.ll
+++ b/llvm/test/CodeGen/RISCV/interrupt-attr.ll
@@ -794,498 +794,46 @@ define void @foo_with_call() #1 {
; CHECK-RV32-V-NEXT: slli a0, a0, 5
; CHECK-RV32-V-NEXT: sub sp, sp, a0
; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 5
-; CHECK-RV32-V-NEXT: sub a0, a1, a0
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 3
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
; CHECK-RV32-V-NEXT: slli a0, a0, 3
; CHECK-RV32-V-NEXT: mv a1, a0
; CHECK-RV32-V-NEXT: slli a0, a0, 1
; CHECK-RV32-V-NEXT: add a0, a0, a1
; CHECK-RV32-V-NEXT: add a0, sp, a0
; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 3
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 3
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 4
-; CHECK-RV32-V-NEXT: add a0, a1, a0
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-V-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-V-NEXT: csrr a0, vlenb
; CHECK-RV32-V-NEXT: slli a0, a0, 4
; CHECK-RV32-V-NEXT: add a0, sp, a0
; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 4
-; CHECK-RV32-V-NEXT: sub a0, a1, a0
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 3
-; CHECK-RV32-V-NEXT: add a0, a1, a0
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-V-NEXT: csrr a0, vlenb
; CHECK-RV32-V-NEXT: slli a0, a0, 3
; CHECK-RV32-V-NEXT: add a0, sp, a0
; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 3
-; CHECK-RV32-V-NEXT: sub a0, a1, a0
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a1, a0
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a1, a0
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-V-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-V-NEXT: addi a0, sp, 16
-; CHECK-RV32-V-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-V-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-V-NEXT: call otherfoo
; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 5
-; CHECK-RV32-V-NEXT: sub a0, a1, a0
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
; CHECK-RV32-V-NEXT: slli a0, a0, 3
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 3
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
; CHECK-RV32-V-NEXT: mv a1, a0
; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 3
; CHECK-RV32-V-NEXT: add a0, a0, a1
; CHECK-RV32-V-NEXT: add a0, sp, a0
; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 3
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 4
-; CHECK-RV32-V-NEXT: add a0, a1, a0
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-V-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-V-NEXT: csrr a0, vlenb
; CHECK-RV32-V-NEXT: slli a0, a0, 4
; CHECK-RV32-V-NEXT: add a0, sp, a0
; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 4
-; CHECK-RV32-V-NEXT: sub a0, a1, a0
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 3
-; CHECK-RV32-V-NEXT: add a0, a1, a0
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-V-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-V-NEXT: csrr a0, vlenb
; CHECK-RV32-V-NEXT: slli a0, a0, 3
; CHECK-RV32-V-NEXT: add a0, sp, a0
; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 3
-; CHECK-RV32-V-NEXT: sub a0, a1, a0
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a1, a0
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a1, a0
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-V-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-V-NEXT: addi a0, sp, 16
-; CHECK-RV32-V-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-V-NEXT: csrr a0, vlenb
; CHECK-RV32-V-NEXT: slli a0, a0, 5
; CHECK-RV32-V-NEXT: add sp, sp, a0
@@ -1351,498 +899,46 @@ define void @foo_with_call() #1 {
; CHECK-RV32-FV-NEXT: slli a0, a0, 5
; CHECK-RV32-FV-NEXT: sub sp, sp, a0
; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 5
-; CHECK-RV32-FV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
; CHECK-RV32-FV-NEXT: slli a0, a0, 3
; CHECK-RV32-FV-NEXT: mv a1, a0
; CHECK-RV32-FV-NEXT: slli a0, a0, 1
; CHECK-RV32-FV-NEXT: add a0, a0, a1
; CHECK-RV32-FV-NEXT: add a0, sp, a0
; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FV-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FV-NEXT: csrr a0, vlenb
; CHECK-RV32-FV-NEXT: slli a0, a0, 4
; CHECK-RV32-FV-NEXT: add a0, sp, a0
; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FV-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FV-NEXT: csrr a0, vlenb
; CHECK-RV32-FV-NEXT: slli a0, a0, 3
; CHECK-RV32-FV-NEXT: add a0, sp, a0
; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FV-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FV-NEXT: addi a0, sp, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FV-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FV-NEXT: call otherfoo
; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 5
-; CHECK-RV32-FV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
; CHECK-RV32-FV-NEXT: slli a0, a0, 3
; CHECK-RV32-FV-NEXT: mv a1, a0
; CHECK-RV32-FV-NEXT: slli a0, a0, 1
; CHECK-RV32-FV-NEXT: add a0, a0, a1
; CHECK-RV32-FV-NEXT: add a0, sp, a0
; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FV-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FV-NEXT: csrr a0, vlenb
; CHECK-RV32-FV-NEXT: slli a0, a0, 4
; CHECK-RV32-FV-NEXT: add a0, sp, a0
; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FV-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FV-NEXT: csrr a0, vlenb
; CHECK-RV32-FV-NEXT: slli a0, a0, 3
; CHECK-RV32-FV-NEXT: add a0, sp, a0
; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FV-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FV-NEXT: addi a0, sp, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FV-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FV-NEXT: csrr a0, vlenb
; CHECK-RV32-FV-NEXT: slli a0, a0, 5
; CHECK-RV32-FV-NEXT: add sp, sp, a0
@@ -1928,498 +1024,46 @@ define void @foo_with_call() #1 {
; CHECK-RV32-FDV-NEXT: slli a0, a0, 5
; CHECK-RV32-FDV-NEXT: sub sp, sp, a0
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 5
-; CHECK-RV32-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: mv a1, a0
; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
; CHECK-RV32-FDV-NEXT: add a0, a0, a1
; CHECK-RV32-FDV-NEXT: add a0, sp, a0
; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FDV-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: slli a0, a0, 4
; CHECK-RV32-FDV-NEXT: add a0, sp, a0
; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FDV-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
; CHECK-RV32-FDV-NEXT: add a0, sp, a0
; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FDV-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FDV-NEXT: addi a0, sp, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FDV-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FDV-NEXT: call otherfoo
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 5
-; CHECK-RV32-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
; CHECK-RV32-FDV-NEXT: mv a1, a0
; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
; CHECK-RV32-FDV-NEXT: add a0, a0, a1
; CHECK-RV32-FDV-NEXT: add a0, sp, a0
; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FDV-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: slli a0, a0, 4
; CHECK-RV32-FDV-NEXT: add a0, sp, a0
; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FDV-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
; CHECK-RV32-FDV-NEXT: add a0, sp, a0
; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FDV-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FDV-NEXT: addi a0, sp, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FDV-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: slli a0, a0, 5
; CHECK-RV32-FDV-NEXT: add sp, sp, a0
@@ -3259,498 +1903,46 @@ define void @foo_with_call() #1 {
; CHECK-RV64-V-NEXT: slli a0, a0, 5
; CHECK-RV64-V-NEXT: sub sp, sp, a0
; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 5
-; CHECK-RV64-V-NEXT: sub a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 3
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
; CHECK-RV64-V-NEXT: slli a0, a0, 3
; CHECK-RV64-V-NEXT: mv a1, a0
; CHECK-RV64-V-NEXT: slli a0, a0, 1
; CHECK-RV64-V-NEXT: add a0, a0, a1
; CHECK-RV64-V-NEXT: add a0, sp, a0
; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 3
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 3
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 4
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-V-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-V-NEXT: csrr a0, vlenb
; CHECK-RV64-V-NEXT: slli a0, a0, 4
; CHECK-RV64-V-NEXT: add a0, sp, a0
; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 4
-; CHECK-RV64-V-NEXT: sub a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 3
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-V-NEXT: csrr a0, vlenb
; CHECK-RV64-V-NEXT: slli a0, a0, 3
; CHECK-RV64-V-NEXT: add a0, sp, a0
; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 3
-; CHECK-RV64-V-NEXT: sub a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-V-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-V-NEXT: addi a0, sp, 16
-; CHECK-RV64-V-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-V-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-V-NEXT: call otherfoo
; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 5
-; CHECK-RV64-V-NEXT: sub a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
; CHECK-RV64-V-NEXT: slli a0, a0, 3
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 3
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
; CHECK-RV64-V-NEXT: mv a1, a0
; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 3
; CHECK-RV64-V-NEXT: add a0, a0, a1
; CHECK-RV64-V-NEXT: add a0, sp, a0
; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 3
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 4
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-V-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-V-NEXT: csrr a0, vlenb
; CHECK-RV64-V-NEXT: slli a0, a0, 4
; CHECK-RV64-V-NEXT: add a0, sp, a0
; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 4
-; CHECK-RV64-V-NEXT: sub a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 3
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-V-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-V-NEXT: csrr a0, vlenb
; CHECK-RV64-V-NEXT: slli a0, a0, 3
; CHECK-RV64-V-NEXT: add a0, sp, a0
; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 3
-; CHECK-RV64-V-NEXT: sub a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-V-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-V-NEXT: addi a0, sp, 16
-; CHECK-RV64-V-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-V-NEXT: csrr a0, vlenb
; CHECK-RV64-V-NEXT: slli a0, a0, 5
; CHECK-RV64-V-NEXT: add sp, sp, a0
@@ -3816,498 +2008,46 @@ define void @foo_with_call() #1 {
; CHECK-RV64-FV-NEXT: slli a0, a0, 5
; CHECK-RV64-FV-NEXT: sub sp, sp, a0
; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 5
-; CHECK-RV64-FV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
; CHECK-RV64-FV-NEXT: slli a0, a0, 3
; CHECK-RV64-FV-NEXT: mv a1, a0
; CHECK-RV64-FV-NEXT: slli a0, a0, 1
; CHECK-RV64-FV-NEXT: add a0, a0, a1
; CHECK-RV64-FV-NEXT: add a0, sp, a0
; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FV-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-FV-NEXT: csrr a0, vlenb
; CHECK-RV64-FV-NEXT: slli a0, a0, 4
; CHECK-RV64-FV-NEXT: add a0, sp, a0
; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FV-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-FV-NEXT: csrr a0, vlenb
; CHECK-RV64-FV-NEXT: slli a0, a0, 3
; CHECK-RV64-FV-NEXT: add a0, sp, a0
; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FV-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-FV-NEXT: addi a0, sp, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FV-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-FV-NEXT: call otherfoo
; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 5
-; CHECK-RV64-FV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
; CHECK-RV64-FV-NEXT: slli a0, a0, 3
; CHECK-RV64-FV-NEXT: mv a1, a0
; CHECK-RV64-FV-NEXT: slli a0, a0, 1
; CHECK-RV64-FV-NEXT: add a0, a0, a1
; CHECK-RV64-FV-NEXT: add a0, sp, a0
; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FV-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-FV-NEXT: csrr a0, vlenb
; CHECK-RV64-FV-NEXT: slli a0, a0, 4
; CHECK-RV64-FV-NEXT: add a0, sp, a0
; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FV-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-FV-NEXT: csrr a0, vlenb
; CHECK-RV64-FV-NEXT: slli a0, a0, 3
; CHECK-RV64-FV-NEXT: add a0, sp, a0
; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FV-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-FV-NEXT: addi a0, sp, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FV-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-FV-NEXT: csrr a0, vlenb
; CHECK-RV64-FV-NEXT: slli a0, a0, 5
; CHECK-RV64-FV-NEXT: add sp, sp, a0
@@ -4393,498 +2133,46 @@ define void @foo_with_call() #1 {
; CHECK-RV64-FDV-NEXT: slli a0, a0, 5
; CHECK-RV64-FDV-NEXT: sub sp, sp, a0
; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 5
-; CHECK-RV64-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
; CHECK-RV64-FDV-NEXT: mv a1, a0
; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
; CHECK-RV64-FDV-NEXT: add a0, a0, a1
; CHECK-RV64-FDV-NEXT: add a0, sp, a0
; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FDV-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
; CHECK-RV64-FDV-NEXT: slli a0, a0, 4
; CHECK-RV64-FDV-NEXT: add a0, sp, a0
; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FDV-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
; CHECK-RV64-FDV-NEXT: add a0, sp, a0
; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FDV-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-FDV-NEXT: addi a0, sp, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FDV-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-FDV-NEXT: call otherfoo
; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 5
-; CHECK-RV64-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
; CHECK-RV64-FDV-NEXT: mv a1, a0
; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
; CHECK-RV64-FDV-NEXT: add a0, a0, a1
; CHECK-RV64-FDV-NEXT: add a0, sp, a0
; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FDV-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
; CHECK-RV64-FDV-NEXT: slli a0, a0, 4
; CHECK-RV64-FDV-NEXT: add a0, sp, a0
; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FDV-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
; CHECK-RV64-FDV-NEXT: add a0, sp, a0
; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FDV-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-FDV-NEXT: addi a0, sp, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FDV-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
; CHECK-RV64-FDV-NEXT: slli a0, a0, 5
; CHECK-RV64-FDV-NEXT: add sp, sp, a0
@@ -5670,422 +2958,39 @@ define void @foo_fp_with_call() #2 {
; CHECK-RV32-V-NEXT: slli a0, a0, 5
; CHECK-RV32-V-NEXT: sub sp, sp, a0
; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a1, a0
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a1, a0
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 3
-; CHECK-RV32-V-NEXT: sub a0, a1, a0
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
; CHECK-RV32-V-NEXT: slli a0, a0, 3
; CHECK-RV32-V-NEXT: sub a0, s0, a0
; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 3
-; CHECK-RV32-V-NEXT: add a0, a1, a0
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 4
-; CHECK-RV32-V-NEXT: sub a0, a1, a0
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-V-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-V-NEXT: csrr a0, vlenb
; CHECK-RV32-V-NEXT: slli a0, a0, 4
; CHECK-RV32-V-NEXT: sub a0, s0, a0
; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 4
-; CHECK-RV32-V-NEXT: add a0, a1, a0
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
; CHECK-RV32-V-NEXT: slli a0, a0, 3
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 3
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 3
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 3
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
; CHECK-RV32-V-NEXT: slli a0, a0, 1
; CHECK-RV32-V-NEXT: add a0, a0, a1
; CHECK-RV32-V-NEXT: sub a0, s0, a0
; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 5
-; CHECK-RV32-V-NEXT: sub a0, a1, a0
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-V-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-V-NEXT: csrr a0, vlenb
; CHECK-RV32-V-NEXT: slli a0, a0, 5
; CHECK-RV32-V-NEXT: sub a0, s0, a0
; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-V-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-V-NEXT: call otherfoo
; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a1, a0
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a1, a0
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 3
-; CHECK-RV32-V-NEXT: sub a0, a1, a0
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
; CHECK-RV32-V-NEXT: slli a0, a0, 3
; CHECK-RV32-V-NEXT: sub a0, s0, a0
; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 3
-; CHECK-RV32-V-NEXT: add a0, a1, a0
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 4
-; CHECK-RV32-V-NEXT: sub a0, a1, a0
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-V-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-V-NEXT: csrr a0, vlenb
; CHECK-RV32-V-NEXT: slli a0, a0, 4
; CHECK-RV32-V-NEXT: sub a0, s0, a0
; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 4
-; CHECK-RV32-V-NEXT: add a0, a1, a0
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 3
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 3
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-V-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-V-NEXT: csrr a0, vlenb
; CHECK-RV32-V-NEXT: slli a0, a0, 3
; CHECK-RV32-V-NEXT: mv a1, a0
@@ -6093,81 +2998,12 @@ define void @foo_fp_with_call() #2 {
; CHECK-RV32-V-NEXT: add a0, a0, a1
; CHECK-RV32-V-NEXT: sub a0, s0, a0
; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 3
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 5
-; CHECK-RV32-V-NEXT: sub a0, a1, a0
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-V-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-V-NEXT: csrr a0, vlenb
; CHECK-RV32-V-NEXT: slli a0, a0, 5
; CHECK-RV32-V-NEXT: sub a0, s0, a0
; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-V-NEXT: addi sp, s0, -80
; CHECK-RV32-V-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
; CHECK-RV32-V-NEXT: lw t0, 72(sp) # 4-byte Folded Reload
@@ -6234,172 +3070,15 @@ define void @foo_fp_with_call() #2 {
; CHECK-RV32-FV-NEXT: slli a0, a0, 5
; CHECK-RV32-FV-NEXT: sub sp, sp, a0
; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
; CHECK-RV32-FV-NEXT: slli a0, a0, 3
; CHECK-RV32-FV-NEXT: sub a0, s0, a0
; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FV-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FV-NEXT: csrr a0, vlenb
; CHECK-RV32-FV-NEXT: slli a0, a0, 4
; CHECK-RV32-FV-NEXT: sub a0, s0, a0
; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FV-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FV-NEXT: csrr a0, vlenb
; CHECK-RV32-FV-NEXT: slli a0, a0, 3
; CHECK-RV32-FV-NEXT: mv a1, a0
@@ -6407,331 +3086,36 @@ define void @foo_fp_with_call() #2 {
; CHECK-RV32-FV-NEXT: add a0, a0, a1
; CHECK-RV32-FV-NEXT: sub a0, s0, a0
; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 5
-; CHECK-RV32-FV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FV-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FV-NEXT: csrr a0, vlenb
; CHECK-RV32-FV-NEXT: slli a0, a0, 5
; CHECK-RV32-FV-NEXT: sub a0, s0, a0
; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FV-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FV-NEXT: call otherfoo
; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
; CHECK-RV32-FV-NEXT: slli a0, a0, 3
; CHECK-RV32-FV-NEXT: sub a0, s0, a0
; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FV-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FV-NEXT: csrr a0, vlenb
; CHECK-RV32-FV-NEXT: slli a0, a0, 4
; CHECK-RV32-FV-NEXT: sub a0, s0, a0
; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FV-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
; CHECK-RV32-FV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
; CHECK-RV32-FV-NEXT: slli a0, a0, 1
; CHECK-RV32-FV-NEXT: add a0, a0, a1
; CHECK-RV32-FV-NEXT: sub a0, s0, a0
; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 5
-; CHECK-RV32-FV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FV-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FV-NEXT: csrr a0, vlenb
; CHECK-RV32-FV-NEXT: slli a0, a0, 5
; CHECK-RV32-FV-NEXT: sub a0, s0, a0
; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FV-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FV-NEXT: addi sp, s0, -160
; CHECK-RV32-FV-NEXT: lw ra, 156(sp) # 4-byte Folded Reload
; CHECK-RV32-FV-NEXT: lw t0, 152(sp) # 4-byte Folded Reload
@@ -6818,172 +3202,15 @@ define void @foo_fp_with_call() #2 {
; CHECK-RV32-FDV-NEXT: slli a0, a0, 5
; CHECK-RV32-FDV-NEXT: sub sp, sp, a0
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FDV-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: slli a0, a0, 4
; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FDV-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
; CHECK-RV32-FDV-NEXT: mv a1, a0
@@ -6991,249 +3218,23 @@ define void @foo_fp_with_call() #2 {
; CHECK-RV32-FDV-NEXT: add a0, a0, a1
; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 5
-; CHECK-RV32-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FDV-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: slli a0, a0, 5
; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FDV-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FDV-NEXT: call otherfoo
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FDV-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: slli a0, a0, 4
; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FDV-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
; CHECK-RV32-FDV-NEXT: mv a1, a0
@@ -7241,81 +3242,12 @@ define void @foo_fp_with_call() #2 {
; CHECK-RV32-FDV-NEXT: add a0, a0, a1
; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 5
-; CHECK-RV32-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FDV-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: slli a0, a0, 5
; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FDV-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FDV-NEXT: addi sp, s0, -240
; CHECK-RV32-FDV-NEXT: lw ra, 236(sp) # 4-byte Folded Reload
; CHECK-RV32-FDV-NEXT: lw t0, 232(sp) # 4-byte Folded Reload
@@ -8186,422 +4118,39 @@ define void @foo_fp_with_call() #2 {
; CHECK-RV64-V-NEXT: slli a0, a0, 5
; CHECK-RV64-V-NEXT: sub sp, sp, a0
; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 3
-; CHECK-RV64-V-NEXT: sub a0, a1, a0
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
; CHECK-RV64-V-NEXT: slli a0, a0, 3
; CHECK-RV64-V-NEXT: sub a0, s0, a0
; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 3
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 4
-; CHECK-RV64-V-NEXT: sub a0, a1, a0
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-V-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-V-NEXT: csrr a0, vlenb
; CHECK-RV64-V-NEXT: slli a0, a0, 4
; CHECK-RV64-V-NEXT: sub a0, s0, a0
; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 4
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
; CHECK-RV64-V-NEXT: slli a0, a0, 3
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 3
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 3
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 3
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
; CHECK-RV64-V-NEXT: slli a0, a0, 1
; CHECK-RV64-V-NEXT: add a0, a0, a1
; CHECK-RV64-V-NEXT: sub a0, s0, a0
; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 5
-; CHECK-RV64-V-NEXT: sub a0, a1, a0
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-V-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-V-NEXT: csrr a0, vlenb
; CHECK-RV64-V-NEXT: slli a0, a0, 5
; CHECK-RV64-V-NEXT: sub a0, s0, a0
; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-V-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-V-NEXT: call otherfoo
; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 3
-; CHECK-RV64-V-NEXT: sub a0, a1, a0
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
; CHECK-RV64-V-NEXT: slli a0, a0, 3
; CHECK-RV64-V-NEXT: sub a0, s0, a0
; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 3
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 4
-; CHECK-RV64-V-NEXT: sub a0, a1, a0
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-V-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-V-NEXT: csrr a0, vlenb
; CHECK-RV64-V-NEXT: slli a0, a0, 4
; CHECK-RV64-V-NEXT: sub a0, s0, a0
; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 4
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 3
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 3
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-V-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-V-NEXT: csrr a0, vlenb
; CHECK-RV64-V-NEXT: slli a0, a0, 3
; CHECK-RV64-V-NEXT: mv a1, a0
@@ -8609,81 +4158,12 @@ define void @foo_fp_with_call() #2 {
; CHECK-RV64-V-NEXT: add a0, a0, a1
; CHECK-RV64-V-NEXT: sub a0, s0, a0
; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 3
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 5
-; CHECK-RV64-V-NEXT: sub a0, a1, a0
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-V-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-V-NEXT: csrr a0, vlenb
; CHECK-RV64-V-NEXT: slli a0, a0, 5
; CHECK-RV64-V-NEXT: sub a0, s0, a0
; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-V-NEXT: addi sp, s0, -160
; CHECK-RV64-V-NEXT: ld ra, 152(sp) # 8-byte Folded Reload
; CHECK-RV64-V-NEXT: ld t0, 144(sp) # 8-byte Folded Reload
@@ -8750,172 +4230,15 @@ define void @foo_fp_with_call() #2 {
; CHECK-RV64-FV-NEXT: slli a0, a0, 5
; CHECK-RV64-FV-NEXT: sub sp, sp, a0
; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
; CHECK-RV64-FV-NEXT: slli a0, a0, 3
; CHECK-RV64-FV-NEXT: sub a0, s0, a0
; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FV-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-FV-NEXT: csrr a0, vlenb
; CHECK-RV64-FV-NEXT: slli a0, a0, 4
; CHECK-RV64-FV-NEXT: sub a0, s0, a0
; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FV-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-FV-NEXT: csrr a0, vlenb
; CHECK-RV64-FV-NEXT: slli a0, a0, 3
; CHECK-RV64-FV-NEXT: mv a1, a0
@@ -8923,331 +4246,36 @@ define void @foo_fp_with_call() #2 {
; CHECK-RV64-FV-NEXT: add a0, a0, a1
; CHECK-RV64-FV-NEXT: sub a0, s0, a0
; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 5
-; CHECK-RV64-FV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FV-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-FV-NEXT: csrr a0, vlenb
; CHECK-RV64-FV-NEXT: slli a0, a0, 5
; CHECK-RV64-FV-NEXT: sub a0, s0, a0
; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FV-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-FV-NEXT: call otherfoo
; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
; CHECK-RV64-FV-NEXT: slli a0, a0, 3
; CHECK-RV64-FV-NEXT: sub a0, s0, a0
; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FV-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-FV-NEXT: csrr a0, vlenb
; CHECK-RV64-FV-NEXT: slli a0, a0, 4
; CHECK-RV64-FV-NEXT: sub a0, s0, a0
; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FV-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
; CHECK-RV64-FV-NEXT: slli a0, a0, 1
; CHECK-RV64-FV-NEXT: add a0, a0, a1
; CHECK-RV64-FV-NEXT: sub a0, s0, a0
; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 5
-; CHECK-RV64-FV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FV-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-FV-NEXT: csrr a0, vlenb
; CHECK-RV64-FV-NEXT: slli a0, a0, 5
; CHECK-RV64-FV-NEXT: sub a0, s0, a0
; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FV-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-FV-NEXT: addi sp, s0, -240
; CHECK-RV64-FV-NEXT: ld ra, 232(sp) # 8-byte Folded Reload
; CHECK-RV64-FV-NEXT: ld t0, 224(sp) # 8-byte Folded Reload
@@ -9334,172 +4362,15 @@ define void @foo_fp_with_call() #2 {
; CHECK-RV64-FDV-NEXT: slli a0, a0, 5
; CHECK-RV64-FDV-NEXT: sub sp, sp, a0
; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FDV-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
; CHECK-RV64-FDV-NEXT: slli a0, a0, 4
; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FDV-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
; CHECK-RV64-FDV-NEXT: mv a1, a0
@@ -9507,249 +4378,23 @@ define void @foo_fp_with_call() #2 {
; CHECK-RV64-FDV-NEXT: add a0, a0, a1
; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 5
-; CHECK-RV64-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FDV-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
; CHECK-RV64-FDV-NEXT: slli a0, a0, 5
; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FDV-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-FDV-NEXT: call otherfoo
; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FDV-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
; CHECK-RV64-FDV-NEXT: slli a0, a0, 4
; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FDV-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
; CHECK-RV64-FDV-NEXT: mv a1, a0
@@ -9757,81 +4402,12 @@ define void @foo_fp_with_call() #2 {
; CHECK-RV64-FDV-NEXT: add a0, a0, a1
; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 5
-; CHECK-RV64-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FDV-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
; CHECK-RV64-FDV-NEXT: slli a0, a0, 5
; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FDV-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-FDV-NEXT: addi sp, s0, -320
; CHECK-RV64-FDV-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
; CHECK-RV64-FDV-NEXT: ld t0, 304(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll
index b1a6d16..a06c750 100644
--- a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll
+++ b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll
@@ -7,18 +7,18 @@
define i32 @ctz_nxv4i32(<vscale x 4 x i32> %a) #0 {
; RV32-LABEL: ctz_nxv4i32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT: vid.v v10
-; RV32-NEXT: vmv.v.i v11, -1
; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; RV32-NEXT: vid.v v10
+; RV32-NEXT: li a1, -1
; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32-NEXT: vmsne.vi v0, v8, 0
; RV32-NEXT: srli a0, a0, 1
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: vmacc.vv v8, v10, v11
-; RV32-NEXT: vmv.v.i v9, 0
-; RV32-NEXT: vmerge.vvm v8, v9, v8, v0
+; RV32-NEXT: vmadd.vx v10, a1, v8
+; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: vmerge.vvm v8, v8, v10, v0
; RV32-NEXT: vredmaxu.vs v8, v8, v8
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: sub a0, a0, a1
@@ -28,21 +28,21 @@ define i32 @ctz_nxv4i32(<vscale x 4 x i32> %a) #0 {
;
; RV64-LABEL: ctz_nxv4i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT: vid.v v10
-; RV64-NEXT: vmv.v.i v11, -1
; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; RV64-NEXT: vid.v v10
+; RV64-NEXT: li a1, -1
; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64-NEXT: vmsne.vi v0, v8, 0
; RV64-NEXT: srli a0, a0, 1
; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vmacc.vv v8, v10, v11
-; RV64-NEXT: vmv.v.i v9, 0
-; RV64-NEXT: vmerge.vvm v8, v9, v8, v0
+; RV64-NEXT: vmadd.vx v10, a1, v8
+; RV64-NEXT: vmv.v.i v8, 0
+; RV64-NEXT: vmerge.vvm v8, v8, v10, v0
; RV64-NEXT: vredmaxu.vs v8, v8, v8
; RV64-NEXT: vmv.x.s a1, v8
-; RV64-NEXT: subw a0, a0, a1
+; RV64-NEXT: sub a0, a0, a1
; RV64-NEXT: slli a0, a0, 48
; RV64-NEXT: srli a0, a0, 48
; RV64-NEXT: ret
@@ -109,17 +109,17 @@ define i64 @ctz_nxv8i1_no_range(<vscale x 8 x i16> %a) {
;
; RV64-LABEL: ctz_nxv8i1_no_range:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, ma
-; RV64-NEXT: vid.v v16
-; RV64-NEXT: vmv.v.i v24, -1
; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; RV64-NEXT: vid.v v16
+; RV64-NEXT: li a1, -1
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT: vmsne.vi v0, v8, 0
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vmacc.vv v8, v16, v24
-; RV64-NEXT: vmv.v.i v16, 0
-; RV64-NEXT: vmerge.vvm v8, v16, v8, v0
+; RV64-NEXT: vmadd.vx v16, a1, v8
+; RV64-NEXT: vmv.v.i v8, 0
+; RV64-NEXT: vmerge.vvm v8, v8, v16, v0
; RV64-NEXT: vredmaxu.vs v8, v8, v8
; RV64-NEXT: vmv.x.s a1, v8
; RV64-NEXT: sub a0, a0, a1
diff --git a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts.ll b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts.ll
index 20dd590..1216d30 100644
--- a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts.ll
+++ b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts.ll
@@ -35,7 +35,7 @@ define i16 @ctz_v4i32(<4 x i32> %a) {
; RV64-NEXT: vredmaxu.vs v8, v8, v8
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: li a1, 4
-; RV64-NEXT: subw a1, a1, a0
+; RV64-NEXT: sub a1, a1, a0
; RV64-NEXT: zext.b a0, a1
; RV64-NEXT: ret
%res = call i16 @llvm.experimental.cttz.elts.i16.v4i32(<4 x i32> %a, i1 0)
diff --git a/llvm/test/CodeGen/RISCV/machine-combiner.ll b/llvm/test/CodeGen/RISCV/machine-combiner.ll
index 1be599e4..7a1c41c 100644
--- a/llvm/test/CodeGen/RISCV/machine-combiner.ll
+++ b/llvm/test/CodeGen/RISCV/machine-combiner.ll
@@ -454,7 +454,7 @@ define i32 @test_reassoc_add_sub_i32_1(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-LABEL: test_reassoc_add_sub_i32_1:
; CHECK: # %bb.0:
; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: subw a2, a2, a3
+; CHECK-NEXT: sub a2, a2, a3
; CHECK-NEXT: subw a0, a0, a2
; CHECK-NEXT: ret
%t0 = add i32 %a0, %a1
@@ -467,7 +467,7 @@ define i32 @test_reassoc_add_sub_i32_2(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-LABEL: test_reassoc_add_sub_i32_2:
; CHECK: # %bb.0:
; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: subw a2, a2, a3
+; CHECK-NEXT: sub a2, a2, a3
; CHECK-NEXT: addw a0, a0, a2
; CHECK-NEXT: ret
%t0 = add i32 %a0, %a1
diff --git a/llvm/test/CodeGen/RISCV/memcmp-optsize.ll b/llvm/test/CodeGen/RISCV/memcmp-optsize.ll
index 0d57e42..cd93579 100644
--- a/llvm/test/CodeGen/RISCV/memcmp-optsize.ll
+++ b/llvm/test/CodeGen/RISCV/memcmp-optsize.ll
@@ -3780,9 +3780,9 @@ define i32 @memcmp_size_5(ptr %s1, ptr %s2) nounwind optsize {
;
; CHECK-UNALIGNED-RV64-ZBKB-LABEL: memcmp_size_5:
; CHECK-UNALIGNED-RV64-ZBKB: # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lwu a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a2, 0(a0)
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lbu a0, 4(a0)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lwu a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a3, 0(a1)
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lbu a1, 4(a1)
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: pack a0, a2, a0
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: pack a1, a3, a1
@@ -3985,9 +3985,9 @@ define i32 @memcmp_size_6(ptr %s1, ptr %s2) nounwind optsize {
;
; CHECK-UNALIGNED-RV64-ZBKB-LABEL: memcmp_size_6:
; CHECK-UNALIGNED-RV64-ZBKB: # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lwu a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a2, 0(a0)
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lhu a0, 4(a0)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lwu a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a3, 0(a1)
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lhu a1, 4(a1)
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: pack a0, a2, a0
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: pack a1, a3, a1
diff --git a/llvm/test/CodeGen/RISCV/memcmp.ll b/llvm/test/CodeGen/RISCV/memcmp.ll
index 0caab1f..a5bdb13 100644
--- a/llvm/test/CodeGen/RISCV/memcmp.ll
+++ b/llvm/test/CodeGen/RISCV/memcmp.ll
@@ -4410,9 +4410,9 @@ define i32 @memcmp_size_5(ptr %s1, ptr %s2) nounwind {
;
; CHECK-UNALIGNED-RV64-ZBKB-LABEL: memcmp_size_5:
; CHECK-UNALIGNED-RV64-ZBKB: # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lwu a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a2, 0(a0)
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lbu a0, 4(a0)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lwu a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a3, 0(a1)
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lbu a1, 4(a1)
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: pack a0, a2, a0
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: pack a1, a3, a1
@@ -4615,9 +4615,9 @@ define i32 @memcmp_size_6(ptr %s1, ptr %s2) nounwind {
;
; CHECK-UNALIGNED-RV64-ZBKB-LABEL: memcmp_size_6:
; CHECK-UNALIGNED-RV64-ZBKB: # %bb.0: # %entry
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lwu a2, 0(a0)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a2, 0(a0)
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lhu a0, 4(a0)
-; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lwu a3, 0(a1)
+; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a3, 0(a1)
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lhu a1, 4(a1)
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: pack a0, a2, a0
; CHECK-UNALIGNED-RV64-ZBKB-NEXT: pack a1, a3, a1
diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll
index 27d5eaa..4c9a98c 100644
--- a/llvm/test/CodeGen/RISCV/mul.ll
+++ b/llvm/test/CodeGen/RISCV/mul.ll
@@ -1080,14 +1080,14 @@ define i32 @muli32_m65(i32 %a) nounwind {
; RV64I-LABEL: muli32_m65:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a1, a0, 6
-; RV64I-NEXT: negw a0, a0
+; RV64I-NEXT: neg a0, a0
; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: ret
;
; RV64IM-LABEL: muli32_m65:
; RV64IM: # %bb.0:
; RV64IM-NEXT: slli a1, a0, 6
-; RV64IM-NEXT: negw a0, a0
+; RV64IM-NEXT: neg a0, a0
; RV64IM-NEXT: subw a0, a0, a1
; RV64IM-NEXT: ret
%1 = mul i32 %a, -65
@@ -1980,14 +1980,14 @@ define i8 @muladd_demand(i8 %x, i8 %y) nounwind {
; RV64I-LABEL: muladd_demand:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 1
-; RV64I-NEXT: subw a0, a1, a0
+; RV64I-NEXT: sub a0, a1, a0
; RV64I-NEXT: andi a0, a0, 15
; RV64I-NEXT: ret
;
; RV64IM-LABEL: muladd_demand:
; RV64IM: # %bb.0:
; RV64IM-NEXT: slli a0, a0, 1
-; RV64IM-NEXT: subw a0, a1, a0
+; RV64IM-NEXT: sub a0, a1, a0
; RV64IM-NEXT: andi a0, a0, 15
; RV64IM-NEXT: ret
%m = mul i8 %x, 14
@@ -2048,14 +2048,14 @@ define i8 @muladd_demand_2(i8 %x, i8 %y) nounwind {
; RV64I-LABEL: muladd_demand_2:
; RV64I: # %bb.0:
; RV64I-NEXT: slli a0, a0, 1
-; RV64I-NEXT: subw a1, a1, a0
+; RV64I-NEXT: sub a1, a1, a0
; RV64I-NEXT: ori a0, a1, -16
; RV64I-NEXT: ret
;
; RV64IM-LABEL: muladd_demand_2:
; RV64IM: # %bb.0:
; RV64IM-NEXT: slli a0, a0, 1
-; RV64IM-NEXT: subw a1, a1, a0
+; RV64IM-NEXT: sub a1, a1, a0
; RV64IM-NEXT: ori a0, a1, -16
; RV64IM-NEXT: ret
%m = mul i8 %x, 14
diff --git a/llvm/test/CodeGen/RISCV/neg-abs.ll b/llvm/test/CodeGen/RISCV/neg-abs.ll
index fe19a4fa..da81fe5 100644
--- a/llvm/test/CodeGen/RISCV/neg-abs.ll
+++ b/llvm/test/CodeGen/RISCV/neg-abs.ll
@@ -179,7 +179,7 @@ define i32 @neg_abs32_multiuse(i32 %x, ptr %y) {
; RV64I: # %bb.0:
; RV64I-NEXT: sraiw a2, a0, 31
; RV64I-NEXT: xor a0, a0, a2
-; RV64I-NEXT: subw a2, a0, a2
+; RV64I-NEXT: sub a2, a0, a2
; RV64I-NEXT: negw a0, a2
; RV64I-NEXT: sw a2, 0(a1)
; RV64I-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
index 47b90a0..ba6769b 100644
--- a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
@@ -833,7 +833,7 @@ define i1 @usubo_ugt_i32(i32 %x, i32 %y, ptr %p) {
; RV64-NEXT: sext.w a3, a1
; RV64-NEXT: sext.w a4, a0
; RV64-NEXT: sltu a3, a4, a3
-; RV64-NEXT: subw a0, a0, a1
+; RV64-NEXT: sub a0, a0, a1
; RV64-NEXT: sw a0, 0(a2)
; RV64-NEXT: mv a0, a3
; RV64-NEXT: ret
@@ -860,7 +860,7 @@ define i1 @usubo_ugt_constant_op0_i8(i8 %x, ptr %p) {
; RV64: # %bb.0:
; RV64-NEXT: zext.b a2, a0
; RV64-NEXT: li a3, 42
-; RV64-NEXT: subw a3, a3, a0
+; RV64-NEXT: sub a3, a3, a0
; RV64-NEXT: sltiu a0, a2, 43
; RV64-NEXT: xori a0, a0, 1
; RV64-NEXT: sb a3, 0(a1)
@@ -890,7 +890,7 @@ define i1 @usubo_ult_constant_op0_i16(i16 %x, ptr %p) {
; RV64-NEXT: slli a2, a0, 48
; RV64-NEXT: li a3, 43
; RV64-NEXT: srli a2, a2, 48
-; RV64-NEXT: subw a3, a3, a0
+; RV64-NEXT: sub a3, a3, a0
; RV64-NEXT: sltiu a0, a2, 44
; RV64-NEXT: xori a0, a0, 1
; RV64-NEXT: sh a3, 0(a1)
@@ -987,7 +987,7 @@ define i1 @usubo_ne_constant0_op1_i32(i32 %x, ptr %p) {
; RV64-LABEL: usubo_ne_constant0_op1_i32:
; RV64: # %bb.0:
; RV64-NEXT: sext.w a2, a0
-; RV64-NEXT: negw a3, a0
+; RV64-NEXT: neg a3, a0
; RV64-NEXT: snez a0, a2
; RV64-NEXT: sw a3, 0(a1)
; RV64-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/pr145360.ll b/llvm/test/CodeGen/RISCV/pr145360.ll
index 4251ac6..1c77fad 100644
--- a/llvm/test/CodeGen/RISCV/pr145360.ll
+++ b/llvm/test/CodeGen/RISCV/pr145360.ll
@@ -8,7 +8,7 @@ define i32 @signed(i32 %0, ptr %1) {
; CHECK-NEXT: srliw a2, a2, 24
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: andi a2, a2, -256
-; CHECK-NEXT: subw a2, a0, a2
+; CHECK-NEXT: sub a2, a0, a2
; CHECK-NEXT: sraiw a0, a0, 8
; CHECK-NEXT: sw a2, 0(a1)
; CHECK-NEXT: ret
@@ -29,7 +29,7 @@ define i32 @unsigned(i32 %0, ptr %1) {
; CHECK-NEXT: srli a2, a2, 36
; CHECK-NEXT: slli a4, a2, 5
; CHECK-NEXT: slli a2, a2, 3
-; CHECK-NEXT: subw a2, a2, a4
+; CHECK-NEXT: sub a2, a2, a4
; CHECK-NEXT: srliw a4, a0, 3
; CHECK-NEXT: add a2, a0, a2
; CHECK-NEXT: mulw a0, a4, a3
@@ -49,7 +49,7 @@ define i32 @signed_div_first(i32 %0, ptr %1) {
; CHECK-NEXT: add a3, a0, a2
; CHECK-NEXT: sraiw a2, a3, 8
; CHECK-NEXT: andi a3, a3, -256
-; CHECK-NEXT: subw a0, a0, a3
+; CHECK-NEXT: sub a0, a0, a3
; CHECK-NEXT: sw a0, 0(a1)
; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: ret
@@ -70,7 +70,7 @@ define i32 @unsigned_div_first(i32 %0, ptr %1) {
; CHECK-NEXT: srli a2, a2, 36
; CHECK-NEXT: slli a3, a2, 5
; CHECK-NEXT: slli a4, a2, 3
-; CHECK-NEXT: subw a4, a4, a3
+; CHECK-NEXT: sub a4, a4, a3
; CHECK-NEXT: add a0, a0, a4
; CHECK-NEXT: sw a0, 0(a1)
; CHECK-NEXT: mv a0, a2
diff --git a/llvm/test/CodeGen/RISCV/prefer-w-inst.mir b/llvm/test/CodeGen/RISCV/prefer-w-inst.mir
index e05e27a..b8ff783 100644
--- a/llvm/test/CodeGen/RISCV/prefer-w-inst.mir
+++ b/llvm/test/CodeGen/RISCV/prefer-w-inst.mir
@@ -239,8 +239,8 @@ body: |
; NO-PREFER-W-INST-NEXT: {{ $}}
; NO-PREFER-W-INST-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; NO-PREFER-W-INST-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
- ; NO-PREFER-W-INST-NEXT: [[LWU:%[0-9]+]]:gpr = LWU [[COPY]], 0
- ; NO-PREFER-W-INST-NEXT: [[ADDIW:%[0-9]+]]:gpr = ADDIW [[LWU]], 1
+ ; NO-PREFER-W-INST-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0
+ ; NO-PREFER-W-INST-NEXT: [[ADDIW:%[0-9]+]]:gpr = ADDIW [[LW]], 1
; NO-PREFER-W-INST-NEXT: $x10 = COPY [[ADDIW]]
; NO-PREFER-W-INST-NEXT: PseudoRET
;
diff --git a/llvm/test/CodeGen/RISCV/rotl-rotr.ll b/llvm/test/CodeGen/RISCV/rotl-rotr.ll
index 634cca5..cf64650 100644
--- a/llvm/test/CodeGen/RISCV/rotl-rotr.ll
+++ b/llvm/test/CodeGen/RISCV/rotl-rotr.ll
@@ -29,7 +29,7 @@ define i32 @rotl_32(i32 %x, i32 %y) nounwind {
;
; RV64I-LABEL: rotl_32:
; RV64I: # %bb.0:
-; RV64I-NEXT: negw a2, a1
+; RV64I-NEXT: neg a2, a1
; RV64I-NEXT: sllw a1, a0, a1
; RV64I-NEXT: srlw a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
@@ -56,7 +56,7 @@ define i32 @rotl_32(i32 %x, i32 %y) nounwind {
; RV64XTHEADBB-LABEL: rotl_32:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: sllw a2, a0, a1
-; RV64XTHEADBB-NEXT: negw a1, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
; RV64XTHEADBB-NEXT: srlw a0, a0, a1
; RV64XTHEADBB-NEXT: or a0, a2, a0
; RV64XTHEADBB-NEXT: ret
@@ -78,7 +78,7 @@ define i32 @rotr_32(i32 %x, i32 %y) nounwind {
;
; RV64I-LABEL: rotr_32:
; RV64I: # %bb.0:
-; RV64I-NEXT: negw a2, a1
+; RV64I-NEXT: neg a2, a1
; RV64I-NEXT: srlw a1, a0, a1
; RV64I-NEXT: sllw a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
@@ -105,7 +105,7 @@ define i32 @rotr_32(i32 %x, i32 %y) nounwind {
; RV64XTHEADBB-LABEL: rotr_32:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: srlw a2, a0, a1
-; RV64XTHEADBB-NEXT: negw a1, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
; RV64XTHEADBB-NEXT: sllw a0, a0, a1
; RV64XTHEADBB-NEXT: or a0, a2, a0
; RV64XTHEADBB-NEXT: ret
@@ -159,7 +159,7 @@ define i64 @rotl_64(i64 %x, i64 %y) nounwind {
;
; RV64I-LABEL: rotl_64:
; RV64I: # %bb.0:
-; RV64I-NEXT: negw a2, a1
+; RV64I-NEXT: neg a2, a1
; RV64I-NEXT: sll a1, a0, a1
; RV64I-NEXT: srl a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
@@ -253,7 +253,7 @@ define i64 @rotl_64(i64 %x, i64 %y) nounwind {
; RV64XTHEADBB-LABEL: rotl_64:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: sll a2, a0, a1
-; RV64XTHEADBB-NEXT: negw a1, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
; RV64XTHEADBB-NEXT: srl a0, a0, a1
; RV64XTHEADBB-NEXT: or a0, a2, a0
; RV64XTHEADBB-NEXT: ret
@@ -307,7 +307,7 @@ define i64 @rotr_64(i64 %x, i64 %y) nounwind {
;
; RV64I-LABEL: rotr_64:
; RV64I: # %bb.0:
-; RV64I-NEXT: negw a2, a1
+; RV64I-NEXT: neg a2, a1
; RV64I-NEXT: srl a1, a0, a1
; RV64I-NEXT: sll a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
@@ -401,7 +401,7 @@ define i64 @rotr_64(i64 %x, i64 %y) nounwind {
; RV64XTHEADBB-LABEL: rotr_64:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: srl a2, a0, a1
-; RV64XTHEADBB-NEXT: negw a1, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
; RV64XTHEADBB-NEXT: sll a0, a0, a1
; RV64XTHEADBB-NEXT: or a0, a2, a0
; RV64XTHEADBB-NEXT: ret
@@ -423,7 +423,7 @@ define i32 @rotl_32_mask(i32 %x, i32 %y) nounwind {
;
; RV64I-LABEL: rotl_32_mask:
; RV64I: # %bb.0:
-; RV64I-NEXT: negw a2, a1
+; RV64I-NEXT: neg a2, a1
; RV64I-NEXT: sllw a1, a0, a1
; RV64I-NEXT: srlw a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
@@ -450,7 +450,7 @@ define i32 @rotl_32_mask(i32 %x, i32 %y) nounwind {
; RV64XTHEADBB-LABEL: rotl_32_mask:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: sllw a2, a0, a1
-; RV64XTHEADBB-NEXT: negw a1, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
; RV64XTHEADBB-NEXT: srlw a0, a0, a1
; RV64XTHEADBB-NEXT: or a0, a2, a0
; RV64XTHEADBB-NEXT: ret
@@ -474,7 +474,7 @@ define i32 @rotl_32_mask_and_63_and_31(i32 %x, i32 %y) nounwind {
; RV64I-LABEL: rotl_32_mask_and_63_and_31:
; RV64I: # %bb.0:
; RV64I-NEXT: sllw a2, a0, a1
-; RV64I-NEXT: negw a1, a1
+; RV64I-NEXT: neg a1, a1
; RV64I-NEXT: srlw a0, a0, a1
; RV64I-NEXT: or a0, a2, a0
; RV64I-NEXT: ret
@@ -500,7 +500,7 @@ define i32 @rotl_32_mask_and_63_and_31(i32 %x, i32 %y) nounwind {
; RV64XTHEADBB-LABEL: rotl_32_mask_and_63_and_31:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: sllw a2, a0, a1
-; RV64XTHEADBB-NEXT: negw a1, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
; RV64XTHEADBB-NEXT: srlw a0, a0, a1
; RV64XTHEADBB-NEXT: or a0, a2, a0
; RV64XTHEADBB-NEXT: ret
@@ -545,7 +545,7 @@ define i32 @rotl_32_mask_or_64_or_32(i32 %x, i32 %y) nounwind {
; RV64XTHEADBB-LABEL: rotl_32_mask_or_64_or_32:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: sllw a2, a0, a1
-; RV64XTHEADBB-NEXT: negw a1, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
; RV64XTHEADBB-NEXT: srlw a0, a0, a1
; RV64XTHEADBB-NEXT: or a0, a2, a0
; RV64XTHEADBB-NEXT: ret
@@ -569,7 +569,7 @@ define i32 @rotr_32_mask(i32 %x, i32 %y) nounwind {
;
; RV64I-LABEL: rotr_32_mask:
; RV64I: # %bb.0:
-; RV64I-NEXT: negw a2, a1
+; RV64I-NEXT: neg a2, a1
; RV64I-NEXT: srlw a1, a0, a1
; RV64I-NEXT: sllw a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
@@ -596,7 +596,7 @@ define i32 @rotr_32_mask(i32 %x, i32 %y) nounwind {
; RV64XTHEADBB-LABEL: rotr_32_mask:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: srlw a2, a0, a1
-; RV64XTHEADBB-NEXT: negw a1, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
; RV64XTHEADBB-NEXT: sllw a0, a0, a1
; RV64XTHEADBB-NEXT: or a0, a2, a0
; RV64XTHEADBB-NEXT: ret
@@ -620,7 +620,7 @@ define i32 @rotr_32_mask_and_63_and_31(i32 %x, i32 %y) nounwind {
; RV64I-LABEL: rotr_32_mask_and_63_and_31:
; RV64I: # %bb.0:
; RV64I-NEXT: srlw a2, a0, a1
-; RV64I-NEXT: negw a1, a1
+; RV64I-NEXT: neg a1, a1
; RV64I-NEXT: sllw a0, a0, a1
; RV64I-NEXT: or a0, a2, a0
; RV64I-NEXT: ret
@@ -646,7 +646,7 @@ define i32 @rotr_32_mask_and_63_and_31(i32 %x, i32 %y) nounwind {
; RV64XTHEADBB-LABEL: rotr_32_mask_and_63_and_31:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: srlw a2, a0, a1
-; RV64XTHEADBB-NEXT: negw a1, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
; RV64XTHEADBB-NEXT: sllw a0, a0, a1
; RV64XTHEADBB-NEXT: or a0, a2, a0
; RV64XTHEADBB-NEXT: ret
@@ -691,7 +691,7 @@ define i32 @rotr_32_mask_or_64_or_32(i32 %x, i32 %y) nounwind {
; RV64XTHEADBB-LABEL: rotr_32_mask_or_64_or_32:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: srlw a2, a0, a1
-; RV64XTHEADBB-NEXT: negw a1, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
; RV64XTHEADBB-NEXT: sllw a0, a0, a1
; RV64XTHEADBB-NEXT: or a0, a2, a0
; RV64XTHEADBB-NEXT: ret
@@ -745,7 +745,7 @@ define i64 @rotl_64_mask(i64 %x, i64 %y) nounwind {
;
; RV64I-LABEL: rotl_64_mask:
; RV64I: # %bb.0:
-; RV64I-NEXT: negw a2, a1
+; RV64I-NEXT: neg a2, a1
; RV64I-NEXT: sll a1, a0, a1
; RV64I-NEXT: srl a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
@@ -835,7 +835,7 @@ define i64 @rotl_64_mask(i64 %x, i64 %y) nounwind {
; RV64XTHEADBB-LABEL: rotl_64_mask:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: sll a2, a0, a1
-; RV64XTHEADBB-NEXT: negw a1, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
; RV64XTHEADBB-NEXT: srl a0, a0, a1
; RV64XTHEADBB-NEXT: or a0, a2, a0
; RV64XTHEADBB-NEXT: ret
@@ -890,7 +890,7 @@ define i64 @rotl_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
; RV64I-LABEL: rotl_64_mask_and_127_and_63:
; RV64I: # %bb.0:
; RV64I-NEXT: sll a2, a0, a1
-; RV64I-NEXT: negw a1, a1
+; RV64I-NEXT: neg a1, a1
; RV64I-NEXT: srl a0, a0, a1
; RV64I-NEXT: or a0, a2, a0
; RV64I-NEXT: ret
@@ -981,7 +981,7 @@ define i64 @rotl_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
; RV64XTHEADBB-LABEL: rotl_64_mask_and_127_and_63:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: sll a2, a0, a1
-; RV64XTHEADBB-NEXT: negw a1, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
; RV64XTHEADBB-NEXT: srl a0, a0, a1
; RV64XTHEADBB-NEXT: or a0, a2, a0
; RV64XTHEADBB-NEXT: ret
@@ -1026,7 +1026,7 @@ define i64 @rotl_64_mask_or_128_or_64(i64 %x, i64 %y) nounwind {
; RV64XTHEADBB-LABEL: rotl_64_mask_or_128_or_64:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: sll a2, a0, a1
-; RV64XTHEADBB-NEXT: negw a1, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
; RV64XTHEADBB-NEXT: srl a0, a0, a1
; RV64XTHEADBB-NEXT: or a0, a2, a0
; RV64XTHEADBB-NEXT: ret
@@ -1080,7 +1080,7 @@ define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind {
;
; RV64I-LABEL: rotr_64_mask:
; RV64I: # %bb.0:
-; RV64I-NEXT: negw a2, a1
+; RV64I-NEXT: neg a2, a1
; RV64I-NEXT: srl a1, a0, a1
; RV64I-NEXT: sll a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
@@ -1170,7 +1170,7 @@ define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind {
; RV64XTHEADBB-LABEL: rotr_64_mask:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: srl a2, a0, a1
-; RV64XTHEADBB-NEXT: negw a1, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
; RV64XTHEADBB-NEXT: sll a0, a0, a1
; RV64XTHEADBB-NEXT: or a0, a2, a0
; RV64XTHEADBB-NEXT: ret
@@ -1225,7 +1225,7 @@ define i64 @rotr_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
; RV64I-LABEL: rotr_64_mask_and_127_and_63:
; RV64I: # %bb.0:
; RV64I-NEXT: srl a2, a0, a1
-; RV64I-NEXT: negw a1, a1
+; RV64I-NEXT: neg a1, a1
; RV64I-NEXT: sll a0, a0, a1
; RV64I-NEXT: or a0, a2, a0
; RV64I-NEXT: ret
@@ -1316,7 +1316,7 @@ define i64 @rotr_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
; RV64XTHEADBB-LABEL: rotr_64_mask_and_127_and_63:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: srl a2, a0, a1
-; RV64XTHEADBB-NEXT: negw a1, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
; RV64XTHEADBB-NEXT: sll a0, a0, a1
; RV64XTHEADBB-NEXT: or a0, a2, a0
; RV64XTHEADBB-NEXT: ret
@@ -1361,7 +1361,7 @@ define i64 @rotr_64_mask_or_128_or_64(i64 %x, i64 %y) nounwind {
; RV64XTHEADBB-LABEL: rotr_64_mask_or_128_or_64:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: srl a2, a0, a1
-; RV64XTHEADBB-NEXT: negw a1, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
; RV64XTHEADBB-NEXT: sll a0, a0, a1
; RV64XTHEADBB-NEXT: or a0, a2, a0
; RV64XTHEADBB-NEXT: ret
@@ -1390,7 +1390,7 @@ define signext i32 @rotl_32_mask_shared(i32 signext %a, i32 signext %b, i32 sign
; RV64I-LABEL: rotl_32_mask_shared:
; RV64I: # %bb.0:
; RV64I-NEXT: sllw a3, a0, a2
-; RV64I-NEXT: negw a4, a2
+; RV64I-NEXT: neg a4, a2
; RV64I-NEXT: srlw a0, a0, a4
; RV64I-NEXT: or a0, a3, a0
; RV64I-NEXT: sllw a1, a1, a2
@@ -1424,7 +1424,7 @@ define signext i32 @rotl_32_mask_shared(i32 signext %a, i32 signext %b, i32 sign
; RV64XTHEADBB-LABEL: rotl_32_mask_shared:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: sllw a3, a0, a2
-; RV64XTHEADBB-NEXT: negw a4, a2
+; RV64XTHEADBB-NEXT: neg a4, a2
; RV64XTHEADBB-NEXT: srlw a0, a0, a4
; RV64XTHEADBB-NEXT: or a0, a3, a0
; RV64XTHEADBB-NEXT: sllw a1, a1, a2
@@ -1486,7 +1486,7 @@ define signext i64 @rotl_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign
; RV64I-LABEL: rotl_64_mask_shared:
; RV64I: # %bb.0:
; RV64I-NEXT: sll a3, a0, a2
-; RV64I-NEXT: negw a4, a2
+; RV64I-NEXT: neg a4, a2
; RV64I-NEXT: srl a0, a0, a4
; RV64I-NEXT: or a0, a3, a0
; RV64I-NEXT: sll a1, a1, a2
@@ -1590,7 +1590,7 @@ define signext i64 @rotl_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign
; RV64XTHEADBB-LABEL: rotl_64_mask_shared:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: sll a3, a0, a2
-; RV64XTHEADBB-NEXT: negw a4, a2
+; RV64XTHEADBB-NEXT: neg a4, a2
; RV64XTHEADBB-NEXT: srl a0, a0, a4
; RV64XTHEADBB-NEXT: or a0, a3, a0
; RV64XTHEADBB-NEXT: sll a1, a1, a2
@@ -1618,7 +1618,7 @@ define signext i32 @rotr_32_mask_shared(i32 signext %a, i32 signext %b, i32 sign
; RV64I-LABEL: rotr_32_mask_shared:
; RV64I: # %bb.0:
; RV64I-NEXT: srlw a3, a0, a2
-; RV64I-NEXT: negw a4, a2
+; RV64I-NEXT: neg a4, a2
; RV64I-NEXT: sllw a0, a0, a4
; RV64I-NEXT: or a0, a3, a0
; RV64I-NEXT: sllw a1, a1, a2
@@ -1652,7 +1652,7 @@ define signext i32 @rotr_32_mask_shared(i32 signext %a, i32 signext %b, i32 sign
; RV64XTHEADBB-LABEL: rotr_32_mask_shared:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: srlw a3, a0, a2
-; RV64XTHEADBB-NEXT: negw a4, a2
+; RV64XTHEADBB-NEXT: neg a4, a2
; RV64XTHEADBB-NEXT: sllw a0, a0, a4
; RV64XTHEADBB-NEXT: or a0, a3, a0
; RV64XTHEADBB-NEXT: sllw a1, a1, a2
@@ -1713,7 +1713,7 @@ define signext i64 @rotr_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign
; RV64I-LABEL: rotr_64_mask_shared:
; RV64I: # %bb.0:
; RV64I-NEXT: srl a3, a0, a2
-; RV64I-NEXT: negw a4, a2
+; RV64I-NEXT: neg a4, a2
; RV64I-NEXT: sll a0, a0, a4
; RV64I-NEXT: or a0, a3, a0
; RV64I-NEXT: sll a1, a1, a2
@@ -1816,7 +1816,7 @@ define signext i64 @rotr_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign
; RV64XTHEADBB-LABEL: rotr_64_mask_shared:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: srl a3, a0, a2
-; RV64XTHEADBB-NEXT: negw a4, a2
+; RV64XTHEADBB-NEXT: neg a4, a2
; RV64XTHEADBB-NEXT: sll a0, a0, a4
; RV64XTHEADBB-NEXT: or a0, a3, a0
; RV64XTHEADBB-NEXT: sll a1, a1, a2
@@ -1846,7 +1846,7 @@ define signext i32 @rotl_32_mask_multiple(i32 signext %a, i32 signext %b, i32 si
; RV64I-LABEL: rotl_32_mask_multiple:
; RV64I: # %bb.0:
; RV64I-NEXT: sllw a3, a0, a2
-; RV64I-NEXT: negw a4, a2
+; RV64I-NEXT: neg a4, a2
; RV64I-NEXT: sllw a2, a1, a2
; RV64I-NEXT: srlw a0, a0, a4
; RV64I-NEXT: srlw a1, a1, a4
@@ -1884,7 +1884,7 @@ define signext i32 @rotl_32_mask_multiple(i32 signext %a, i32 signext %b, i32 si
; RV64XTHEADBB-LABEL: rotl_32_mask_multiple:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: sllw a3, a0, a2
-; RV64XTHEADBB-NEXT: negw a4, a2
+; RV64XTHEADBB-NEXT: neg a4, a2
; RV64XTHEADBB-NEXT: sllw a2, a1, a2
; RV64XTHEADBB-NEXT: srlw a0, a0, a4
; RV64XTHEADBB-NEXT: srlw a1, a1, a4
@@ -1948,7 +1948,7 @@ define i64 @rotl_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind {
; RV64I-LABEL: rotl_64_mask_multiple:
; RV64I: # %bb.0:
; RV64I-NEXT: sll a3, a0, a2
-; RV64I-NEXT: negw a4, a2
+; RV64I-NEXT: neg a4, a2
; RV64I-NEXT: sll a2, a1, a2
; RV64I-NEXT: srl a0, a0, a4
; RV64I-NEXT: srl a1, a1, a4
@@ -2056,7 +2056,7 @@ define i64 @rotl_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind {
; RV64XTHEADBB-LABEL: rotl_64_mask_multiple:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: sll a3, a0, a2
-; RV64XTHEADBB-NEXT: negw a4, a2
+; RV64XTHEADBB-NEXT: neg a4, a2
; RV64XTHEADBB-NEXT: sll a2, a1, a2
; RV64XTHEADBB-NEXT: srl a0, a0, a4
; RV64XTHEADBB-NEXT: srl a1, a1, a4
@@ -2087,7 +2087,7 @@ define signext i32 @rotr_32_mask_multiple(i32 signext %a, i32 signext %b, i32 si
; RV64I-LABEL: rotr_32_mask_multiple:
; RV64I: # %bb.0:
; RV64I-NEXT: srlw a3, a0, a2
-; RV64I-NEXT: negw a4, a2
+; RV64I-NEXT: neg a4, a2
; RV64I-NEXT: srlw a2, a1, a2
; RV64I-NEXT: sllw a0, a0, a4
; RV64I-NEXT: sllw a1, a1, a4
@@ -2125,7 +2125,7 @@ define signext i32 @rotr_32_mask_multiple(i32 signext %a, i32 signext %b, i32 si
; RV64XTHEADBB-LABEL: rotr_32_mask_multiple:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: srlw a3, a0, a2
-; RV64XTHEADBB-NEXT: negw a4, a2
+; RV64XTHEADBB-NEXT: neg a4, a2
; RV64XTHEADBB-NEXT: srlw a2, a1, a2
; RV64XTHEADBB-NEXT: sllw a0, a0, a4
; RV64XTHEADBB-NEXT: sllw a1, a1, a4
@@ -2188,7 +2188,7 @@ define i64 @rotr_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind {
; RV64I-LABEL: rotr_64_mask_multiple:
; RV64I: # %bb.0:
; RV64I-NEXT: srl a3, a0, a2
-; RV64I-NEXT: negw a4, a2
+; RV64I-NEXT: neg a4, a2
; RV64I-NEXT: srl a2, a1, a2
; RV64I-NEXT: sll a0, a0, a4
; RV64I-NEXT: sll a1, a1, a4
@@ -2295,7 +2295,7 @@ define i64 @rotr_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind {
; RV64XTHEADBB-LABEL: rotr_64_mask_multiple:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: srl a3, a0, a2
-; RV64XTHEADBB-NEXT: negw a4, a2
+; RV64XTHEADBB-NEXT: neg a4, a2
; RV64XTHEADBB-NEXT: srl a2, a1, a2
; RV64XTHEADBB-NEXT: sll a0, a0, a4
; RV64XTHEADBB-NEXT: sll a1, a1, a4
@@ -2353,7 +2353,7 @@ define i64 @rotl_64_zext(i64 %x, i32 %y) nounwind {
;
; RV64I-LABEL: rotl_64_zext:
; RV64I: # %bb.0:
-; RV64I-NEXT: negw a2, a1
+; RV64I-NEXT: neg a2, a1
; RV64I-NEXT: sll a1, a0, a1
; RV64I-NEXT: srl a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
@@ -2447,7 +2447,7 @@ define i64 @rotl_64_zext(i64 %x, i32 %y) nounwind {
; RV64XTHEADBB-LABEL: rotl_64_zext:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: sll a2, a0, a1
-; RV64XTHEADBB-NEXT: negw a1, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
; RV64XTHEADBB-NEXT: srl a0, a0, a1
; RV64XTHEADBB-NEXT: or a0, a2, a0
; RV64XTHEADBB-NEXT: ret
@@ -2503,7 +2503,7 @@ define i64 @rotr_64_zext(i64 %x, i32 %y) nounwind {
;
; RV64I-LABEL: rotr_64_zext:
; RV64I: # %bb.0:
-; RV64I-NEXT: negw a2, a1
+; RV64I-NEXT: neg a2, a1
; RV64I-NEXT: srl a1, a0, a1
; RV64I-NEXT: sll a0, a0, a2
; RV64I-NEXT: or a0, a1, a0
@@ -2597,7 +2597,7 @@ define i64 @rotr_64_zext(i64 %x, i32 %y) nounwind {
; RV64XTHEADBB-LABEL: rotr_64_zext:
; RV64XTHEADBB: # %bb.0:
; RV64XTHEADBB-NEXT: srl a2, a0, a1
-; RV64XTHEADBB-NEXT: negw a1, a1
+; RV64XTHEADBB-NEXT: neg a1, a1
; RV64XTHEADBB-NEXT: sll a0, a0, a1
; RV64XTHEADBB-NEXT: or a0, a2, a0
; RV64XTHEADBB-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rv64i-demanded-bits.ll b/llvm/test/CodeGen/RISCV/rv64i-demanded-bits.ll
index b8c4328..721436d 100644
--- a/llvm/test/CodeGen/RISCV/rv64i-demanded-bits.ll
+++ b/llvm/test/CodeGen/RISCV/rv64i-demanded-bits.ll
@@ -121,7 +121,7 @@ define signext i32 @andi_sub_cse(i32 signext %0, i32 signext %1, ptr %2) {
define signext i32 @addi_sub_cse(i32 signext %0, i32 signext %1, ptr %2) {
; CHECK-LABEL: addi_sub_cse:
; CHECK: # %bb.0:
-; CHECK-NEXT: subw a0, a0, a1
+; CHECK-NEXT: sub a0, a0, a1
; CHECK-NEXT: addiw a0, a0, -8
; CHECK-NEXT: sw a0, 0(a2)
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll b/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll
index dad20b2..6b4c253 100644
--- a/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll
+++ b/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll
@@ -501,14 +501,14 @@ define signext i32 @sext_subw_zext_zext(i32 zeroext %a, i32 zeroext %b) nounwind
define zeroext i32 @zext_subw_aext_aext(i32 %a, i32 %b) nounwind {
; RV64I-LABEL: zext_subw_aext_aext:
; RV64I: # %bb.0:
-; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: zext_subw_aext_aext:
; RV64ZBA: # %bb.0:
-; RV64ZBA-NEXT: subw a0, a0, a1
+; RV64ZBA-NEXT: sub a0, a0, a1
; RV64ZBA-NEXT: zext.w a0, a0
; RV64ZBA-NEXT: ret
%1 = sub i32 %a, %b
@@ -518,14 +518,14 @@ define zeroext i32 @zext_subw_aext_aext(i32 %a, i32 %b) nounwind {
define zeroext i32 @zext_subw_aext_sext(i32 %a, i32 signext %b) nounwind {
; RV64I-LABEL: zext_subw_aext_sext:
; RV64I: # %bb.0:
-; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: zext_subw_aext_sext:
; RV64ZBA: # %bb.0:
-; RV64ZBA-NEXT: subw a0, a0, a1
+; RV64ZBA-NEXT: sub a0, a0, a1
; RV64ZBA-NEXT: zext.w a0, a0
; RV64ZBA-NEXT: ret
%1 = sub i32 %a, %b
@@ -535,14 +535,14 @@ define zeroext i32 @zext_subw_aext_sext(i32 %a, i32 signext %b) nounwind {
define zeroext i32 @zext_subw_aext_zext(i32 %a, i32 zeroext %b) nounwind {
; RV64I-LABEL: zext_subw_aext_zext:
; RV64I: # %bb.0:
-; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: zext_subw_aext_zext:
; RV64ZBA: # %bb.0:
-; RV64ZBA-NEXT: subw a0, a0, a1
+; RV64ZBA-NEXT: sub a0, a0, a1
; RV64ZBA-NEXT: zext.w a0, a0
; RV64ZBA-NEXT: ret
%1 = sub i32 %a, %b
@@ -552,14 +552,14 @@ define zeroext i32 @zext_subw_aext_zext(i32 %a, i32 zeroext %b) nounwind {
define zeroext i32 @zext_subw_sext_aext(i32 signext %a, i32 %b) nounwind {
; RV64I-LABEL: zext_subw_sext_aext:
; RV64I: # %bb.0:
-; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: zext_subw_sext_aext:
; RV64ZBA: # %bb.0:
-; RV64ZBA-NEXT: subw a0, a0, a1
+; RV64ZBA-NEXT: sub a0, a0, a1
; RV64ZBA-NEXT: zext.w a0, a0
; RV64ZBA-NEXT: ret
%1 = sub i32 %a, %b
@@ -569,14 +569,14 @@ define zeroext i32 @zext_subw_sext_aext(i32 signext %a, i32 %b) nounwind {
define zeroext i32 @zext_subw_sext_sext(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: zext_subw_sext_sext:
; RV64I: # %bb.0:
-; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: zext_subw_sext_sext:
; RV64ZBA: # %bb.0:
-; RV64ZBA-NEXT: subw a0, a0, a1
+; RV64ZBA-NEXT: sub a0, a0, a1
; RV64ZBA-NEXT: zext.w a0, a0
; RV64ZBA-NEXT: ret
%1 = sub i32 %a, %b
@@ -586,14 +586,14 @@ define zeroext i32 @zext_subw_sext_sext(i32 signext %a, i32 signext %b) nounwind
define zeroext i32 @zext_subw_sext_zext(i32 signext %a, i32 zeroext %b) nounwind {
; RV64I-LABEL: zext_subw_sext_zext:
; RV64I: # %bb.0:
-; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: zext_subw_sext_zext:
; RV64ZBA: # %bb.0:
-; RV64ZBA-NEXT: subw a0, a0, a1
+; RV64ZBA-NEXT: sub a0, a0, a1
; RV64ZBA-NEXT: zext.w a0, a0
; RV64ZBA-NEXT: ret
%1 = sub i32 %a, %b
@@ -603,14 +603,14 @@ define zeroext i32 @zext_subw_sext_zext(i32 signext %a, i32 zeroext %b) nounwind
define zeroext i32 @zext_subw_zext_aext(i32 zeroext %a, i32 %b) nounwind {
; RV64I-LABEL: zext_subw_zext_aext:
; RV64I: # %bb.0:
-; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: zext_subw_zext_aext:
; RV64ZBA: # %bb.0:
-; RV64ZBA-NEXT: subw a0, a0, a1
+; RV64ZBA-NEXT: sub a0, a0, a1
; RV64ZBA-NEXT: zext.w a0, a0
; RV64ZBA-NEXT: ret
%1 = sub i32 %a, %b
@@ -620,14 +620,14 @@ define zeroext i32 @zext_subw_zext_aext(i32 zeroext %a, i32 %b) nounwind {
define zeroext i32 @zext_subw_zext_sext(i32 zeroext %a, i32 signext %b) nounwind {
; RV64I-LABEL: zext_subw_zext_sext:
; RV64I: # %bb.0:
-; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: zext_subw_zext_sext:
; RV64ZBA: # %bb.0:
-; RV64ZBA-NEXT: subw a0, a0, a1
+; RV64ZBA-NEXT: sub a0, a0, a1
; RV64ZBA-NEXT: zext.w a0, a0
; RV64ZBA-NEXT: ret
%1 = sub i32 %a, %b
@@ -637,14 +637,14 @@ define zeroext i32 @zext_subw_zext_sext(i32 zeroext %a, i32 signext %b) nounwind
define zeroext i32 @zext_subw_zext_zext(i32 zeroext %a, i32 zeroext %b) nounwind {
; RV64I-LABEL: zext_subw_zext_zext:
; RV64I: # %bb.0:
-; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: ret
;
; RV64ZBA-LABEL: zext_subw_zext_zext:
; RV64ZBA: # %bb.0:
-; RV64ZBA-NEXT: subw a0, a0, a1
+; RV64ZBA-NEXT: sub a0, a0, a1
; RV64ZBA-NEXT: zext.w a0, a0
; RV64ZBA-NEXT: ret
%1 = sub i32 %a, %b
diff --git a/llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll b/llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll
index 0782018..219a5aa 100644
--- a/llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll
+++ b/llvm/test/CodeGen/RISCV/rv64i-w-insts-legalization.ll
@@ -9,7 +9,7 @@ define signext i32 @addw(i32 signext %s, i32 signext %n, i32 signext %k) nounwin
; CHECK-NEXT: not a2, a0
; CHECK-NEXT: addi a3, a0, 1
; CHECK-NEXT: add a2, a2, a1
-; CHECK-NEXT: subw a1, a1, a0
+; CHECK-NEXT: sub a1, a1, a0
; CHECK-NEXT: addi a1, a1, -2
; CHECK-NEXT: mul a3, a2, a3
; CHECK-NEXT: slli a1, a1, 32
@@ -53,7 +53,7 @@ define signext i32 @subw(i32 signext %s, i32 signext %n, i32 signext %k) nounwin
; CHECK-NEXT: bge a0, a1, .LBB1_2
; CHECK-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NEXT: not a2, a0
-; CHECK-NEXT: subw a3, a1, a0
+; CHECK-NEXT: sub a3, a1, a0
; CHECK-NEXT: add a1, a2, a1
; CHECK-NEXT: addi a3, a3, -2
; CHECK-NEXT: mul a2, a1, a2
@@ -61,7 +61,7 @@ define signext i32 @subw(i32 signext %s, i32 signext %n, i32 signext %k) nounwin
; CHECK-NEXT: slli a1, a1, 32
; CHECK-NEXT: mulhu a1, a1, a3
; CHECK-NEXT: srli a1, a1, 1
-; CHECK-NEXT: subw a0, a2, a0
+; CHECK-NEXT: sub a0, a2, a0
; CHECK-NEXT: subw a0, a0, a1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB1_2:
diff --git a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
index 00f7b46..81acb4f7 100644
--- a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll
@@ -357,7 +357,7 @@ define signext i32 @cttz_i32(i32 signext %a) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: beqz a0, .LBB6_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: negw a1, a0
+; RV64I-NEXT: neg a1, a0
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: slli a1, a0, 6
; RV64I-NEXT: slli a2, a0, 8
@@ -365,16 +365,16 @@ define signext i32 @cttz_i32(i32 signext %a) nounwind {
; RV64I-NEXT: slli a4, a0, 12
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: slli a2, a0, 16
-; RV64I-NEXT: subw a3, a3, a4
+; RV64I-NEXT: sub a3, a3, a4
; RV64I-NEXT: slli a4, a0, 18
-; RV64I-NEXT: subw a2, a2, a4
+; RV64I-NEXT: sub a2, a2, a4
; RV64I-NEXT: slli a4, a0, 4
-; RV64I-NEXT: subw a4, a0, a4
+; RV64I-NEXT: sub a4, a0, a4
; RV64I-NEXT: add a1, a4, a1
; RV64I-NEXT: slli a4, a0, 14
-; RV64I-NEXT: subw a3, a3, a4
+; RV64I-NEXT: sub a3, a3, a4
; RV64I-NEXT: slli a4, a0, 23
-; RV64I-NEXT: subw a2, a2, a4
+; RV64I-NEXT: sub a2, a2, a4
; RV64I-NEXT: slli a0, a0, 27
; RV64I-NEXT: add a1, a1, a3
; RV64I-NEXT: add a0, a2, a0
@@ -410,7 +410,7 @@ define signext i32 @cttz_i32(i32 signext %a) nounwind {
define signext i32 @cttz_zero_undef_i32(i32 signext %a) nounwind {
; RV64I-LABEL: cttz_zero_undef_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: negw a1, a0
+; RV64I-NEXT: neg a1, a0
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: slli a1, a0, 6
; RV64I-NEXT: slli a2, a0, 8
@@ -418,16 +418,16 @@ define signext i32 @cttz_zero_undef_i32(i32 signext %a) nounwind {
; RV64I-NEXT: slli a4, a0, 12
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: slli a2, a0, 16
-; RV64I-NEXT: subw a3, a3, a4
+; RV64I-NEXT: sub a3, a3, a4
; RV64I-NEXT: slli a4, a0, 18
-; RV64I-NEXT: subw a2, a2, a4
+; RV64I-NEXT: sub a2, a2, a4
; RV64I-NEXT: slli a4, a0, 4
-; RV64I-NEXT: subw a4, a0, a4
+; RV64I-NEXT: sub a4, a0, a4
; RV64I-NEXT: add a1, a4, a1
; RV64I-NEXT: slli a4, a0, 14
-; RV64I-NEXT: subw a3, a3, a4
+; RV64I-NEXT: sub a3, a3, a4
; RV64I-NEXT: slli a4, a0, 23
-; RV64I-NEXT: subw a2, a2, a4
+; RV64I-NEXT: sub a2, a2, a4
; RV64I-NEXT: slli a0, a0, 27
; RV64I-NEXT: add a1, a1, a3
; RV64I-NEXT: add a0, a2, a0
@@ -455,7 +455,7 @@ define signext i32 @cttz_zero_undef_i32(i32 signext %a) nounwind {
define signext i32 @findFirstSet_i32(i32 signext %a) nounwind {
; RV64I-LABEL: findFirstSet_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: negw a1, a0
+; RV64I-NEXT: neg a1, a0
; RV64I-NEXT: and a1, a0, a1
; RV64I-NEXT: slli a2, a1, 6
; RV64I-NEXT: slli a3, a1, 8
@@ -463,16 +463,16 @@ define signext i32 @findFirstSet_i32(i32 signext %a) nounwind {
; RV64I-NEXT: slli a5, a1, 12
; RV64I-NEXT: add a2, a2, a3
; RV64I-NEXT: slli a3, a1, 16
-; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: sub a4, a4, a5
; RV64I-NEXT: slli a5, a1, 18
-; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: sub a3, a3, a5
; RV64I-NEXT: slli a5, a1, 4
-; RV64I-NEXT: subw a5, a1, a5
+; RV64I-NEXT: sub a5, a1, a5
; RV64I-NEXT: add a2, a5, a2
; RV64I-NEXT: slli a5, a1, 14
-; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: sub a4, a4, a5
; RV64I-NEXT: slli a5, a1, 23
-; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: sub a3, a3, a5
; RV64I-NEXT: slli a1, a1, 27
; RV64I-NEXT: add a2, a2, a4
; RV64I-NEXT: add a1, a3, a1
@@ -508,7 +508,7 @@ define signext i32 @findFirstSet_i32(i32 signext %a) nounwind {
define signext i32 @ffs_i32(i32 signext %a) nounwind {
; RV64I-LABEL: ffs_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: negw a1, a0
+; RV64I-NEXT: neg a1, a0
; RV64I-NEXT: and a1, a0, a1
; RV64I-NEXT: slli a2, a1, 6
; RV64I-NEXT: slli a3, a1, 8
@@ -516,16 +516,16 @@ define signext i32 @ffs_i32(i32 signext %a) nounwind {
; RV64I-NEXT: slli a5, a1, 12
; RV64I-NEXT: add a2, a2, a3
; RV64I-NEXT: slli a3, a1, 16
-; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: sub a4, a4, a5
; RV64I-NEXT: slli a5, a1, 18
-; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: sub a3, a3, a5
; RV64I-NEXT: slli a5, a1, 4
-; RV64I-NEXT: subw a5, a1, a5
+; RV64I-NEXT: sub a5, a1, a5
; RV64I-NEXT: add a2, a5, a2
; RV64I-NEXT: slli a5, a1, 14
-; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: sub a4, a4, a5
; RV64I-NEXT: slli a5, a1, 23
-; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: sub a3, a3, a5
; RV64I-NEXT: add a2, a2, a4
; RV64I-NEXT: lui a4, %hi(.LCPI9_0)
; RV64I-NEXT: addi a4, a4, %lo(.LCPI9_0)
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index fdff4a3..b46f7cc 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -3707,7 +3707,7 @@ define ptr @test_gep_gep_dont_crash(ptr %p, i64 %a1, i64 %a2) {
define i64 @regression(i32 signext %x, i32 signext %y) {
; RV64I-LABEL: regression:
; RV64I: # %bb.0:
-; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: srli a1, a0, 29
; RV64I-NEXT: srli a0, a0, 27
@@ -3716,14 +3716,14 @@ define i64 @regression(i32 signext %x, i32 signext %y) {
;
; RV64ZBA-LABEL: regression:
; RV64ZBA: # %bb.0:
-; RV64ZBA-NEXT: subw a0, a0, a1
+; RV64ZBA-NEXT: sub a0, a0, a1
; RV64ZBA-NEXT: slli.uw a0, a0, 3
; RV64ZBA-NEXT: sh1add a0, a0, a0
; RV64ZBA-NEXT: ret
;
; RV64XANDESPERF-LABEL: regression:
; RV64XANDESPERF: # %bb.0:
-; RV64XANDESPERF-NEXT: subw a0, a0, a1
+; RV64XANDESPERF-NEXT: sub a0, a0, a1
; RV64XANDESPERF-NEXT: slli a0, a0, 32
; RV64XANDESPERF-NEXT: srli a0, a0, 29
; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a0
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll b/llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll
index 12fc98c..f2c95f8 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb-zbkb.ll
@@ -225,7 +225,7 @@ define signext i32 @rol_i32(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: rol_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: sllw a2, a0, a1
-; RV64I-NEXT: negw a1, a1
+; RV64I-NEXT: neg a1, a1
; RV64I-NEXT: srlw a0, a0, a1
; RV64I-NEXT: or a0, a2, a0
; RV64I-NEXT: ret
@@ -243,7 +243,7 @@ define void @rol_i32_nosext(i32 signext %a, i32 signext %b, ptr %x) nounwind {
; RV64I-LABEL: rol_i32_nosext:
; RV64I: # %bb.0:
; RV64I-NEXT: sllw a3, a0, a1
-; RV64I-NEXT: negw a1, a1
+; RV64I-NEXT: neg a1, a1
; RV64I-NEXT: srlw a0, a0, a1
; RV64I-NEXT: or a0, a3, a0
; RV64I-NEXT: sw a0, 0(a2)
@@ -263,7 +263,7 @@ define signext i32 @rol_i32_neg_constant_rhs(i32 signext %a) nounwind {
; RV64I-LABEL: rol_i32_neg_constant_rhs:
; RV64I: # %bb.0:
; RV64I-NEXT: li a1, -2
-; RV64I-NEXT: negw a2, a0
+; RV64I-NEXT: neg a2, a0
; RV64I-NEXT: sllw a0, a1, a0
; RV64I-NEXT: srlw a1, a1, a2
; RV64I-NEXT: or a0, a0, a1
@@ -284,7 +284,7 @@ define i64 @rol_i64(i64 %a, i64 %b) nounwind {
; RV64I-LABEL: rol_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: sll a2, a0, a1
-; RV64I-NEXT: negw a1, a1
+; RV64I-NEXT: neg a1, a1
; RV64I-NEXT: srl a0, a0, a1
; RV64I-NEXT: or a0, a2, a0
; RV64I-NEXT: ret
@@ -303,7 +303,7 @@ define signext i32 @ror_i32(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: ror_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: srlw a2, a0, a1
-; RV64I-NEXT: negw a1, a1
+; RV64I-NEXT: neg a1, a1
; RV64I-NEXT: sllw a0, a0, a1
; RV64I-NEXT: or a0, a2, a0
; RV64I-NEXT: ret
@@ -321,7 +321,7 @@ define void @ror_i32_nosext(i32 signext %a, i32 signext %b, ptr %x) nounwind {
; RV64I-LABEL: ror_i32_nosext:
; RV64I: # %bb.0:
; RV64I-NEXT: srlw a3, a0, a1
-; RV64I-NEXT: negw a1, a1
+; RV64I-NEXT: neg a1, a1
; RV64I-NEXT: sllw a0, a0, a1
; RV64I-NEXT: or a0, a3, a0
; RV64I-NEXT: sw a0, 0(a2)
@@ -341,7 +341,7 @@ define signext i32 @ror_i32_neg_constant_rhs(i32 signext %a) nounwind {
; RV64I-LABEL: ror_i32_neg_constant_rhs:
; RV64I: # %bb.0:
; RV64I-NEXT: li a1, -2
-; RV64I-NEXT: negw a2, a0
+; RV64I-NEXT: neg a2, a0
; RV64I-NEXT: srlw a0, a1, a0
; RV64I-NEXT: sllw a1, a1, a2
; RV64I-NEXT: or a0, a0, a1
@@ -362,7 +362,7 @@ define i64 @ror_i64(i64 %a, i64 %b) nounwind {
; RV64I-LABEL: ror_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: srl a2, a0, a1
-; RV64I-NEXT: negw a1, a1
+; RV64I-NEXT: neg a1, a1
; RV64I-NEXT: sll a0, a0, a1
; RV64I-NEXT: or a0, a2, a0
; RV64I-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index e640727..d133f9d 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -347,7 +347,7 @@ define signext i32 @cttz_i32(i32 signext %a) nounwind {
; RV64I: # %bb.0:
; RV64I-NEXT: beqz a0, .LBB6_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: negw a1, a0
+; RV64I-NEXT: neg a1, a0
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: slli a1, a0, 6
; RV64I-NEXT: slli a2, a0, 8
@@ -355,16 +355,16 @@ define signext i32 @cttz_i32(i32 signext %a) nounwind {
; RV64I-NEXT: slli a4, a0, 12
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: slli a2, a0, 16
-; RV64I-NEXT: subw a3, a3, a4
+; RV64I-NEXT: sub a3, a3, a4
; RV64I-NEXT: slli a4, a0, 18
-; RV64I-NEXT: subw a2, a2, a4
+; RV64I-NEXT: sub a2, a2, a4
; RV64I-NEXT: slli a4, a0, 4
-; RV64I-NEXT: subw a4, a0, a4
+; RV64I-NEXT: sub a4, a0, a4
; RV64I-NEXT: add a1, a4, a1
; RV64I-NEXT: slli a4, a0, 14
-; RV64I-NEXT: subw a3, a3, a4
+; RV64I-NEXT: sub a3, a3, a4
; RV64I-NEXT: slli a4, a0, 23
-; RV64I-NEXT: subw a2, a2, a4
+; RV64I-NEXT: sub a2, a2, a4
; RV64I-NEXT: slli a0, a0, 27
; RV64I-NEXT: add a1, a1, a3
; RV64I-NEXT: add a0, a2, a0
@@ -390,7 +390,7 @@ define signext i32 @cttz_i32(i32 signext %a) nounwind {
define signext i32 @cttz_zero_undef_i32(i32 signext %a) nounwind {
; RV64I-LABEL: cttz_zero_undef_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: negw a1, a0
+; RV64I-NEXT: neg a1, a0
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: slli a1, a0, 6
; RV64I-NEXT: slli a2, a0, 8
@@ -398,16 +398,16 @@ define signext i32 @cttz_zero_undef_i32(i32 signext %a) nounwind {
; RV64I-NEXT: slli a4, a0, 12
; RV64I-NEXT: add a1, a1, a2
; RV64I-NEXT: slli a2, a0, 16
-; RV64I-NEXT: subw a3, a3, a4
+; RV64I-NEXT: sub a3, a3, a4
; RV64I-NEXT: slli a4, a0, 18
-; RV64I-NEXT: subw a2, a2, a4
+; RV64I-NEXT: sub a2, a2, a4
; RV64I-NEXT: slli a4, a0, 4
-; RV64I-NEXT: subw a4, a0, a4
+; RV64I-NEXT: sub a4, a0, a4
; RV64I-NEXT: add a1, a4, a1
; RV64I-NEXT: slli a4, a0, 14
-; RV64I-NEXT: subw a3, a3, a4
+; RV64I-NEXT: sub a3, a3, a4
; RV64I-NEXT: slli a4, a0, 23
-; RV64I-NEXT: subw a2, a2, a4
+; RV64I-NEXT: sub a2, a2, a4
; RV64I-NEXT: slli a0, a0, 27
; RV64I-NEXT: add a1, a1, a3
; RV64I-NEXT: add a0, a2, a0
@@ -430,7 +430,7 @@ define signext i32 @cttz_zero_undef_i32(i32 signext %a) nounwind {
define signext i32 @findFirstSet_i32(i32 signext %a) nounwind {
; RV64I-LABEL: findFirstSet_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: negw a1, a0
+; RV64I-NEXT: neg a1, a0
; RV64I-NEXT: and a1, a0, a1
; RV64I-NEXT: slli a2, a1, 6
; RV64I-NEXT: slli a3, a1, 8
@@ -438,16 +438,16 @@ define signext i32 @findFirstSet_i32(i32 signext %a) nounwind {
; RV64I-NEXT: slli a5, a1, 12
; RV64I-NEXT: add a2, a2, a3
; RV64I-NEXT: slli a3, a1, 16
-; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: sub a4, a4, a5
; RV64I-NEXT: slli a5, a1, 18
-; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: sub a3, a3, a5
; RV64I-NEXT: slli a5, a1, 4
-; RV64I-NEXT: subw a5, a1, a5
+; RV64I-NEXT: sub a5, a1, a5
; RV64I-NEXT: add a2, a5, a2
; RV64I-NEXT: slli a5, a1, 14
-; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: sub a4, a4, a5
; RV64I-NEXT: slli a5, a1, 23
-; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: sub a3, a3, a5
; RV64I-NEXT: slli a1, a1, 27
; RV64I-NEXT: add a2, a2, a4
; RV64I-NEXT: add a1, a3, a1
@@ -478,7 +478,7 @@ define signext i32 @findFirstSet_i32(i32 signext %a) nounwind {
define signext i32 @ffs_i32(i32 signext %a) nounwind {
; RV64I-LABEL: ffs_i32:
; RV64I: # %bb.0:
-; RV64I-NEXT: negw a1, a0
+; RV64I-NEXT: neg a1, a0
; RV64I-NEXT: and a1, a0, a1
; RV64I-NEXT: slli a2, a1, 6
; RV64I-NEXT: slli a3, a1, 8
@@ -486,16 +486,16 @@ define signext i32 @ffs_i32(i32 signext %a) nounwind {
; RV64I-NEXT: slli a5, a1, 12
; RV64I-NEXT: add a2, a2, a3
; RV64I-NEXT: slli a3, a1, 16
-; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: sub a4, a4, a5
; RV64I-NEXT: slli a5, a1, 18
-; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: sub a3, a3, a5
; RV64I-NEXT: slli a5, a1, 4
-; RV64I-NEXT: subw a5, a1, a5
+; RV64I-NEXT: sub a5, a1, a5
; RV64I-NEXT: add a2, a5, a2
; RV64I-NEXT: slli a5, a1, 14
-; RV64I-NEXT: subw a4, a4, a5
+; RV64I-NEXT: sub a4, a4, a5
; RV64I-NEXT: slli a5, a1, 23
-; RV64I-NEXT: subw a3, a3, a5
+; RV64I-NEXT: sub a3, a3, a5
; RV64I-NEXT: add a2, a2, a4
; RV64I-NEXT: lui a4, %hi(.LCPI9_0)
; RV64I-NEXT: addi a4, a4, %lo(.LCPI9_0)
@@ -701,7 +701,7 @@ define signext i32 @ctpop_i32_load(ptr %p) nounwind {
;
; RV64ZBB-LABEL: ctpop_i32_load:
; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: lwu a0, 0(a0)
+; RV64ZBB-NEXT: lw a0, 0(a0)
; RV64ZBB-NEXT: cpopw a0, a0
; RV64ZBB-NEXT: ret
%a = load i32, ptr %p
@@ -1741,7 +1741,7 @@ define i8 @sub_if_uge_i8(i8 %x, i8 %y) {
; RV64ZBB-LABEL: sub_if_uge_i8:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: zext.b a2, a0
-; RV64ZBB-NEXT: subw a0, a0, a1
+; RV64ZBB-NEXT: sub a0, a0, a1
; RV64ZBB-NEXT: zext.b a0, a0
; RV64ZBB-NEXT: minu a0, a2, a0
; RV64ZBB-NEXT: ret
@@ -1767,7 +1767,7 @@ define i16 @sub_if_uge_i16(i16 %x, i16 %y) {
; RV64ZBB-LABEL: sub_if_uge_i16:
; RV64ZBB: # %bb.0:
; RV64ZBB-NEXT: zext.h a2, a0
-; RV64ZBB-NEXT: subw a0, a0, a1
+; RV64ZBB-NEXT: sub a0, a0, a1
; RV64ZBB-NEXT: zext.h a0, a0
; RV64ZBB-NEXT: minu a0, a2, a0
; RV64ZBB-NEXT: ret
@@ -1852,7 +1852,7 @@ define i32 @sub_if_uge_multiuse_select_i32(i32 %x, i32 %y) {
; CHECK-NEXT: sltu a2, a3, a2
; CHECK-NEXT: addi a2, a2, -1
; CHECK-NEXT: and a1, a2, a1
-; CHECK-NEXT: subw a0, a0, a1
+; CHECK-NEXT: sub a0, a0, a1
; CHECK-NEXT: sllw a0, a0, a1
; CHECK-NEXT: ret
%cmp = icmp ult i32 %x, %y
@@ -1870,7 +1870,7 @@ define i32 @sub_if_uge_multiuse_cmp_i32(i32 %x, i32 %y) {
; RV64I-NEXT: sltu a4, a3, a2
; RV64I-NEXT: addi a4, a4, -1
; RV64I-NEXT: and a1, a4, a1
-; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: bltu a3, a2, .LBB68_2
; RV64I-NEXT: # %bb.1:
; RV64I-NEXT: li a1, 4
@@ -1980,7 +1980,7 @@ define i32 @sub_if_uge_C_i32(i32 signext %x) {
; RV64I-NEXT: lui a2, 1048560
; RV64I-NEXT: addi a1, a1, -16
; RV64I-NEXT: sltu a1, a1, a0
-; RV64I-NEXT: negw a1, a1
+; RV64I-NEXT: neg a1, a1
; RV64I-NEXT: addi a2, a2, 15
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: addw a0, a0, a1
@@ -2036,7 +2036,7 @@ define i32 @sub_if_uge_C_multiuse_cmp_i32(i32 signext %x, ptr %z) {
; RV64I-NEXT: lui a3, 1048560
; RV64I-NEXT: addi a2, a2, -16
; RV64I-NEXT: sltu a2, a2, a0
-; RV64I-NEXT: negw a4, a2
+; RV64I-NEXT: neg a4, a2
; RV64I-NEXT: addi a3, a3, 15
; RV64I-NEXT: and a3, a4, a3
; RV64I-NEXT: addw a0, a0, a3
diff --git a/llvm/test/CodeGen/RISCV/rv64zbkb.ll b/llvm/test/CodeGen/RISCV/rv64zbkb.ll
index 696c2a5..818ea72 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbkb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbkb.ll
@@ -114,7 +114,7 @@ define i64 @pack_i64_3(ptr %0, ptr %1) {
; RV64ZBKB-LABEL: pack_i64_3:
; RV64ZBKB: # %bb.0:
; RV64ZBKB-NEXT: lw a0, 0(a0)
-; RV64ZBKB-NEXT: lwu a1, 0(a1)
+; RV64ZBKB-NEXT: lw a1, 0(a1)
; RV64ZBKB-NEXT: pack a0, a1, a0
; RV64ZBKB-NEXT: ret
%3 = load i32, ptr %0, align 4
diff --git a/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll b/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll
index 96c349d..d166a6e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll
@@ -92,6 +92,150 @@ entry:
ret <vscale x 1 x i32> %va
}
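+; All registers clobbered here are callee-saved under riscv_vector_cc: v1, v3,
+; v5, and v7 are spilled individually, and v24-v31 as a single vs8r/vl8r group.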
+define riscv_vector_cc <vscale x 1 x i32> @test_vector_callee2(<vscale x 1 x i32> %va) nounwind {
+; SPILL-O2-LABEL: test_vector_callee2:
+; SPILL-O2: # %bb.0: # %entry
+; SPILL-O2-NEXT: addi sp, sp, -16
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 12
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: sub sp, sp, a0
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 11
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 10
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 3
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 3
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill
+; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; SPILL-O2-NEXT: #APP
+; SPILL-O2-NEXT: #NO_APP
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 11
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 10
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 3
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 3
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
+; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 12
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add sp, sp, a0
+; SPILL-O2-NEXT: addi sp, sp, 16
+; SPILL-O2-NEXT: ret
+entry:
+ call void asm sideeffect "",
+ "~{v1},~{v3},~{v5},~{v7},~{v24m2},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+
+ ret <vscale x 1 x i32> %va
+}
+
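+; Mixed-width clobbers: v1 and v24 spill with vs1r, v2 and v26 with vs2r, and
+; v28-v31 with vs4r.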
+define riscv_vector_cc <vscale x 1 x i32> @test_vector_callee3(<vscale x 1 x i32> %va) nounwind {
+; SPILL-O2-LABEL: test_vector_callee3:
+; SPILL-O2: # %bb.0: # %entry
+; SPILL-O2-NEXT: addi sp, sp, -16
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 10
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: sub sp, sp, a0
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 3
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 3
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 6
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs2r.v v2, (a0) # vscale x 16-byte Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 2
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs2r.v v26, (a0) # vscale x 16-byte Folded Spill
+; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: vs4r.v v28, (a0) # vscale x 32-byte Folded Spill
+; SPILL-O2-NEXT: #APP
+; SPILL-O2-NEXT: #NO_APP
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 3
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 3
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 6
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 2
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl2r.v v26, (a0) # vscale x 16-byte Folded Reload
+; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: vl4r.v v28, (a0) # vscale x 32-byte Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 10
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add sp, sp, a0
+; SPILL-O2-NEXT: addi sp, sp, 16
+; SPILL-O2-NEXT: ret
+entry:
+ call void asm sideeffect "",
+ "~{v1},~{v2},~{v3},~{v24},~{v26m2},~{v28m2},~{v29},~{v30},~{v31}"()
+
+ ret <vscale x 1 x i32> %va
+}
+
; Make sure the local stack allocation pass doesn't count vector registers. The
; sizes are chosen to be on the edge of what RISCVRegister::needsFrameBaseReg
; considers to need a virtual base register.
diff --git a/llvm/test/CodeGen/RISCV/rvv/expand-no-v.ll b/llvm/test/CodeGen/RISCV/rvv/expand-no-v.ll
index 5b82b27..81b2b65 100644
--- a/llvm/test/CodeGen/RISCV/rvv/expand-no-v.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/expand-no-v.ll
@@ -63,10 +63,10 @@ define i32 @vpreduce_add_v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl) {
; RV64-NEXT: and a2, t4, a2
; RV64-NEXT: and t0, t3, t1
; RV64-NEXT: and a7, t2, a7
-; RV64-NEXT: negw a7, a7
-; RV64-NEXT: negw t0, t0
-; RV64-NEXT: negw a2, a2
-; RV64-NEXT: negw a3, a3
+; RV64-NEXT: neg a7, a7
+; RV64-NEXT: neg t0, t0
+; RV64-NEXT: neg a2, a2
+; RV64-NEXT: neg a3, a3
; RV64-NEXT: and a4, a7, a4
; RV64-NEXT: and a6, t0, a6
; RV64-NEXT: and a1, a2, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
index 07aa05f..48845c5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll
@@ -930,7 +930,7 @@ define void @strided_load_startval_add_with_splat(ptr noalias nocapture %arg, pt
; CHECK-NEXT: add a2, a0, a4
; CHECK-NEXT: slli a5, a4, 2
; CHECK-NEXT: add a1, a1, a4
-; CHECK-NEXT: subw a3, a3, a4
+; CHECK-NEXT: sub a3, a3, a4
; CHECK-NEXT: add a1, a1, a5
; CHECK-NEXT: slli a3, a3, 32
; CHECK-NEXT: srli a3, a3, 32
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
index b6253c6..dcf1ab0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
@@ -204,7 +204,7 @@ define <2 x i64> @mgather_v2i64_align4(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i64> %
; RV64-SLOW-NEXT: # %bb.1: # %cond.load
; RV64-SLOW-NEXT: vsetvli zero, zero, e64, m8, tu, ma
; RV64-SLOW-NEXT: vmv.x.s a1, v8
-; RV64-SLOW-NEXT: lwu a2, 4(a1)
+; RV64-SLOW-NEXT: lw a2, 4(a1)
; RV64-SLOW-NEXT: lwu a1, 0(a1)
; RV64-SLOW-NEXT: slli a2, a2, 32
; RV64-SLOW-NEXT: or a1, a2, a1
@@ -216,7 +216,7 @@ define <2 x i64> @mgather_v2i64_align4(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i64> %
; RV64-SLOW-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-SLOW-NEXT: vslidedown.vi v8, v8, 1
; RV64-SLOW-NEXT: vmv.x.s a0, v8
-; RV64-SLOW-NEXT: lwu a1, 4(a0)
+; RV64-SLOW-NEXT: lw a1, 4(a0)
; RV64-SLOW-NEXT: lwu a0, 0(a0)
; RV64-SLOW-NEXT: slli a1, a1, 32
; RV64-SLOW-NEXT: or a0, a1, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll
index 1a716f6..e89bac5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll
@@ -818,7 +818,7 @@ define <2 x i64> @vwaddu_vx_v2i64_i32(ptr %x, ptr %y) nounwind {
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT: vle32.v v9, (a0)
-; RV64-NEXT: lwu a0, 0(a1)
+; RV64-NEXT: lw a0, 0(a1)
; RV64-NEXT: vwaddu.vx v8, v9, a0
; RV64-NEXT: ret
%a = load <2 x i32>, ptr %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll
index 8ebd93e..b933ef9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll
@@ -853,7 +853,7 @@ define <2 x i64> @vwmulsu_vx_v2i64_i32(ptr %x, ptr %y) {
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT: vle32.v v9, (a0)
-; RV64-NEXT: lwu a0, 0(a1)
+; RV64-NEXT: lw a0, 0(a1)
; RV64-NEXT: vwmulsu.vx v8, v9, a0
; RV64-NEXT: ret
%a = load <2 x i32>, ptr %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll
index 90e9ffd..7cedee5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll
@@ -710,13 +710,6 @@ define <4 x i32> @vwmulu_vx_v4i32_i8(ptr %x, ptr %y) {
}
define <4 x i32> @vwmulu_vx_v4i32_i16(ptr %x, ptr %y) {
-; CHECK-LABEL: vwmulu_vx_v4i32_i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vle16.v v9, (a0)
-; CHECK-NEXT: lhu a0, 0(a1)
-; CHECK-NEXT: vwmulu.vx v8, v9, a0
-; CHECK-NEXT: ret
%a = load <4 x i16>, ptr %x
%b = load i16, ptr %y
%c = zext i16 %b to i32
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll
index bfdda47..86ac038e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll
@@ -821,7 +821,7 @@ define <2 x i64> @vwsubu_vx_v2i64_i32(ptr %x, ptr %y) nounwind {
;
; RV64-LABEL: vwsubu_vx_v2i64_i32:
; RV64: # %bb.0:
-; RV64-NEXT: lwu a1, 0(a1)
+; RV64-NEXT: lw a1, 0(a1)
; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT: vle32.v v9, (a0)
; RV64-NEXT: vmv.v.x v10, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
index f9ac53b..f481f9c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
@@ -274,10 +274,10 @@ define <4 x i32> @ustest_f32i32(<4 x float> %x) {
; CHECK-NOV-NEXT: sgtz a6, a2
; CHECK-NOV-NEXT: sgtz a7, a3
; CHECK-NOV-NEXT: sgtz t0, a5
-; CHECK-NOV-NEXT: negw t0, t0
-; CHECK-NOV-NEXT: negw a7, a7
-; CHECK-NOV-NEXT: negw a6, a6
-; CHECK-NOV-NEXT: negw a4, a4
+; CHECK-NOV-NEXT: neg t0, t0
+; CHECK-NOV-NEXT: neg a7, a7
+; CHECK-NOV-NEXT: neg a6, a6
+; CHECK-NOV-NEXT: neg a4, a4
; CHECK-NOV-NEXT: and a5, t0, a5
; CHECK-NOV-NEXT: and a3, a7, a3
; CHECK-NOV-NEXT: and a2, a6, a2
@@ -755,10 +755,10 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
; CHECK-NOV-NEXT: sgtz a4, s1
; CHECK-NOV-NEXT: sgtz a5, a1
; CHECK-NOV-NEXT: sgtz a6, a3
-; CHECK-NOV-NEXT: negw a6, a6
-; CHECK-NOV-NEXT: negw a5, a5
-; CHECK-NOV-NEXT: negw a4, a4
-; CHECK-NOV-NEXT: negw a2, a2
+; CHECK-NOV-NEXT: neg a6, a6
+; CHECK-NOV-NEXT: neg a5, a5
+; CHECK-NOV-NEXT: neg a4, a4
+; CHECK-NOV-NEXT: neg a2, a2
; CHECK-NOV-NEXT: and a3, a6, a3
; CHECK-NOV-NEXT: and a1, a5, a1
; CHECK-NOV-NEXT: and a4, a4, s1
@@ -1166,10 +1166,10 @@ define <4 x i16> @ustest_f32i16(<4 x float> %x) {
; CHECK-NOV-NEXT: sgtz a6, a2
; CHECK-NOV-NEXT: sgtz a7, a3
; CHECK-NOV-NEXT: sgtz t0, a5
-; CHECK-NOV-NEXT: negw t0, t0
-; CHECK-NOV-NEXT: negw a7, a7
-; CHECK-NOV-NEXT: negw a6, a6
-; CHECK-NOV-NEXT: negw a4, a4
+; CHECK-NOV-NEXT: neg t0, t0
+; CHECK-NOV-NEXT: neg a7, a7
+; CHECK-NOV-NEXT: neg a6, a6
+; CHECK-NOV-NEXT: neg a4, a4
; CHECK-NOV-NEXT: and a5, t0, a5
; CHECK-NOV-NEXT: and a3, a7, a3
; CHECK-NOV-NEXT: and a2, a6, a2
@@ -2040,14 +2040,14 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
; CHECK-NOV-NEXT: sgtz t4, a5
; CHECK-NOV-NEXT: sgtz t5, a6
; CHECK-NOV-NEXT: sgtz t6, a7
-; CHECK-NOV-NEXT: negw t6, t6
-; CHECK-NOV-NEXT: negw t5, t5
-; CHECK-NOV-NEXT: negw t4, t4
-; CHECK-NOV-NEXT: negw t3, t3
-; CHECK-NOV-NEXT: negw t2, t2
-; CHECK-NOV-NEXT: negw t1, t1
-; CHECK-NOV-NEXT: negw t0, t0
-; CHECK-NOV-NEXT: negw a4, a4
+; CHECK-NOV-NEXT: neg t6, t6
+; CHECK-NOV-NEXT: neg t5, t5
+; CHECK-NOV-NEXT: neg t4, t4
+; CHECK-NOV-NEXT: neg t3, t3
+; CHECK-NOV-NEXT: neg t2, t2
+; CHECK-NOV-NEXT: neg t1, t1
+; CHECK-NOV-NEXT: neg t0, t0
+; CHECK-NOV-NEXT: neg a4, a4
; CHECK-NOV-NEXT: and a7, t6, a7
; CHECK-NOV-NEXT: and a6, t5, a6
; CHECK-NOV-NEXT: and a5, t4, a5
@@ -3830,16 +3830,16 @@ define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) {
; CHECK-NOV-NEXT: mv a5, a3
; CHECK-NOV-NEXT: .LBB32_5: # %entry
; CHECK-NOV-NEXT: sgtz a3, a5
-; CHECK-NOV-NEXT: negw a3, a3
+; CHECK-NOV-NEXT: neg a3, a3
; CHECK-NOV-NEXT: and a3, a3, a5
; CHECK-NOV-NEXT: sgtz a5, a4
-; CHECK-NOV-NEXT: negw a5, a5
+; CHECK-NOV-NEXT: neg a5, a5
; CHECK-NOV-NEXT: and a4, a5, a4
; CHECK-NOV-NEXT: sgtz a5, a2
-; CHECK-NOV-NEXT: negw a5, a5
+; CHECK-NOV-NEXT: neg a5, a5
; CHECK-NOV-NEXT: and a2, a5, a2
; CHECK-NOV-NEXT: sgtz a5, a1
-; CHECK-NOV-NEXT: negw a5, a5
+; CHECK-NOV-NEXT: neg a5, a5
; CHECK-NOV-NEXT: and a1, a5, a1
; CHECK-NOV-NEXT: sw a3, 0(a0)
; CHECK-NOV-NEXT: sw a4, 4(a0)
@@ -4306,16 +4306,16 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
; CHECK-NOV-NEXT: mv a3, a2
; CHECK-NOV-NEXT: .LBB35_5: # %entry
; CHECK-NOV-NEXT: sgtz a2, a3
-; CHECK-NOV-NEXT: negw a2, a2
+; CHECK-NOV-NEXT: neg a2, a2
; CHECK-NOV-NEXT: and a2, a2, a3
; CHECK-NOV-NEXT: sgtz a3, a1
-; CHECK-NOV-NEXT: negw a3, a3
+; CHECK-NOV-NEXT: neg a3, a3
; CHECK-NOV-NEXT: and a1, a3, a1
; CHECK-NOV-NEXT: sgtz a3, s1
-; CHECK-NOV-NEXT: negw a3, a3
+; CHECK-NOV-NEXT: neg a3, a3
; CHECK-NOV-NEXT: and a3, a3, s1
; CHECK-NOV-NEXT: sgtz a4, a0
-; CHECK-NOV-NEXT: negw a4, a4
+; CHECK-NOV-NEXT: neg a4, a4
; CHECK-NOV-NEXT: and a0, a4, a0
; CHECK-NOV-NEXT: sw a2, 0(s0)
; CHECK-NOV-NEXT: sw a1, 4(s0)
@@ -4707,16 +4707,16 @@ define <4 x i16> @ustest_f32i16_mm(<4 x float> %x) {
; CHECK-NOV-NEXT: mv a5, a3
; CHECK-NOV-NEXT: .LBB41_5: # %entry
; CHECK-NOV-NEXT: sgtz a3, a5
-; CHECK-NOV-NEXT: negw a3, a3
+; CHECK-NOV-NEXT: neg a3, a3
; CHECK-NOV-NEXT: and a3, a3, a5
; CHECK-NOV-NEXT: sgtz a5, a4
-; CHECK-NOV-NEXT: negw a5, a5
+; CHECK-NOV-NEXT: neg a5, a5
; CHECK-NOV-NEXT: and a4, a5, a4
; CHECK-NOV-NEXT: sgtz a5, a2
-; CHECK-NOV-NEXT: negw a5, a5
+; CHECK-NOV-NEXT: neg a5, a5
; CHECK-NOV-NEXT: and a2, a5, a2
; CHECK-NOV-NEXT: sgtz a5, a1
-; CHECK-NOV-NEXT: negw a5, a5
+; CHECK-NOV-NEXT: neg a5, a5
; CHECK-NOV-NEXT: and a1, a5, a1
; CHECK-NOV-NEXT: sh a3, 0(a0)
; CHECK-NOV-NEXT: sh a4, 2(a0)
@@ -5572,28 +5572,28 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
; CHECK-NOV-NEXT: mv a7, a3
; CHECK-NOV-NEXT: .LBB44_9: # %entry
; CHECK-NOV-NEXT: sgtz a3, a7
-; CHECK-NOV-NEXT: negw a3, a3
+; CHECK-NOV-NEXT: neg a3, a3
; CHECK-NOV-NEXT: and a3, a3, a7
; CHECK-NOV-NEXT: sgtz a7, a6
-; CHECK-NOV-NEXT: negw a7, a7
+; CHECK-NOV-NEXT: neg a7, a7
; CHECK-NOV-NEXT: and a6, a7, a6
; CHECK-NOV-NEXT: sgtz a7, a5
-; CHECK-NOV-NEXT: negw a7, a7
+; CHECK-NOV-NEXT: neg a7, a7
; CHECK-NOV-NEXT: and a5, a7, a5
; CHECK-NOV-NEXT: sgtz a7, a4
-; CHECK-NOV-NEXT: negw a7, a7
+; CHECK-NOV-NEXT: neg a7, a7
; CHECK-NOV-NEXT: and a4, a7, a4
; CHECK-NOV-NEXT: sgtz a7, a2
-; CHECK-NOV-NEXT: negw a7, a7
+; CHECK-NOV-NEXT: neg a7, a7
; CHECK-NOV-NEXT: and a2, a7, a2
; CHECK-NOV-NEXT: sgtz a7, a1
-; CHECK-NOV-NEXT: negw a7, a7
+; CHECK-NOV-NEXT: neg a7, a7
; CHECK-NOV-NEXT: and a1, a7, a1
; CHECK-NOV-NEXT: sgtz a7, s1
-; CHECK-NOV-NEXT: negw a7, a7
+; CHECK-NOV-NEXT: neg a7, a7
; CHECK-NOV-NEXT: and a7, a7, s1
; CHECK-NOV-NEXT: sgtz t0, a0
-; CHECK-NOV-NEXT: negw t0, t0
+; CHECK-NOV-NEXT: neg t0, t0
; CHECK-NOV-NEXT: and a0, t0, a0
; CHECK-NOV-NEXT: sh a2, 8(s0)
; CHECK-NOV-NEXT: sh a1, 10(s0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/interrupt-attr-nocall.ll b/llvm/test/CodeGen/RISCV/rvv/interrupt-attr-nocall.ll
index af2e8d3..42c2556 100644
--- a/llvm/test/CodeGen/RISCV/rvv/interrupt-attr-nocall.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/interrupt-attr-nocall.ll
@@ -14,12 +14,8 @@ define void @foo_lmul1() nounwind #0 {
; CHECK-RV32-NEXT: csrr a0, vlenb
; CHECK-RV32-NEXT: slli a0, a0, 1
; CHECK-RV32-NEXT: sub sp, sp, a0
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
; CHECK-RV32-NEXT: addi a0, sp, 16
-; CHECK-RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
; CHECK-RV32-NEXT: lui a0, %hi(a)
; CHECK-RV32-NEXT: addi a0, a0, %lo(a)
; CHECK-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
@@ -31,12 +27,8 @@ define void @foo_lmul1() nounwind #0 {
; CHECK-RV32-NEXT: lui a0, %hi(c)
; CHECK-RV32-NEXT: addi a0, a0, %lo(c)
; CHECK-RV32-NEXT: vse32.v v8, (a0)
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
; CHECK-RV32-NEXT: addi a0, sp, 16
-; CHECK-RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
; CHECK-RV32-NEXT: csrr a0, vlenb
; CHECK-RV32-NEXT: slli a0, a0, 1
; CHECK-RV32-NEXT: add sp, sp, a0
@@ -62,25 +54,8 @@ define void @foo_lmul2() nounwind #0 {
; CHECK-RV32-NEXT: csrr a0, vlenb
; CHECK-RV32-NEXT: slli a0, a0, 2
; CHECK-RV32-NEXT: sub sp, sp, a0
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: sw a1, 4(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: slli a1, a0, 1
-; CHECK-RV32-NEXT: add a0, a1, a0
-; CHECK-RV32-NEXT: lw a1, 4(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
; CHECK-RV32-NEXT: addi a0, sp, 16
-; CHECK-RV32-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
; CHECK-RV32-NEXT: lui a0, %hi(d)
; CHECK-RV32-NEXT: addi a0, a0, %lo(d)
; CHECK-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
@@ -92,25 +67,8 @@ define void @foo_lmul2() nounwind #0 {
; CHECK-RV32-NEXT: lui a0, %hi(f)
; CHECK-RV32-NEXT: addi a0, a0, %lo(f)
; CHECK-RV32-NEXT: vse32.v v8, (a0)
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: sw a1, 4(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: slli a1, a0, 1
-; CHECK-RV32-NEXT: add a0, a1, a0
-; CHECK-RV32-NEXT: lw a1, 4(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
; CHECK-RV32-NEXT: addi a0, sp, 16
-; CHECK-RV32-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
; CHECK-RV32-NEXT: csrr a0, vlenb
; CHECK-RV32-NEXT: slli a0, a0, 2
; CHECK-RV32-NEXT: add sp, sp, a0
@@ -136,56 +94,8 @@ define void @foo_lmul4() nounwind #0 {
; CHECK-RV32-NEXT: csrr a0, vlenb
; CHECK-RV32-NEXT: slli a0, a0, 3
; CHECK-RV32-NEXT: sub sp, sp, a0
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: sw a1, 4(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: slli a1, a0, 3
-; CHECK-RV32-NEXT: sub a0, a1, a0
-; CHECK-RV32-NEXT: lw a1, 4(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: sw a1, 4(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: mv a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a0, a0, a1
-; CHECK-RV32-NEXT: lw a1, 4(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: sw a1, 4(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: slli a1, a0, 2
-; CHECK-RV32-NEXT: add a0, a1, a0
-; CHECK-RV32-NEXT: lw a1, 4(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 2
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: sw a1, 4(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: slli a1, a0, 1
-; CHECK-RV32-NEXT: add a0, a1, a0
-; CHECK-RV32-NEXT: lw a1, 4(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill
; CHECK-RV32-NEXT: addi a0, sp, 16
-; CHECK-RV32-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-NEXT: lui a0, %hi(g)
; CHECK-RV32-NEXT: addi a0, a0, %lo(g)
; CHECK-RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
@@ -197,50 +107,8 @@ define void @foo_lmul4() nounwind #0 {
; CHECK-RV32-NEXT: lui a0, %hi(i)
; CHECK-RV32-NEXT: addi a0, a0, %lo(i)
; CHECK-RV32-NEXT: vse32.v v8, (a0)
-; CHECK-RV32-NEXT: sw a1, 4(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a1, a0, 3
-; CHECK-RV32-NEXT: sub a0, a1, a0
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: mv a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a0, a0, a1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a1, a0, 2
-; CHECK-RV32-NEXT: add a0, a1, a0
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 2
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a1, a0, 1
-; CHECK-RV32-NEXT: add a0, a1, a0
-; CHECK-RV32-NEXT: lw a1, 4(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload
; CHECK-RV32-NEXT: addi a0, sp, 16
-; CHECK-RV32-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-NEXT: csrr a0, vlenb
; CHECK-RV32-NEXT: slli a0, a0, 3
; CHECK-RV32-NEXT: add sp, sp, a0
@@ -268,108 +136,12 @@ define void @foo_lmul8() nounwind #0 {
; CHECK-RV32-NEXT: slli a0, a0, 4
; CHECK-RV32-NEXT: sub sp, sp, a0
; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a1, a0, 4
-; CHECK-RV32-NEXT: sub a0, a1, a0
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: mv a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a1, a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a0, a0, a1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: mv a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 2
-; CHECK-RV32-NEXT: add a1, a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a0, a0, a1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 2
-; CHECK-RV32-NEXT: mv a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a0, a0, a1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: mv a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a1, a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 2
-; CHECK-RV32-NEXT: add a0, a0, a1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: mv a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 2
-; CHECK-RV32-NEXT: add a0, a0, a1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a1, a0, 3
-; CHECK-RV32-NEXT: add a0, a1, a0
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
; CHECK-RV32-NEXT: slli a0, a0, 3
; CHECK-RV32-NEXT: add a0, sp, a0
; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a1, a0, 3
-; CHECK-RV32-NEXT: sub a0, a1, a0
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: mv a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a0, a0, a1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a1, a0, 2
-; CHECK-RV32-NEXT: add a0, a1, a0
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 2
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a1, a0, 1
-; CHECK-RV32-NEXT: add a0, a1, a0
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-NEXT: addi a0, sp, 16
-; CHECK-RV32-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-NEXT: lui a0, %hi(j)
; CHECK-RV32-NEXT: addi a0, a0, %lo(j)
; CHECK-RV32-NEXT: li a1, 32
@@ -383,108 +155,12 @@ define void @foo_lmul8() nounwind #0 {
; CHECK-RV32-NEXT: addi a0, a0, %lo(l)
; CHECK-RV32-NEXT: vse32.v v8, (a0)
; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a1, a0, 4
-; CHECK-RV32-NEXT: sub a0, a1, a0
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: mv a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a1, a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a0, a0, a1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: mv a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 2
-; CHECK-RV32-NEXT: add a1, a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a0, a0, a1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 2
-; CHECK-RV32-NEXT: mv a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a0, a0, a1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: mv a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a1, a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 2
-; CHECK-RV32-NEXT: add a0, a0, a1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: mv a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 2
-; CHECK-RV32-NEXT: add a0, a0, a1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a1, a0, 3
-; CHECK-RV32-NEXT: add a0, a1, a0
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
; CHECK-RV32-NEXT: slli a0, a0, 3
; CHECK-RV32-NEXT: add a0, sp, a0
; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a1, a0, 3
-; CHECK-RV32-NEXT: sub a0, a1, a0
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: mv a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a0, a0, a1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a1, a0, 2
-; CHECK-RV32-NEXT: add a0, a1, a0
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 2
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a1, a0, 1
-; CHECK-RV32-NEXT: add a0, a1, a0
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-NEXT: addi a0, sp, 16
-; CHECK-RV32-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-NEXT: csrr a0, vlenb
; CHECK-RV32-NEXT: slli a0, a0, 4
; CHECK-RV32-NEXT: add sp, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/known-never-zero.ll b/llvm/test/CodeGen/RISCV/rvv/known-never-zero.ll
index 4d9a6ae..749b2041 100644
--- a/llvm/test/CodeGen/RISCV/rvv/known-never-zero.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/known-never-zero.ll
@@ -11,7 +11,7 @@ define i32 @vscale_known_nonzero() {
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: negw a1, a0
+; CHECK-NEXT: neg a1, a0
; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: slli a1, a0, 6
; CHECK-NEXT: slli a2, a0, 8
@@ -19,16 +19,16 @@ define i32 @vscale_known_nonzero() {
; CHECK-NEXT: slli a4, a0, 12
; CHECK-NEXT: add a1, a1, a2
; CHECK-NEXT: slli a2, a0, 16
-; CHECK-NEXT: subw a3, a3, a4
+; CHECK-NEXT: sub a3, a3, a4
; CHECK-NEXT: slli a4, a0, 18
-; CHECK-NEXT: subw a2, a2, a4
+; CHECK-NEXT: sub a2, a2, a4
; CHECK-NEXT: slli a4, a0, 4
-; CHECK-NEXT: subw a4, a0, a4
+; CHECK-NEXT: sub a4, a0, a4
; CHECK-NEXT: add a1, a4, a1
; CHECK-NEXT: slli a4, a0, 14
-; CHECK-NEXT: subw a3, a3, a4
+; CHECK-NEXT: sub a3, a3, a4
; CHECK-NEXT: slli a4, a0, 23
-; CHECK-NEXT: subw a2, a2, a4
+; CHECK-NEXT: sub a2, a2, a4
; CHECK-NEXT: slli a0, a0, 27
; CHECK-NEXT: add a1, a1, a3
; CHECK-NEXT: add a0, a2, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir
index a050034..a7eaf39 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir
@@ -78,12 +78,12 @@ body: |
; CHECK-NEXT: %false:vrnov0 = COPY $v9
; CHECK-NEXT: %mask:vmv0 = COPY $v0
; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 4, 5 /* e32 */, 0 /* tu, mu */
- ; CHECK-NEXT: %x:vr = PseudoVMV_V_V_M1 %pt, %true, 8, 5 /* e32 */, 0 /* tu, mu */
+ ; CHECK-NEXT: %x:vr = PseudoVMV_V_V_M1 %pt, %true, 4, 5 /* e32 */, 0 /* tu, mu */
%pt:vrnov0 = COPY $v8
%false:vrnov0 = COPY $v9
%mask:vmv0 = COPY $v0
- %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 4, 5 /* e32 */, 0 /* tu, mu */
- %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, 8, 5 /* e32 */
+ %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 8, 5 /* e32 */, 0 /* tu, mu */
+ %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, 4, 5 /* e32 */
...
---
# Shouldn't be converted because false operands are different
@@ -163,3 +163,47 @@ body: |
%true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, 4, 5 /* e32 */, 0 /* tu, mu */
bb.1:
%5:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %false, %true, %mask, 4, 5 /* e32 */
+...
+---
+# Shouldn't be converted because vmerge adds back in elements from false past the avl, which would be lost if we converted to vmv.v.v
+name: preserve_false
+body: |
+ bb.0:
+ liveins: $v8, $v9, $v0, $x8, $x9
+ ; CHECK-LABEL: name: preserve_false
+ ; CHECK: liveins: $v8, $v9, $v0, $x8, $x9
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %pt:vrnov0 = COPY $v8
+ ; CHECK-NEXT: %false:vr = COPY $v9
+ ; CHECK-NEXT: %mask:vmv0 = COPY $v0
+ ; CHECK-NEXT: %avl1:gprnox0 = COPY $x8
+ ; CHECK-NEXT: %avl2:gprnox0 = COPY $x9
+ ; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, %avl1, 5 /* e32 */, 3 /* ta, ma */
+ ; CHECK-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, %avl2, 5 /* e32 */
+ %pt:vrnov0 = COPY $v8
+ %false:vr = COPY $v9
+ %mask:vmv0 = COPY $v0
+ %avl1:gprnox0 = COPY $x8
+ %avl2:gprnox0 = COPY $x9
+ %true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, %avl1, 5 /* e32 */, 3 /* ta, ma */
+ %5:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, %avl2, 5 /* e32 */
+...
+---
+# But we can convert this one: since vmerge's avl is <= true's avl, no elements of false past the avl are lost.
+name: preserve_false_avl_known_le
+body: |
+ bb.0:
+ liveins: $v8, $v9, $v0
+ ; CHECK-LABEL: name: preserve_false_avl_known_le
+ ; CHECK: liveins: $v8, $v9, $v0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %pt:vr = COPY $v8
+ ; CHECK-NEXT: %false:vrnov0 = COPY $v9
+ ; CHECK-NEXT: %mask:vmv0 = COPY $v0
+ ; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 1, 5 /* e32 */, 3 /* ta, ma */
+ ; CHECK-NEXT: [[PseudoVMV_V_V_M1_:%[0-9]+]]:vr = PseudoVMV_V_V_M1 %pt, %true, 1, 5 /* e32 */, 0 /* tu, mu */
+ %pt:vrnov0 = COPY $v8
+ %false:vr = COPY $v9
+ %mask:vmv0 = COPY $v0
+ %true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, 2, 5 /* e32 */, 3 /* ta, ma */
+ %5:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, 1, 5 /* e32 */
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-vmerge-to-vmv.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-vmerge-to-vmv.ll
index 3aeb4e8..9ffc84a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-vmerge-to-vmv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-vmerge-to-vmv.ll
@@ -71,10 +71,31 @@ define <vscale x 8 x i64> @vpmerge_m8(<vscale x 8 x i64> %x, <vscale x 8 x i64>
ret <vscale x 8 x i64> %1
}
-declare <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1>, <vscale x 1 x i8>, <vscale x 1 x i8>, i32)
-declare <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>, <vscale x 2 x i8>, i32)
-declare <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1>, <vscale x 4 x i8>, <vscale x 4 x i8>, i32)
-declare <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1>, <vscale x 8 x i8>, <vscale x 8 x i8>, i32)
-declare <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
-declare <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1>, <vscale x 8 x i32>, <vscale x 8 x i32>, i32)
-declare <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1>, <vscale x 8 x i64>, <vscale x 8 x i64>, i32)
+; Shouldn't be converted because vmerge adds back in elements from false past avl, which would be lost if we converted to vmv.v.v
+define <vscale x 2 x i32> @preserve_false(ptr %p, <vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i1> %mask, i64 %avl1, i64 %avl2) {
+; CHECK-LABEL: preserve_false:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vle32.v v10, (a0), v0.t
+; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, ma
+; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
+; CHECK-NEXT: ret
+ %true = call <vscale x 2 x i32> @llvm.riscv.vle.mask(<vscale x 2 x i32> %false, ptr %p, <vscale x 2 x i1> %mask, i64 %avl1, i64 3)
+ %res = call <vscale x 2 x i32> @llvm.riscv.vmerge(<vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i32> %true, <vscale x 2 x i1> %mask, i64 %avl2)
+ ret <vscale x 2 x i32> %res
+}
+
+; Can fold this because its avl is known to be <= true's, so no elements from false need to be introduced past avl.
+define <vscale x 2 x i32> @preserve_false_avl_known_le(ptr %p, <vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: preserve_false_avl_known_le:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vle32.v v9, (a0), v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %true = call <vscale x 2 x i32> @llvm.riscv.vle.mask(<vscale x 2 x i32> %false, ptr %p, <vscale x 2 x i1> %mask, i64 2, i64 3)
+ %res = call <vscale x 2 x i32> @llvm.riscv.vmerge(<vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i32> %true, <vscale x 2 x i1> %mask, i64 1)
+ ret <vscale x 2 x i32> %res
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
index 8495dfe..32892bc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
@@ -1,16 +1,16 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
-; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: --check-prefixes=CHECK,CHECK32,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zfbfmin,+zvfbfmin,+v \
; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
-; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: --check-prefixes=CHECK,CHECK64,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
-; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: --check-prefixes=CHECK,CHECK32,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v \
; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
-; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: --check-prefixes=CHECK,CHECK64,ZVFHMIN
declare <vscale x 1 x i1> @llvm.vp.fcmp.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x bfloat>, metadata, <vscale x 1 x i1>, i32)
@@ -4820,6 +4820,427 @@ define <vscale x 8 x i1> @fcmp_uno_vf_swap_nxv8f64(<vscale x 8 x double> %va, do
declare <vscale x 32 x i1> @llvm.vp.fcmp.nxv32f64(<vscale x 32 x double>, <vscale x 32 x double>, metadata, <vscale x 32 x i1>, i32)
define <vscale x 32 x i1> @fcmp_oeq_vv_nxv32f64(<vscale x 32 x double> %va, <vscale x 32 x double> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK32-LABEL: fcmp_oeq_vv_nxv32f64:
+; CHECK32: # %bb.0:
+; CHECK32-NEXT: addi sp, sp, -48
+; CHECK32-NEXT: .cfi_def_cfa_offset 48
+; CHECK32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; CHECK32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; CHECK32-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; CHECK32-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
+; CHECK32-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
+; CHECK32-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
+; CHECK32-NEXT: .cfi_offset ra, -4
+; CHECK32-NEXT: .cfi_offset s0, -8
+; CHECK32-NEXT: .cfi_offset s1, -12
+; CHECK32-NEXT: .cfi_offset s2, -16
+; CHECK32-NEXT: .cfi_offset s3, -20
+; CHECK32-NEXT: .cfi_offset s4, -24
+; CHECK32-NEXT: csrr a1, vlenb
+; CHECK32-NEXT: slli a1, a1, 1
+; CHECK32-NEXT: mv a3, a1
+; CHECK32-NEXT: slli a1, a1, 2
+; CHECK32-NEXT: add a3, a3, a1
+; CHECK32-NEXT: slli a1, a1, 1
+; CHECK32-NEXT: add a1, a1, a3
+; CHECK32-NEXT: sub sp, sp, a1
+; CHECK32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 26 * vlenb
+; CHECK32-NEXT: mv s1, a6
+; CHECK32-NEXT: csrr a1, vlenb
+; CHECK32-NEXT: add a1, sp, a1
+; CHECK32-NEXT: addi a1, a1, 16
+; CHECK32-NEXT: vs1r.v v0, (a1) # vscale x 8-byte Folded Spill
+; CHECK32-NEXT: mv s3, a2
+; CHECK32-NEXT: mv s2, a0
+; CHECK32-NEXT: csrr a0, vlenb
+; CHECK32-NEXT: slli a1, a0, 3
+; CHECK32-NEXT: add a0, a1, a0
+; CHECK32-NEXT: add a0, sp, a0
+; CHECK32-NEXT: addi a0, a0, 16
+; CHECK32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK32-NEXT: csrr a0, vlenb
+; CHECK32-NEXT: slli a0, a0, 1
+; CHECK32-NEXT: mv a1, a0
+; CHECK32-NEXT: slli a0, a0, 3
+; CHECK32-NEXT: add a0, a0, a1
+; CHECK32-NEXT: add a0, sp, a0
+; CHECK32-NEXT: addi a0, a0, 16
+; CHECK32-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK32-NEXT: csrr s0, vlenb
+; CHECK32-NEXT: li a1, 24
+; CHECK32-NEXT: mv a0, s0
+; CHECK32-NEXT: call __mulsi3
+; CHECK32-NEXT: csrr a1, vlenb
+; CHECK32-NEXT: add a1, sp, a1
+; CHECK32-NEXT: addi a1, a1, 16
+; CHECK32-NEXT: vl1r.v v6, (a1) # vscale x 8-byte Folded Reload
+; CHECK32-NEXT: mv a1, a0
+; CHECK32-NEXT: slli a4, s0, 3
+; CHECK32-NEXT: srli s4, s0, 2
+; CHECK32-NEXT: srli a0, s0, 3
+; CHECK32-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK32-NEXT: vslidedown.vx v7, v6, s4
+; CHECK32-NEXT: add a2, s3, a4
+; CHECK32-NEXT: vl8re64.v v16, (a2)
+; CHECK32-NEXT: slli a6, s0, 4
+; CHECK32-NEXT: slli a2, s0, 1
+; CHECK32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; CHECK32-NEXT: vslidedown.vx v0, v6, a0
+; CHECK32-NEXT: mv a3, s1
+; CHECK32-NEXT: bltu s1, a2, .LBB257_2
+; CHECK32-NEXT: # %bb.1:
+; CHECK32-NEXT: mv a3, a2
+; CHECK32-NEXT: .LBB257_2:
+; CHECK32-NEXT: add a5, s3, a1
+; CHECK32-NEXT: add a1, s2, a4
+; CHECK32-NEXT: vslidedown.vx v9, v7, a0
+; CHECK32-NEXT: csrr a4, vlenb
+; CHECK32-NEXT: slli a7, a4, 4
+; CHECK32-NEXT: add a4, a7, a4
+; CHECK32-NEXT: add a4, sp, a4
+; CHECK32-NEXT: addi a4, a4, 16
+; CHECK32-NEXT: vs1r.v v9, (a4) # vscale x 8-byte Folded Spill
+; CHECK32-NEXT: add a4, s3, a6
+; CHECK32-NEXT: vl8re64.v v24, (s3)
+; CHECK32-NEXT: sub a6, a3, s0
+; CHECK32-NEXT: sltu a7, a3, a6
+; CHECK32-NEXT: addi a7, a7, -1
+; CHECK32-NEXT: and a6, a7, a6
+; CHECK32-NEXT: csrr a7, vlenb
+; CHECK32-NEXT: slli t0, a7, 3
+; CHECK32-NEXT: add a7, t0, a7
+; CHECK32-NEXT: add a7, sp, a7
+; CHECK32-NEXT: addi a7, a7, 16
+; CHECK32-NEXT: vl8r.v v8, (a7) # vscale x 64-byte Folded Reload
+; CHECK32-NEXT: vsetvli zero, a6, e64, m8, ta, ma
+; CHECK32-NEXT: vmfeq.vv v5, v8, v16, v0.t
+; CHECK32-NEXT: bltu a3, s0, .LBB257_4
+; CHECK32-NEXT: # %bb.3:
+; CHECK32-NEXT: mv a3, s0
+; CHECK32-NEXT: .LBB257_4:
+; CHECK32-NEXT: vmv1r.v v0, v6
+; CHECK32-NEXT: vl8re64.v v8, (a5)
+; CHECK32-NEXT: csrr a5, vlenb
+; CHECK32-NEXT: slli a6, a5, 3
+; CHECK32-NEXT: add a5, a6, a5
+; CHECK32-NEXT: add a5, sp, a5
+; CHECK32-NEXT: addi a5, a5, 16
+; CHECK32-NEXT: vs8r.v v8, (a5) # vscale x 64-byte Folded Spill
+; CHECK32-NEXT: csrr a5, vlenb
+; CHECK32-NEXT: slli a5, a5, 1
+; CHECK32-NEXT: mv a6, a5
+; CHECK32-NEXT: slli a5, a5, 3
+; CHECK32-NEXT: add a5, a5, a6
+; CHECK32-NEXT: add a5, sp, a5
+; CHECK32-NEXT: addi a5, a5, 16
+; CHECK32-NEXT: vl8r.v v16, (a5) # vscale x 64-byte Folded Reload
+; CHECK32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK32-NEXT: vmfeq.vv v8, v16, v24, v0.t
+; CHECK32-NEXT: vl8re64.v v16, (a1)
+; CHECK32-NEXT: csrr a1, vlenb
+; CHECK32-NEXT: add a1, sp, a1
+; CHECK32-NEXT: addi a1, a1, 16
+; CHECK32-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; CHECK32-NEXT: vl8re64.v v16, (a4)
+; CHECK32-NEXT: sub a1, s1, a2
+; CHECK32-NEXT: sltu a2, s1, a1
+; CHECK32-NEXT: vl8re64.v v24, (s2)
+; CHECK32-NEXT: addi a2, a2, -1
+; CHECK32-NEXT: and s1, a2, a1
+; CHECK32-NEXT: vsetvli zero, s4, e8, mf2, tu, ma
+; CHECK32-NEXT: vslideup.vx v8, v5, a0
+; CHECK32-NEXT: csrr a1, vlenb
+; CHECK32-NEXT: slli a1, a1, 1
+; CHECK32-NEXT: mv a2, a1
+; CHECK32-NEXT: slli a1, a1, 3
+; CHECK32-NEXT: add a1, a1, a2
+; CHECK32-NEXT: add a1, sp, a1
+; CHECK32-NEXT: addi a1, a1, 16
+; CHECK32-NEXT: vs1r.v v8, (a1) # vscale x 8-byte Folded Spill
+; CHECK32-NEXT: mv a1, s1
+; CHECK32-NEXT: bltu s1, s0, .LBB257_6
+; CHECK32-NEXT: # %bb.5:
+; CHECK32-NEXT: mv a1, s0
+; CHECK32-NEXT: .LBB257_6:
+; CHECK32-NEXT: vmv1r.v v0, v7
+; CHECK32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK32-NEXT: vmfeq.vv v8, v24, v16, v0.t
+; CHECK32-NEXT: addi a1, sp, 16
+; CHECK32-NEXT: vs1r.v v8, (a1) # vscale x 8-byte Folded Spill
+; CHECK32-NEXT: li a1, 3
+; CHECK32-NEXT: call __mulsi3
+; CHECK32-NEXT: csrr a1, vlenb
+; CHECK32-NEXT: slli a2, a1, 4
+; CHECK32-NEXT: add a1, a2, a1
+; CHECK32-NEXT: add a1, sp, a1
+; CHECK32-NEXT: addi a1, a1, 16
+; CHECK32-NEXT: vl1r.v v0, (a1) # vscale x 8-byte Folded Reload
+; CHECK32-NEXT: csrr a1, vlenb
+; CHECK32-NEXT: slli a1, a1, 1
+; CHECK32-NEXT: mv a2, a1
+; CHECK32-NEXT: slli a1, a1, 3
+; CHECK32-NEXT: add a1, a1, a2
+; CHECK32-NEXT: add a1, sp, a1
+; CHECK32-NEXT: addi a1, a1, 16
+; CHECK32-NEXT: vl1r.v v9, (a1) # vscale x 8-byte Folded Reload
+; CHECK32-NEXT: addi a1, sp, 16
+; CHECK32-NEXT: vl1r.v v8, (a1) # vscale x 8-byte Folded Reload
+; CHECK32-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
+; CHECK32-NEXT: vslideup.vx v9, v8, s4
+; CHECK32-NEXT: sub a1, s1, s0
+; CHECK32-NEXT: sltu a2, s1, a1
+; CHECK32-NEXT: addi a2, a2, -1
+; CHECK32-NEXT: and a1, a2, a1
+; CHECK32-NEXT: csrr a2, vlenb
+; CHECK32-NEXT: slli a3, a2, 3
+; CHECK32-NEXT: add a2, a3, a2
+; CHECK32-NEXT: add a2, sp, a2
+; CHECK32-NEXT: addi a2, a2, 16
+; CHECK32-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
+; CHECK32-NEXT: csrr a2, vlenb
+; CHECK32-NEXT: add a2, sp, a2
+; CHECK32-NEXT: addi a2, a2, 16
+; CHECK32-NEXT: vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; CHECK32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK32-NEXT: vmfeq.vv v8, v24, v16, v0.t
+; CHECK32-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; CHECK32-NEXT: vslideup.vx v9, v8, a0
+; CHECK32-NEXT: vmv1r.v v0, v9
+; CHECK32-NEXT: csrr a0, vlenb
+; CHECK32-NEXT: slli a0, a0, 1
+; CHECK32-NEXT: mv a1, a0
+; CHECK32-NEXT: slli a0, a0, 2
+; CHECK32-NEXT: add a1, a1, a0
+; CHECK32-NEXT: slli a0, a0, 1
+; CHECK32-NEXT: add a0, a0, a1
+; CHECK32-NEXT: add sp, sp, a0
+; CHECK32-NEXT: .cfi_def_cfa sp, 48
+; CHECK32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; CHECK32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; CHECK32-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; CHECK32-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; CHECK32-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
+; CHECK32-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
+; CHECK32-NEXT: .cfi_restore ra
+; CHECK32-NEXT: .cfi_restore s0
+; CHECK32-NEXT: .cfi_restore s1
+; CHECK32-NEXT: .cfi_restore s2
+; CHECK32-NEXT: .cfi_restore s3
+; CHECK32-NEXT: .cfi_restore s4
+; CHECK32-NEXT: addi sp, sp, 48
+; CHECK32-NEXT: .cfi_def_cfa_offset 0
+; CHECK32-NEXT: ret
+;
+; CHECK64-LABEL: fcmp_oeq_vv_nxv32f64:
+; CHECK64: # %bb.0:
+; CHECK64-NEXT: addi sp, sp, -64
+; CHECK64-NEXT: .cfi_def_cfa_offset 64
+; CHECK64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; CHECK64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; CHECK64-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
+; CHECK64-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
+; CHECK64-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
+; CHECK64-NEXT: sd s4, 16(sp) # 8-byte Folded Spill
+; CHECK64-NEXT: .cfi_offset ra, -8
+; CHECK64-NEXT: .cfi_offset s0, -16
+; CHECK64-NEXT: .cfi_offset s1, -24
+; CHECK64-NEXT: .cfi_offset s2, -32
+; CHECK64-NEXT: .cfi_offset s3, -40
+; CHECK64-NEXT: .cfi_offset s4, -48
+; CHECK64-NEXT: csrr a1, vlenb
+; CHECK64-NEXT: slli a1, a1, 1
+; CHECK64-NEXT: mv a3, a1
+; CHECK64-NEXT: slli a1, a1, 2
+; CHECK64-NEXT: add a3, a3, a1
+; CHECK64-NEXT: slli a1, a1, 1
+; CHECK64-NEXT: add a1, a1, a3
+; CHECK64-NEXT: sub sp, sp, a1
+; CHECK64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 26 * vlenb
+; CHECK64-NEXT: mv s1, a6
+; CHECK64-NEXT: csrr a1, vlenb
+; CHECK64-NEXT: add a1, sp, a1
+; CHECK64-NEXT: addi a1, a1, 16
+; CHECK64-NEXT: vs1r.v v0, (a1) # vscale x 8-byte Folded Spill
+; CHECK64-NEXT: mv s3, a2
+; CHECK64-NEXT: mv s2, a0
+; CHECK64-NEXT: csrr a0, vlenb
+; CHECK64-NEXT: slli a1, a0, 3
+; CHECK64-NEXT: add a0, a1, a0
+; CHECK64-NEXT: add a0, sp, a0
+; CHECK64-NEXT: addi a0, a0, 16
+; CHECK64-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; CHECK64-NEXT: csrr a0, vlenb
+; CHECK64-NEXT: slli a0, a0, 1
+; CHECK64-NEXT: mv a1, a0
+; CHECK64-NEXT: slli a0, a0, 3
+; CHECK64-NEXT: add a0, a0, a1
+; CHECK64-NEXT: add a0, sp, a0
+; CHECK64-NEXT: addi a0, a0, 16
+; CHECK64-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; CHECK64-NEXT: csrr s0, vlenb
+; CHECK64-NEXT: li a1, 24
+; CHECK64-NEXT: mv a0, s0
+; CHECK64-NEXT: call __muldi3
+; CHECK64-NEXT: csrr a1, vlenb
+; CHECK64-NEXT: add a1, sp, a1
+; CHECK64-NEXT: addi a1, a1, 16
+; CHECK64-NEXT: vl1r.v v6, (a1) # vscale x 8-byte Folded Reload
+; CHECK64-NEXT: mv a1, a0
+; CHECK64-NEXT: slli a4, s0, 3
+; CHECK64-NEXT: srli s4, s0, 2
+; CHECK64-NEXT: srli a0, s0, 3
+; CHECK64-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; CHECK64-NEXT: vslidedown.vx v7, v6, s4
+; CHECK64-NEXT: add a2, s3, a4
+; CHECK64-NEXT: vl8re64.v v16, (a2)
+; CHECK64-NEXT: slli a6, s0, 4
+; CHECK64-NEXT: slli a2, s0, 1
+; CHECK64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; CHECK64-NEXT: vslidedown.vx v0, v6, a0
+; CHECK64-NEXT: mv a3, s1
+; CHECK64-NEXT: bltu s1, a2, .LBB257_2
+; CHECK64-NEXT: # %bb.1:
+; CHECK64-NEXT: mv a3, a2
+; CHECK64-NEXT: .LBB257_2:
+; CHECK64-NEXT: add a5, s3, a1
+; CHECK64-NEXT: add a1, s2, a4
+; CHECK64-NEXT: vslidedown.vx v9, v7, a0
+; CHECK64-NEXT: csrr a4, vlenb
+; CHECK64-NEXT: slli a7, a4, 4
+; CHECK64-NEXT: add a4, a7, a4
+; CHECK64-NEXT: add a4, sp, a4
+; CHECK64-NEXT: addi a4, a4, 16
+; CHECK64-NEXT: vs1r.v v9, (a4) # vscale x 8-byte Folded Spill
+; CHECK64-NEXT: add a4, s3, a6
+; CHECK64-NEXT: vl8re64.v v24, (s3)
+; CHECK64-NEXT: sub a6, a3, s0
+; CHECK64-NEXT: sltu a7, a3, a6
+; CHECK64-NEXT: addi a7, a7, -1
+; CHECK64-NEXT: and a6, a7, a6
+; CHECK64-NEXT: csrr a7, vlenb
+; CHECK64-NEXT: slli t0, a7, 3
+; CHECK64-NEXT: add a7, t0, a7
+; CHECK64-NEXT: add a7, sp, a7
+; CHECK64-NEXT: addi a7, a7, 16
+; CHECK64-NEXT: vl8r.v v8, (a7) # vscale x 64-byte Folded Reload
+; CHECK64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
+; CHECK64-NEXT: vmfeq.vv v5, v8, v16, v0.t
+; CHECK64-NEXT: bltu a3, s0, .LBB257_4
+; CHECK64-NEXT: # %bb.3:
+; CHECK64-NEXT: mv a3, s0
+; CHECK64-NEXT: .LBB257_4:
+; CHECK64-NEXT: vmv1r.v v0, v6
+; CHECK64-NEXT: vl8re64.v v8, (a5)
+; CHECK64-NEXT: csrr a5, vlenb
+; CHECK64-NEXT: slli a6, a5, 3
+; CHECK64-NEXT: add a5, a6, a5
+; CHECK64-NEXT: add a5, sp, a5
+; CHECK64-NEXT: addi a5, a5, 16
+; CHECK64-NEXT: vs8r.v v8, (a5) # vscale x 64-byte Folded Spill
+; CHECK64-NEXT: csrr a5, vlenb
+; CHECK64-NEXT: slli a5, a5, 1
+; CHECK64-NEXT: mv a6, a5
+; CHECK64-NEXT: slli a5, a5, 3
+; CHECK64-NEXT: add a5, a5, a6
+; CHECK64-NEXT: add a5, sp, a5
+; CHECK64-NEXT: addi a5, a5, 16
+; CHECK64-NEXT: vl8r.v v16, (a5) # vscale x 64-byte Folded Reload
+; CHECK64-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK64-NEXT: vmfeq.vv v8, v16, v24, v0.t
+; CHECK64-NEXT: vl8re64.v v16, (a1)
+; CHECK64-NEXT: csrr a1, vlenb
+; CHECK64-NEXT: add a1, sp, a1
+; CHECK64-NEXT: addi a1, a1, 16
+; CHECK64-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; CHECK64-NEXT: vl8re64.v v16, (a4)
+; CHECK64-NEXT: sub a1, s1, a2
+; CHECK64-NEXT: sltu a2, s1, a1
+; CHECK64-NEXT: vl8re64.v v24, (s2)
+; CHECK64-NEXT: addi a2, a2, -1
+; CHECK64-NEXT: and s1, a2, a1
+; CHECK64-NEXT: vsetvli zero, s4, e8, mf2, tu, ma
+; CHECK64-NEXT: vslideup.vx v8, v5, a0
+; CHECK64-NEXT: csrr a1, vlenb
+; CHECK64-NEXT: slli a1, a1, 1
+; CHECK64-NEXT: mv a2, a1
+; CHECK64-NEXT: slli a1, a1, 3
+; CHECK64-NEXT: add a1, a1, a2
+; CHECK64-NEXT: add a1, sp, a1
+; CHECK64-NEXT: addi a1, a1, 16
+; CHECK64-NEXT: vs1r.v v8, (a1) # vscale x 8-byte Folded Spill
+; CHECK64-NEXT: mv a1, s1
+; CHECK64-NEXT: bltu s1, s0, .LBB257_6
+; CHECK64-NEXT: # %bb.5:
+; CHECK64-NEXT: mv a1, s0
+; CHECK64-NEXT: .LBB257_6:
+; CHECK64-NEXT: vmv1r.v v0, v7
+; CHECK64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK64-NEXT: vmfeq.vv v8, v24, v16, v0.t
+; CHECK64-NEXT: addi a1, sp, 16
+; CHECK64-NEXT: vs1r.v v8, (a1) # vscale x 8-byte Folded Spill
+; CHECK64-NEXT: li a1, 3
+; CHECK64-NEXT: call __muldi3
+; CHECK64-NEXT: csrr a1, vlenb
+; CHECK64-NEXT: slli a2, a1, 4
+; CHECK64-NEXT: add a1, a2, a1
+; CHECK64-NEXT: add a1, sp, a1
+; CHECK64-NEXT: addi a1, a1, 16
+; CHECK64-NEXT: vl1r.v v0, (a1) # vscale x 8-byte Folded Reload
+; CHECK64-NEXT: csrr a1, vlenb
+; CHECK64-NEXT: slli a1, a1, 1
+; CHECK64-NEXT: mv a2, a1
+; CHECK64-NEXT: slli a1, a1, 3
+; CHECK64-NEXT: add a1, a1, a2
+; CHECK64-NEXT: add a1, sp, a1
+; CHECK64-NEXT: addi a1, a1, 16
+; CHECK64-NEXT: vl1r.v v9, (a1) # vscale x 8-byte Folded Reload
+; CHECK64-NEXT: addi a1, sp, 16
+; CHECK64-NEXT: vl1r.v v8, (a1) # vscale x 8-byte Folded Reload
+; CHECK64-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
+; CHECK64-NEXT: vslideup.vx v9, v8, s4
+; CHECK64-NEXT: sub a1, s1, s0
+; CHECK64-NEXT: sltu a2, s1, a1
+; CHECK64-NEXT: addi a2, a2, -1
+; CHECK64-NEXT: and a1, a2, a1
+; CHECK64-NEXT: csrr a2, vlenb
+; CHECK64-NEXT: slli a3, a2, 3
+; CHECK64-NEXT: add a2, a3, a2
+; CHECK64-NEXT: add a2, sp, a2
+; CHECK64-NEXT: addi a2, a2, 16
+; CHECK64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
+; CHECK64-NEXT: csrr a2, vlenb
+; CHECK64-NEXT: add a2, sp, a2
+; CHECK64-NEXT: addi a2, a2, 16
+; CHECK64-NEXT: vl8r.v v24, (a2) # vscale x 64-byte Folded Reload
+; CHECK64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK64-NEXT: vmfeq.vv v8, v24, v16, v0.t
+; CHECK64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; CHECK64-NEXT: vslideup.vx v9, v8, a0
+; CHECK64-NEXT: vmv1r.v v0, v9
+; CHECK64-NEXT: csrr a0, vlenb
+; CHECK64-NEXT: slli a0, a0, 1
+; CHECK64-NEXT: mv a1, a0
+; CHECK64-NEXT: slli a0, a0, 2
+; CHECK64-NEXT: add a1, a1, a0
+; CHECK64-NEXT: slli a0, a0, 1
+; CHECK64-NEXT: add a0, a0, a1
+; CHECK64-NEXT: add sp, sp, a0
+; CHECK64-NEXT: .cfi_def_cfa sp, 64
+; CHECK64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; CHECK64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; CHECK64-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; CHECK64-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
+; CHECK64-NEXT: ld s3, 24(sp) # 8-byte Folded Reload
+; CHECK64-NEXT: ld s4, 16(sp) # 8-byte Folded Reload
+; CHECK64-NEXT: .cfi_restore ra
+; CHECK64-NEXT: .cfi_restore s0
+; CHECK64-NEXT: .cfi_restore s1
+; CHECK64-NEXT: .cfi_restore s2
+; CHECK64-NEXT: .cfi_restore s3
+; CHECK64-NEXT: .cfi_restore s4
+; CHECK64-NEXT: addi sp, sp, 64
+; CHECK64-NEXT: .cfi_def_cfa_offset 0
+; CHECK64-NEXT: ret
%v = call <vscale x 32 x i1> @llvm.vp.fcmp.nxv32f64(<vscale x 32 x double> %va, <vscale x 32 x double> %vb, metadata !"oeq", <vscale x 32 x i1> %m, i32 %evl)
ret <vscale x 32 x i1> %v
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
index c216fb6..346e40a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
@@ -549,7 +549,7 @@ define void @sink_splat_rsub_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-NEXT: .LBB10_6: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: lw a3, 0(a2)
-; CHECK-NEXT: subw a3, a1, a3
+; CHECK-NEXT: sub a3, a1, a3
; CHECK-NEXT: sw a3, 0(a2)
; CHECK-NEXT: addi a2, a2, 4
; CHECK-NEXT: bne a2, a0, .LBB10_6
diff --git a/llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll
index 66e114c..f295bd8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll
@@ -2300,7 +2300,7 @@ define void @vand_vx_loop_hoisted_not(ptr %a, i32 noundef signext %mask) {
; CHECK-RV64-NEXT: j .LBB98_5
; CHECK-RV64-NEXT: .LBB98_2: # %vector.ph
; CHECK-RV64-NEXT: srli a3, a4, 1
-; CHECK-RV64-NEXT: negw a2, a3
+; CHECK-RV64-NEXT: neg a2, a3
; CHECK-RV64-NEXT: andi a2, a2, 256
; CHECK-RV64-NEXT: slli a4, a4, 1
; CHECK-RV64-NEXT: mv a5, a0
@@ -2393,7 +2393,7 @@ define void @vand_vx_loop_hoisted_not(ptr %a, i32 noundef signext %mask) {
; CHECK-ZVKB-NOZBB64-NEXT: j .LBB98_5
; CHECK-ZVKB-NOZBB64-NEXT: .LBB98_2: # %vector.ph
; CHECK-ZVKB-NOZBB64-NEXT: srli a3, a4, 1
-; CHECK-ZVKB-NOZBB64-NEXT: negw a2, a3
+; CHECK-ZVKB-NOZBB64-NEXT: neg a2, a3
; CHECK-ZVKB-NOZBB64-NEXT: andi a2, a2, 256
; CHECK-ZVKB-NOZBB64-NEXT: slli a4, a4, 1
; CHECK-ZVKB-NOZBB64-NEXT: mv a5, a0
@@ -2485,7 +2485,7 @@ define void @vand_vx_loop_hoisted_not(ptr %a, i32 noundef signext %mask) {
; CHECK-ZVKB-ZBB64-NEXT: j .LBB98_5
; CHECK-ZVKB-ZBB64-NEXT: .LBB98_2: # %vector.ph
; CHECK-ZVKB-ZBB64-NEXT: srli a3, a4, 1
-; CHECK-ZVKB-ZBB64-NEXT: negw a2, a3
+; CHECK-ZVKB-ZBB64-NEXT: neg a2, a3
; CHECK-ZVKB-ZBB64-NEXT: andi a2, a2, 256
; CHECK-ZVKB-ZBB64-NEXT: slli a4, a4, 1
; CHECK-ZVKB-ZBB64-NEXT: mv a5, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vec3-setcc-crash.ll b/llvm/test/CodeGen/RISCV/rvv/vec3-setcc-crash.ll
index 3740737..d0b184b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vec3-setcc-crash.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vec3-setcc-crash.ll
@@ -50,9 +50,9 @@ define void @vec3_setcc_crash(ptr %in, ptr %out) {
; RV64-NEXT: sgtz a5, a5
; RV64-NEXT: sgtz a4, a4
; RV64-NEXT: sgtz a3, a3
-; RV64-NEXT: negw a3, a3
-; RV64-NEXT: negw a4, a4
-; RV64-NEXT: negw a5, a5
+; RV64-NEXT: neg a3, a3
+; RV64-NEXT: neg a4, a4
+; RV64-NEXT: neg a5, a5
; RV64-NEXT: and a3, a3, a6
; RV64-NEXT: and a0, a4, a0
; RV64-NEXT: and a2, a5, a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
index 578b67e..f9f0aa6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
@@ -542,95 +542,30 @@ define { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x
define {<vscale x 16 x i8>, <vscale x 16 x i8>} @masked_load_factor2(ptr %p) {
; CHECK-LABEL: masked_load_factor2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vl4r.v v12, (a0)
-; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
-; CHECK-NEXT: vnsrl.wi v10, v12, 8
+; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma
+; CHECK-NEXT: vlseg2e8.v v8, (a0)
; CHECK-NEXT: ret
%vec = call <vscale x 32 x i8> @llvm.masked.load(ptr %p, i32 4, <vscale x 32 x i1> splat (i1 true), <vscale x 32 x i8> poison)
%deinterleaved.results = call {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> %vec)
ret {<vscale x 16 x i8>, <vscale x 16 x i8>} %deinterleaved.results
}
-define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @masked_loat_factor4(ptr %p) {
-; CHECK-LABEL: masked_loat_factor4:
+define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @masked_load_factor4(ptr %p) {
+; CHECK-LABEL: masked_load_factor4:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 2
-; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
-; CHECK-NEXT: vl4r.v v8, (a0)
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs4r.v v8, (a0)
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vlseg4e8.v v8, (a0)
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 2
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: .cfi_def_cfa sp, 16
-; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: ret
%vec = call <vscale x 32 x i8> @llvm.masked.load(ptr %p, i32 4, <vscale x 32 x i1> splat (i1 true), <vscale x 32 x i8> poison)
%deinterleaved.results = call {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @llvm.vector.deinterleave4.nxv32i8(<vscale x 32 x i8> %vec)
ret {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} %deinterleaved.results
}
-define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @masked_loat_factor4_mask(ptr %p, <vscale x 8 x i1> %mask) {
-; CHECK-LABEL: masked_loat_factor4_mask:
+define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @masked_load_factor4_mask(ptr %p, <vscale x 8 x i1> %mask) {
+; CHECK-LABEL: masked_load_factor4_mask:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: add a3, a1, a2
-; CHECK-NEXT: vmv.v.v v9, v8
-; CHECK-NEXT: srli a4, a2, 2
-; CHECK-NEXT: vmv.v.v v10, v8
-; CHECK-NEXT: srli a5, a2, 3
-; CHECK-NEXT: vmv.v.v v11, v8
-; CHECK-NEXT: vsseg4e8.v v8, (a1)
-; CHECK-NEXT: vl1r.v v8, (a1)
-; CHECK-NEXT: add a1, a4, a5
-; CHECK-NEXT: vl1r.v v9, (a3)
-; CHECK-NEXT: add a3, a3, a2
-; CHECK-NEXT: add a2, a3, a2
-; CHECK-NEXT: vl1r.v v10, (a3)
-; CHECK-NEXT: vl1r.v v11, (a2)
-; CHECK-NEXT: vmsne.vi v9, v9, 0
-; CHECK-NEXT: vmsne.vi v0, v8, 0
-; CHECK-NEXT: vmsne.vi v8, v10, 0
-; CHECK-NEXT: vmsne.vi v10, v11, 0
-; CHECK-NEXT: vsetvli zero, a4, e8, mf2, tu, ma
-; CHECK-NEXT: vslideup.vx v0, v9, a5
-; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
-; CHECK-NEXT: vslideup.vx v0, v8, a4
-; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vx v0, v10, a1
-; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0), v0.t
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 2
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs4r.v v8, (a0)
-; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT: vlseg4e8.v v8, (a0)
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: .cfi_def_cfa sp, 16
-; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: vlseg4e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
%interleaved.mask = tail call <vscale x 32 x i1> @llvm.vector.interleave4.nxv32i1(<vscale x 8 x i1> %mask, <vscale x 8 x i1> %mask, <vscale x 8 x i1> %mask, <vscale x 8 x i1> %mask)
%vec = call <vscale x 32 x i8> @llvm.masked.load(ptr %p, i32 4, <vscale x 32 x i1> %interleaved.mask, <vscale x 32 x i8> poison)
@@ -640,8 +575,8 @@ define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i
; Negative test - some of the deinterleaved elements might come from the
; passthru, not the load
-define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @masked_loat_factor4_passthru(ptr %p, <vscale x 8 x i1> %mask, <vscale x 32 x i8> %passthru) {
-; CHECK-LABEL: masked_loat_factor4_passthru:
+define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @masked_load_factor4_passthru(ptr %p, <vscale x 8 x i1> %mask, <vscale x 32 x i8> %passthru) {
+; CHECK-LABEL: masked_load_factor4_passthru:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
index af55aaa..7e7d11e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
@@ -303,3 +303,26 @@ define void @vector_interleave_store_factor8(<vscale x 2 x i32> %a, <vscale x 2
store <vscale x 16 x i32> %v, ptr %p
ret void
}
+
+define void @masked_store_factor3(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c, ptr %p) {
+; CHECK-LABEL: masked_store_factor3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsseg3e32.v v8, (a0)
+; CHECK-NEXT: ret
+ %v = call <vscale x 6 x i32> @llvm.vector.interleave3(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c)
+ call void @llvm.masked.store(<vscale x 6 x i32> %v, ptr %p, i32 4, <vscale x 6 x i1> splat (i1 true))
+ ret void
+}
+
+define void @masked_store_factor3_masked(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c, ptr %p, <vscale x 2 x i1> %m) {
+; CHECK-LABEL: masked_store_factor3_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsseg3e32.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+ %interleaved.mask = call <vscale x 6 x i1> @llvm.vector.interleave3(<vscale x 2 x i1> %m, <vscale x 2 x i1> %m, <vscale x 2 x i1> %m)
+ %v = call <vscale x 6 x i32> @llvm.vector.interleave3(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c)
+ call void @llvm.masked.store(<vscale x 6 x i32> %v, ptr %p, i32 4, <vscale x 6 x i1> %interleaved.mask)
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrol-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vrol-sdnode.ll
index 25a226e..eb129da 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vrol-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrol-sdnode.ll
@@ -959,7 +959,7 @@ define <vscale x 1 x i64> @vrol_vx_nxv1i64(<vscale x 1 x i64> %a, i64 %b) {
; CHECK-RV64-LABEL: vrol_vx_nxv1i64:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: andi a1, a0, 63
-; CHECK-RV64-NEXT: negw a0, a0
+; CHECK-RV64-NEXT: neg a0, a0
; CHECK-RV64-NEXT: vsetvli a2, zero, e64, m1, ta, ma
; CHECK-RV64-NEXT: vsll.vx v9, v8, a1
; CHECK-RV64-NEXT: andi a0, a0, 63
@@ -1022,7 +1022,7 @@ define <vscale x 2 x i64> @vrol_vx_nxv2i64(<vscale x 2 x i64> %a, i64 %b) {
; CHECK-RV64-LABEL: vrol_vx_nxv2i64:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: andi a1, a0, 63
-; CHECK-RV64-NEXT: negw a0, a0
+; CHECK-RV64-NEXT: neg a0, a0
; CHECK-RV64-NEXT: vsetvli a2, zero, e64, m2, ta, ma
; CHECK-RV64-NEXT: vsll.vx v10, v8, a1
; CHECK-RV64-NEXT: andi a0, a0, 63
@@ -1085,7 +1085,7 @@ define <vscale x 4 x i64> @vrol_vx_nxv4i64(<vscale x 4 x i64> %a, i64 %b) {
; CHECK-RV64-LABEL: vrol_vx_nxv4i64:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: andi a1, a0, 63
-; CHECK-RV64-NEXT: negw a0, a0
+; CHECK-RV64-NEXT: neg a0, a0
; CHECK-RV64-NEXT: vsetvli a2, zero, e64, m4, ta, ma
; CHECK-RV64-NEXT: vsll.vx v12, v8, a1
; CHECK-RV64-NEXT: andi a0, a0, 63
@@ -1148,7 +1148,7 @@ define <vscale x 8 x i64> @vrol_vx_nxv8i64(<vscale x 8 x i64> %a, i64 %b) {
; CHECK-RV64-LABEL: vrol_vx_nxv8i64:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: andi a1, a0, 63
-; CHECK-RV64-NEXT: negw a0, a0
+; CHECK-RV64-NEXT: neg a0, a0
; CHECK-RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; CHECK-RV64-NEXT: vsll.vx v16, v8, a1
; CHECK-RV64-NEXT: andi a0, a0, 63
diff --git a/llvm/test/CodeGen/RISCV/rvv/vror-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vror-sdnode.ll
index 9e63b61..97524ac 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vror-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vror-sdnode.ll
@@ -1626,7 +1626,7 @@ define <vscale x 1 x i64> @vror_vx_nxv1i64(<vscale x 1 x i64> %a, i64 %b) {
; CHECK-RV64-LABEL: vror_vx_nxv1i64:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: andi a1, a0, 63
-; CHECK-RV64-NEXT: negw a0, a0
+; CHECK-RV64-NEXT: neg a0, a0
; CHECK-RV64-NEXT: vsetvli a2, zero, e64, m1, ta, ma
; CHECK-RV64-NEXT: vsrl.vx v9, v8, a1
; CHECK-RV64-NEXT: andi a0, a0, 63
@@ -1728,7 +1728,7 @@ define <vscale x 2 x i64> @vror_vx_nxv2i64(<vscale x 2 x i64> %a, i64 %b) {
; CHECK-RV64-LABEL: vror_vx_nxv2i64:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: andi a1, a0, 63
-; CHECK-RV64-NEXT: negw a0, a0
+; CHECK-RV64-NEXT: neg a0, a0
; CHECK-RV64-NEXT: vsetvli a2, zero, e64, m2, ta, ma
; CHECK-RV64-NEXT: vsrl.vx v10, v8, a1
; CHECK-RV64-NEXT: andi a0, a0, 63
@@ -1830,7 +1830,7 @@ define <vscale x 4 x i64> @vror_vx_nxv4i64(<vscale x 4 x i64> %a, i64 %b) {
; CHECK-RV64-LABEL: vror_vx_nxv4i64:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: andi a1, a0, 63
-; CHECK-RV64-NEXT: negw a0, a0
+; CHECK-RV64-NEXT: neg a0, a0
; CHECK-RV64-NEXT: vsetvli a2, zero, e64, m4, ta, ma
; CHECK-RV64-NEXT: vsrl.vx v12, v8, a1
; CHECK-RV64-NEXT: andi a0, a0, 63
@@ -1932,7 +1932,7 @@ define <vscale x 8 x i64> @vror_vx_nxv8i64(<vscale x 8 x i64> %a, i64 %b) {
; CHECK-RV64-LABEL: vror_vx_nxv8i64:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: andi a1, a0, 63
-; CHECK-RV64-NEXT: negw a0, a0
+; CHECK-RV64-NEXT: neg a0, a0
; CHECK-RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; CHECK-RV64-NEXT: vsrl.vx v16, v8, a1
; CHECK-RV64-NEXT: andi a0, a0, 63
diff --git a/llvm/test/CodeGen/RISCV/rvv/vscale-power-of-two.ll b/llvm/test/CodeGen/RISCV/rvv/vscale-power-of-two.ll
index 8eef133..4442f97 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vscale-power-of-two.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vscale-power-of-two.ll
@@ -77,7 +77,7 @@ define i64 @con1024_minus_rem() {
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: negw a0, a0
+; CHECK-NEXT: neg a0, a0
; CHECK-NEXT: andi a0, a0, 1024
; CHECK-NEXT: ret
%vscale = call i64 @llvm.vscale.i64()
diff --git a/llvm/test/CodeGen/RISCV/select.ll b/llvm/test/CodeGen/RISCV/select.ll
index 0ea80bf..2e1784d 100644
--- a/llvm/test/CodeGen/RISCV/select.ll
+++ b/llvm/test/CodeGen/RISCV/select.ll
@@ -647,7 +647,7 @@ define i32 @select_add_1(i1 zeroext %cond, i32 %a, i32 %b) {
;
; RV64IM-LABEL: select_add_1:
; RV64IM: # %bb.0: # %entry
-; RV64IM-NEXT: negw a0, a0
+; RV64IM-NEXT: neg a0, a0
; RV64IM-NEXT: and a0, a0, a1
; RV64IM-NEXT: addw a0, a2, a0
; RV64IM-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/sextw-removal.ll b/llvm/test/CodeGen/RISCV/sextw-removal.ll
index b128abb..b155fea 100644
--- a/llvm/test/CodeGen/RISCV/sextw-removal.ll
+++ b/llvm/test/CodeGen/RISCV/sextw-removal.ll
@@ -1048,21 +1048,21 @@ define signext i32 @bug(i32 signext %x) {
; CHECK-NEXT: srliw a2, a0, 24
; CHECK-NEXT: seqz a2, a2
; CHECK-NEXT: slli a3, a2, 3
-; CHECK-NEXT: negw a2, a2
+; CHECK-NEXT: neg a2, a2
; CHECK-NEXT: sllw a0, a0, a3
; CHECK-NEXT: andi a2, a2, -8
; CHECK-NEXT: add a1, a1, a2
; CHECK-NEXT: srliw a2, a0, 28
; CHECK-NEXT: seqz a2, a2
; CHECK-NEXT: slli a3, a2, 2
-; CHECK-NEXT: negw a2, a2
+; CHECK-NEXT: neg a2, a2
; CHECK-NEXT: sllw a0, a0, a3
; CHECK-NEXT: andi a2, a2, -4
; CHECK-NEXT: add a1, a1, a2
; CHECK-NEXT: srliw a2, a0, 30
; CHECK-NEXT: seqz a2, a2
; CHECK-NEXT: slli a3, a2, 1
-; CHECK-NEXT: negw a2, a2
+; CHECK-NEXT: neg a2, a2
; CHECK-NEXT: sllw a0, a0, a3
; CHECK-NEXT: andi a2, a2, -2
; CHECK-NEXT: add a1, a1, a2
@@ -1090,21 +1090,21 @@ define signext i32 @bug(i32 signext %x) {
; NOREMOVAL-NEXT: srliw a2, a0, 24
; NOREMOVAL-NEXT: seqz a2, a2
; NOREMOVAL-NEXT: slli a3, a2, 3
-; NOREMOVAL-NEXT: negw a2, a2
+; NOREMOVAL-NEXT: neg a2, a2
; NOREMOVAL-NEXT: sllw a0, a0, a3
; NOREMOVAL-NEXT: andi a2, a2, -8
; NOREMOVAL-NEXT: add a1, a1, a2
; NOREMOVAL-NEXT: srliw a2, a0, 28
; NOREMOVAL-NEXT: seqz a2, a2
; NOREMOVAL-NEXT: slli a3, a2, 2
-; NOREMOVAL-NEXT: negw a2, a2
+; NOREMOVAL-NEXT: neg a2, a2
; NOREMOVAL-NEXT: sllw a0, a0, a3
; NOREMOVAL-NEXT: andi a2, a2, -4
; NOREMOVAL-NEXT: add a1, a1, a2
; NOREMOVAL-NEXT: srliw a2, a0, 30
; NOREMOVAL-NEXT: seqz a2, a2
; NOREMOVAL-NEXT: slli a3, a2, 1
-; NOREMOVAL-NEXT: negw a2, a2
+; NOREMOVAL-NEXT: neg a2, a2
; NOREMOVAL-NEXT: sllw a0, a0, a3
; NOREMOVAL-NEXT: andi a2, a2, -2
; NOREMOVAL-NEXT: add a1, a1, a2
diff --git a/llvm/test/CodeGen/RISCV/shifts.ll b/llvm/test/CodeGen/RISCV/shifts.ll
index 7ca1ee1..1ca23d7 100644
--- a/llvm/test/CodeGen/RISCV/shifts.ll
+++ b/llvm/test/CodeGen/RISCV/shifts.ll
@@ -383,7 +383,7 @@ define i64 @fshr64_minsize(i64 %a, i64 %b) minsize nounwind {
; RV64I-LABEL: fshr64_minsize:
; RV64I: # %bb.0:
; RV64I-NEXT: srl a2, a0, a1
-; RV64I-NEXT: negw a1, a1
+; RV64I-NEXT: neg a1, a1
; RV64I-NEXT: sll a0, a0, a1
; RV64I-NEXT: or a0, a2, a0
; RV64I-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/shl-cttz.ll b/llvm/test/CodeGen/RISCV/shl-cttz.ll
index 99dc4f8..e44d247 100644
--- a/llvm/test/CodeGen/RISCV/shl-cttz.ll
+++ b/llvm/test/CodeGen/RISCV/shl-cttz.ll
@@ -40,7 +40,7 @@ define i8 @shl_cttz_i8(i8 %x, i8 %y) {
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: srli a2, a1, 1
; RV64I-NEXT: andi a2, a2, 85
-; RV64I-NEXT: subw a1, a1, a2
+; RV64I-NEXT: sub a1, a1, a2
; RV64I-NEXT: andi a2, a1, 51
; RV64I-NEXT: srli a1, a1, 2
; RV64I-NEXT: andi a1, a1, 51
@@ -96,7 +96,7 @@ define i8 @shl_cttz_constant_i8(i8 %y) {
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: srli a1, a0, 1
; RV64I-NEXT: andi a1, a1, 85
-; RV64I-NEXT: subw a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: andi a1, a0, 51
; RV64I-NEXT: srli a0, a0, 2
; RV64I-NEXT: andi a0, a0, 51
@@ -276,7 +276,7 @@ define i32 @shl_cttz_i32(i32 %x, i32 %y) {
;
; RV64I-LABEL: shl_cttz_i32:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: negw a2, a1
+; RV64I-NEXT: neg a2, a1
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: lui a2, 30667
; RV64I-NEXT: addi a2, a2, 1329
@@ -333,7 +333,7 @@ define i32 @shl_cttz_i32_zero_is_defined(i32 %x, i32 %y) {
; RV64I-NEXT: sext.w a2, a1
; RV64I-NEXT: beqz a2, .LBB5_2
; RV64I-NEXT: # %bb.1: # %cond.false
-; RV64I-NEXT: negw a2, a1
+; RV64I-NEXT: neg a2, a1
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: lui a2, 30667
; RV64I-NEXT: addi a2, a2, 1329
@@ -378,7 +378,7 @@ define i32 @shl_cttz_constant_i32(i32 %y) {
;
; RV64I-LABEL: shl_cttz_constant_i32:
; RV64I: # %bb.0: # %entry
-; RV64I-NEXT: negw a1, a0
+; RV64I-NEXT: neg a1, a0
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: lui a1, 30667
; RV64I-NEXT: addi a1, a1, 1329
@@ -474,7 +474,7 @@ define i32 @shl_cttz_multiuse_i32(i32 %x, i32 %y) {
; RV64I-NEXT: .cfi_offset ra, -8
; RV64I-NEXT: .cfi_offset s0, -16
; RV64I-NEXT: .cfi_offset s1, -24
-; RV64I-NEXT: negw a2, a1
+; RV64I-NEXT: neg a2, a1
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: lui a2, 30667
; RV64I-NEXT: addi a2, a2, 1329
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index 93fb230..bc23388 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -50,7 +50,7 @@ define i1 @test_srem_odd(i29 %X) nounwind {
; RV64-NEXT: add a2, a2, a4
; RV64-NEXT: slli a4, a0, 2
; RV64-NEXT: add a4, a0, a4
-; RV64-NEXT: subw a1, a1, a4
+; RV64-NEXT: sub a1, a1, a4
; RV64-NEXT: slli a4, a0, 17
; RV64-NEXT: add a3, a3, a4
; RV64-NEXT: slli a0, a0, 23
@@ -59,8 +59,8 @@ define i1 @test_srem_odd(i29 %X) nounwind {
; RV64-NEXT: add a1, a1, a3
; RV64-NEXT: lui a3, 1324
; RV64-NEXT: addi a2, a2, -83
-; RV64-NEXT: subw a0, a0, a2
-; RV64-NEXT: subw a1, a1, a0
+; RV64-NEXT: sub a0, a0, a2
+; RV64-NEXT: sub a1, a1, a0
; RV64-NEXT: slli a1, a1, 35
; RV64-NEXT: srli a1, a1, 35
; RV64-NEXT: addi a0, a3, -165
@@ -189,7 +189,7 @@ define i1 @test_srem_even(i4 %X) nounwind {
; RV64M-NEXT: add a1, a1, a2
; RV64M-NEXT: slli a2, a1, 3
; RV64M-NEXT: slli a1, a1, 1
-; RV64M-NEXT: subw a1, a1, a2
+; RV64M-NEXT: sub a1, a1, a2
; RV64M-NEXT: add a0, a0, a1
; RV64M-NEXT: andi a0, a0, 15
; RV64M-NEXT: addi a0, a0, -1
@@ -225,7 +225,7 @@ define i1 @test_srem_even(i4 %X) nounwind {
; RV64MV-NEXT: add a1, a1, a2
; RV64MV-NEXT: slli a2, a1, 3
; RV64MV-NEXT: slli a1, a1, 1
-; RV64MV-NEXT: subw a1, a1, a2
+; RV64MV-NEXT: sub a1, a1, a2
; RV64MV-NEXT: add a0, a0, a1
; RV64MV-NEXT: andi a0, a0, 15
; RV64MV-NEXT: addi a0, a0, -1
@@ -256,7 +256,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; RV64-NEXT: srli a1, a1, 62
; RV64-NEXT: add a1, a0, a1
; RV64-NEXT: andi a1, a1, 60
-; RV64-NEXT: subw a0, a0, a1
+; RV64-NEXT: sub a0, a0, a1
; RV64-NEXT: andi a0, a0, 63
; RV64-NEXT: snez a0, a0
; RV64-NEXT: ret
@@ -280,7 +280,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; RV64M-NEXT: srli a1, a1, 62
; RV64M-NEXT: add a1, a0, a1
; RV64M-NEXT: andi a1, a1, 60
-; RV64M-NEXT: subw a0, a0, a1
+; RV64M-NEXT: sub a0, a0, a1
; RV64M-NEXT: andi a0, a0, 63
; RV64M-NEXT: snez a0, a0
; RV64M-NEXT: ret
@@ -304,7 +304,7 @@ define i1 @test_srem_pow2_setne(i6 %X) nounwind {
; RV64MV-NEXT: srli a1, a1, 62
; RV64MV-NEXT: add a1, a0, a1
; RV64MV-NEXT: andi a1, a1, 60
-; RV64MV-NEXT: subw a0, a0, a1
+; RV64MV-NEXT: sub a0, a0, a1
; RV64MV-NEXT: andi a0, a0, 63
; RV64MV-NEXT: snez a0, a0
; RV64MV-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
index 30ffaf6..5129ccc 100644
--- a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
@@ -183,10 +183,10 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) nounwind {
; RV64IM-NEXT: mul a5, a5, t1
; RV64IM-NEXT: li t1, -124
; RV64IM-NEXT: mul a6, a6, t1
-; RV64IM-NEXT: subw a4, a4, a7
-; RV64IM-NEXT: subw a1, a1, t0
-; RV64IM-NEXT: subw a3, a3, a5
-; RV64IM-NEXT: subw a2, a2, a6
+; RV64IM-NEXT: sub a4, a4, a7
+; RV64IM-NEXT: sub a1, a1, t0
+; RV64IM-NEXT: sub a3, a3, a5
+; RV64IM-NEXT: sub a2, a2, a6
; RV64IM-NEXT: sh a3, 0(a0)
; RV64IM-NEXT: sh a2, 2(a0)
; RV64IM-NEXT: sh a4, 4(a0)
@@ -357,10 +357,10 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) nounwind {
; RV64IM-NEXT: mul a7, a7, t1
; RV64IM-NEXT: mul t0, t0, t1
; RV64IM-NEXT: mul a2, a2, t1
-; RV64IM-NEXT: subw a3, a3, a6
-; RV64IM-NEXT: subw a4, a4, a7
-; RV64IM-NEXT: subw a5, a5, t0
-; RV64IM-NEXT: subw a1, a1, a2
+; RV64IM-NEXT: sub a3, a3, a6
+; RV64IM-NEXT: sub a4, a4, a7
+; RV64IM-NEXT: sub a5, a5, t0
+; RV64IM-NEXT: sub a1, a1, a2
; RV64IM-NEXT: sh a3, 0(a0)
; RV64IM-NEXT: sh a4, 2(a0)
; RV64IM-NEXT: sh a5, 4(a0)
@@ -597,10 +597,10 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind {
; RV64IM-NEXT: add a1, a1, t1
; RV64IM-NEXT: add a3, a3, t0
; RV64IM-NEXT: add a4, a4, a7
-; RV64IM-NEXT: subw a2, a2, a6
-; RV64IM-NEXT: subw a1, a1, t4
-; RV64IM-NEXT: subw a3, a3, t3
-; RV64IM-NEXT: subw a4, a4, t2
+; RV64IM-NEXT: sub a2, a2, a6
+; RV64IM-NEXT: sub a1, a1, t4
+; RV64IM-NEXT: sub a3, a3, t3
+; RV64IM-NEXT: sub a4, a4, t2
; RV64IM-NEXT: sh a2, 0(a0)
; RV64IM-NEXT: sh a1, 2(a0)
; RV64IM-NEXT: sh a3, 4(a0)
@@ -703,15 +703,15 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind {
; RV64I-NEXT: srli a1, a2, 58
; RV64I-NEXT: add a1, a2, a1
; RV64I-NEXT: andi a1, a1, -64
-; RV64I-NEXT: subw s1, a2, a1
+; RV64I-NEXT: sub s1, a2, a1
; RV64I-NEXT: srli a1, a3, 59
; RV64I-NEXT: add a1, a3, a1
; RV64I-NEXT: andi a1, a1, -32
-; RV64I-NEXT: subw s2, a3, a1
+; RV64I-NEXT: sub s2, a3, a1
; RV64I-NEXT: srli a1, a4, 61
; RV64I-NEXT: add a1, a4, a1
; RV64I-NEXT: andi a1, a1, -8
-; RV64I-NEXT: subw s3, a4, a1
+; RV64I-NEXT: sub s3, a4, a1
; RV64I-NEXT: li a1, 95
; RV64I-NEXT: call __moddi3
; RV64I-NEXT: sh s1, 0(s0)
@@ -737,23 +737,23 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind {
; RV64IM-NEXT: srli a6, a2, 58
; RV64IM-NEXT: add a6, a2, a6
; RV64IM-NEXT: andi a6, a6, -64
-; RV64IM-NEXT: subw a2, a2, a6
+; RV64IM-NEXT: sub a2, a2, a6
; RV64IM-NEXT: srli a6, a3, 59
; RV64IM-NEXT: add a6, a3, a6
; RV64IM-NEXT: andi a6, a6, -32
-; RV64IM-NEXT: subw a3, a3, a6
+; RV64IM-NEXT: sub a3, a3, a6
; RV64IM-NEXT: srli a6, a4, 61
; RV64IM-NEXT: mulh a5, a1, a5
; RV64IM-NEXT: add a6, a4, a6
; RV64IM-NEXT: add a5, a5, a1
; RV64IM-NEXT: andi a6, a6, -8
-; RV64IM-NEXT: subw a4, a4, a6
+; RV64IM-NEXT: sub a4, a4, a6
; RV64IM-NEXT: srli a6, a5, 63
; RV64IM-NEXT: srli a5, a5, 6
; RV64IM-NEXT: add a5, a5, a6
; RV64IM-NEXT: li a6, 95
; RV64IM-NEXT: mul a5, a5, a6
-; RV64IM-NEXT: subw a1, a1, a5
+; RV64IM-NEXT: sub a1, a1, a5
; RV64IM-NEXT: sh a2, 0(a0)
; RV64IM-NEXT: sh a3, 2(a0)
; RV64IM-NEXT: sh a4, 4(a0)
@@ -909,9 +909,9 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind {
; RV64IM-NEXT: mul a6, a6, a7
; RV64IM-NEXT: li a7, 23
; RV64IM-NEXT: mul a4, a4, a7
-; RV64IM-NEXT: subw a2, a2, a5
-; RV64IM-NEXT: subw a1, a1, a6
-; RV64IM-NEXT: subw a3, a3, a4
+; RV64IM-NEXT: sub a2, a2, a5
+; RV64IM-NEXT: sub a1, a1, a6
+; RV64IM-NEXT: sub a3, a3, a4
; RV64IM-NEXT: sh zero, 0(a0)
; RV64IM-NEXT: sh a2, 2(a0)
; RV64IM-NEXT: sh a3, 4(a0)
@@ -1011,7 +1011,7 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind {
; RV64I-NEXT: add a1, a2, a1
; RV64I-NEXT: lui a3, 8
; RV64I-NEXT: and a1, a1, a3
-; RV64I-NEXT: subw s3, a2, a1
+; RV64I-NEXT: sub s3, a2, a1
; RV64I-NEXT: li a1, 23
; RV64I-NEXT: call __moddi3
; RV64I-NEXT: mv s2, a0
@@ -1050,7 +1050,7 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind {
; RV64IM-NEXT: add a5, a5, a7
; RV64IM-NEXT: mulh a4, a3, a4
; RV64IM-NEXT: add a4, a4, a3
-; RV64IM-NEXT: subw a2, a2, a6
+; RV64IM-NEXT: sub a2, a2, a6
; RV64IM-NEXT: srli a6, a4, 63
; RV64IM-NEXT: srli a4, a4, 4
; RV64IM-NEXT: add a4, a4, a6
@@ -1059,8 +1059,8 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind {
; RV64IM-NEXT: mul a5, a5, a6
; RV64IM-NEXT: li a6, 23
; RV64IM-NEXT: mul a4, a4, a6
-; RV64IM-NEXT: subw a1, a1, a5
-; RV64IM-NEXT: subw a3, a3, a4
+; RV64IM-NEXT: sub a1, a1, a5
+; RV64IM-NEXT: sub a3, a3, a4
; RV64IM-NEXT: sh zero, 0(a0)
; RV64IM-NEXT: sh a2, 2(a0)
; RV64IM-NEXT: sh a3, 4(a0)
diff --git a/llvm/test/CodeGen/RISCV/typepromotion-overflow.ll b/llvm/test/CodeGen/RISCV/typepromotion-overflow.ll
index 3007c35..0c13a1d 100644
--- a/llvm/test/CodeGen/RISCV/typepromotion-overflow.ll
+++ b/llvm/test/CodeGen/RISCV/typepromotion-overflow.ll
@@ -26,7 +26,7 @@ define zeroext i16 @overflow_add(i16 zeroext %a, i16 zeroext %b) {
define zeroext i16 @overflow_sub(i16 zeroext %a, i16 zeroext %b) {
; CHECK-LABEL: overflow_sub:
; CHECK: # %bb.0:
-; CHECK-NEXT: subw a0, a0, a1
+; CHECK-NEXT: sub a0, a0, a1
; CHECK-NEXT: ori a0, a0, 1
; CHECK-NEXT: slli a0, a0, 48
; CHECK-NEXT: srli a0, a0, 48
diff --git a/llvm/test/CodeGen/RISCV/urem-lkk.ll b/llvm/test/CodeGen/RISCV/urem-lkk.ll
index af5121d..ee49612 100644
--- a/llvm/test/CodeGen/RISCV/urem-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/urem-lkk.ll
@@ -48,7 +48,7 @@ define i32 @fold_urem_positive_odd(i32 %x) nounwind {
; RV64IM-NEXT: slli a2, a2, 32
; RV64IM-NEXT: mulhu a1, a1, a2
; RV64IM-NEXT: srli a1, a1, 32
-; RV64IM-NEXT: subw a2, a0, a1
+; RV64IM-NEXT: sub a2, a0, a1
; RV64IM-NEXT: srliw a2, a2, 1
; RV64IM-NEXT: add a1, a2, a1
; RV64IM-NEXT: srli a1, a1, 6
@@ -174,7 +174,7 @@ define i32 @combine_urem_udiv(i32 %x) nounwind {
; RV64IM-NEXT: slli a2, a2, 32
; RV64IM-NEXT: mulhu a1, a1, a2
; RV64IM-NEXT: srli a1, a1, 32
-; RV64IM-NEXT: subw a2, a0, a1
+; RV64IM-NEXT: sub a2, a0, a1
; RV64IM-NEXT: srliw a2, a2, 1
; RV64IM-NEXT: add a1, a2, a1
; RV64IM-NEXT: li a2, 95
diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
index d33c666..636fdfa 100644
--- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
@@ -31,11 +31,11 @@ define i1 @test_urem_odd(i13 %X) nounwind {
; RV64-NEXT: slli a1, a0, 4
; RV64-NEXT: slli a2, a0, 6
; RV64-NEXT: slli a3, a0, 8
-; RV64-NEXT: subw a1, a1, a2
+; RV64-NEXT: sub a1, a1, a2
; RV64-NEXT: slli a2, a0, 10
-; RV64-NEXT: subw a3, a3, a2
+; RV64-NEXT: sub a3, a3, a2
; RV64-NEXT: slli a2, a0, 2
-; RV64-NEXT: subw a2, a0, a2
+; RV64-NEXT: sub a2, a0, a2
; RV64-NEXT: slli a0, a0, 12
; RV64-NEXT: add a1, a2, a1
; RV64-NEXT: add a0, a3, a0
@@ -138,10 +138,10 @@ define i1 @test_urem_even(i27 %X) nounwind {
; RV64-NEXT: slli a4, a0, 18
; RV64-NEXT: add a3, a3, a4
; RV64-NEXT: slli a0, a0, 27
-; RV64-NEXT: subw a0, a0, a2
+; RV64-NEXT: sub a0, a0, a2
; RV64-NEXT: lui a2, 2341
; RV64-NEXT: add a1, a1, a3
-; RV64-NEXT: subw a0, a0, a1
+; RV64-NEXT: sub a0, a0, a1
; RV64-NEXT: slli a1, a0, 26
; RV64-NEXT: slli a0, a0, 37
; RV64-NEXT: srli a0, a0, 38
@@ -234,8 +234,8 @@ define i1 @test_urem_odd_setne(i4 %X) nounwind {
; RV64-LABEL: test_urem_odd_setne:
; RV64: # %bb.0:
; RV64-NEXT: slli a1, a0, 1
-; RV64-NEXT: negw a0, a0
-; RV64-NEXT: subw a0, a0, a1
+; RV64-NEXT: neg a0, a0
+; RV64-NEXT: sub a0, a0, a1
; RV64-NEXT: andi a0, a0, 15
; RV64-NEXT: sltiu a0, a0, 4
; RV64-NEXT: xori a0, a0, 1
@@ -254,8 +254,8 @@ define i1 @test_urem_odd_setne(i4 %X) nounwind {
; RV64M-LABEL: test_urem_odd_setne:
; RV64M: # %bb.0:
; RV64M-NEXT: slli a1, a0, 1
-; RV64M-NEXT: negw a0, a0
-; RV64M-NEXT: subw a0, a0, a1
+; RV64M-NEXT: neg a0, a0
+; RV64M-NEXT: sub a0, a0, a1
; RV64M-NEXT: andi a0, a0, 15
; RV64M-NEXT: sltiu a0, a0, 4
; RV64M-NEXT: xori a0, a0, 1
@@ -274,8 +274,8 @@ define i1 @test_urem_odd_setne(i4 %X) nounwind {
; RV64MV-LABEL: test_urem_odd_setne:
; RV64MV: # %bb.0:
; RV64MV-NEXT: slli a1, a0, 1
-; RV64MV-NEXT: negw a0, a0
-; RV64MV-NEXT: subw a0, a0, a1
+; RV64MV-NEXT: neg a0, a0
+; RV64MV-NEXT: sub a0, a0, a1
; RV64MV-NEXT: andi a0, a0, 15
; RV64MV-NEXT: sltiu a0, a0, 4
; RV64MV-NEXT: xori a0, a0, 1
@@ -306,9 +306,9 @@ define i1 @test_urem_negative_odd(i9 %X) nounwind {
; RV64-NEXT: slli a1, a0, 2
; RV64-NEXT: slli a2, a0, 4
; RV64-NEXT: slli a3, a0, 6
-; RV64-NEXT: subw a1, a1, a0
-; RV64-NEXT: subw a2, a2, a3
-; RV64-NEXT: subw a1, a1, a2
+; RV64-NEXT: sub a1, a1, a0
+; RV64-NEXT: sub a2, a2, a3
+; RV64-NEXT: sub a1, a1, a2
; RV64-NEXT: slli a0, a0, 8
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: andi a0, a0, 511
@@ -437,7 +437,7 @@ define void @test_urem_vec(ptr %X) nounwind {
; RV64-NEXT: addi a2, a2, -2
; RV64-NEXT: add a1, a1, a4
; RV64-NEXT: add a5, a5, a6
-; RV64-NEXT: subw a4, t0, a7
+; RV64-NEXT: sub a4, t0, a7
; RV64-NEXT: slli a6, a3, 3
; RV64-NEXT: slli a7, a3, 6
; RV64-NEXT: slli t0, a3, 9
@@ -447,18 +447,18 @@ define void @test_urem_vec(ptr %X) nounwind {
; RV64-NEXT: slli a6, a2, 4
; RV64-NEXT: add a7, a7, t0
; RV64-NEXT: slli t0, a2, 6
-; RV64-NEXT: subw a6, a6, t0
+; RV64-NEXT: sub a6, a6, t0
; RV64-NEXT: slli t0, a2, 8
-; RV64-NEXT: subw a5, a5, a2
+; RV64-NEXT: sub a5, a5, a2
; RV64-NEXT: slli a2, a2, 10
-; RV64-NEXT: subw a2, t0, a2
-; RV64-NEXT: subw a4, a4, a1
+; RV64-NEXT: sub a2, t0, a2
+; RV64-NEXT: sub a4, a4, a1
; RV64-NEXT: add a3, a3, a7
-; RV64-NEXT: subw a1, a5, a6
+; RV64-NEXT: sub a1, a5, a6
; RV64-NEXT: slli a5, a4, 10
; RV64-NEXT: slli a4, a4, 53
-; RV64-NEXT: negw a3, a3
-; RV64-NEXT: subw a1, a1, a2
+; RV64-NEXT: neg a3, a3
+; RV64-NEXT: sub a1, a1, a2
; RV64-NEXT: srli a4, a4, 54
; RV64-NEXT: andi a2, a3, 2047
; RV64-NEXT: andi a1, a1, 2047
diff --git a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
index 3ef9f3f..5a3dfd1 100644
--- a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
@@ -157,10 +157,10 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) nounwind {
; RV64IM-NEXT: mul a7, a7, t1
; RV64IM-NEXT: slli t1, a5, 7
; RV64IM-NEXT: slli a5, a5, 2
-; RV64IM-NEXT: subw a5, a5, t1
-; RV64IM-NEXT: subw a2, a2, a6
-; RV64IM-NEXT: subw a4, a4, t0
-; RV64IM-NEXT: subw a1, a1, a7
+; RV64IM-NEXT: sub a5, a5, t1
+; RV64IM-NEXT: sub a2, a2, a6
+; RV64IM-NEXT: sub a4, a4, t0
+; RV64IM-NEXT: sub a1, a1, a7
; RV64IM-NEXT: add a3, a3, a5
; RV64IM-NEXT: sh a2, 0(a0)
; RV64IM-NEXT: sh a3, 2(a0)
@@ -300,10 +300,10 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) nounwind {
; RV64IM-NEXT: mul t0, t0, a6
; RV64IM-NEXT: mul t1, t1, a6
; RV64IM-NEXT: mul a2, a2, a6
-; RV64IM-NEXT: subw a3, a3, a7
-; RV64IM-NEXT: subw a4, a4, t0
-; RV64IM-NEXT: subw a5, a5, t1
-; RV64IM-NEXT: subw a1, a1, a2
+; RV64IM-NEXT: sub a3, a3, a7
+; RV64IM-NEXT: sub a4, a4, t0
+; RV64IM-NEXT: sub a5, a5, t1
+; RV64IM-NEXT: sub a1, a1, a2
; RV64IM-NEXT: sh a3, 0(a0)
; RV64IM-NEXT: sh a4, 2(a0)
; RV64IM-NEXT: sh a5, 4(a0)
@@ -508,10 +508,10 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind {
; RV64IM-NEXT: add a1, a1, t1
; RV64IM-NEXT: add a3, a3, t0
; RV64IM-NEXT: add a4, a4, a7
-; RV64IM-NEXT: subw a2, a2, a6
-; RV64IM-NEXT: subw a1, a1, t4
-; RV64IM-NEXT: subw a3, a3, t3
-; RV64IM-NEXT: subw a4, a4, t2
+; RV64IM-NEXT: sub a2, a2, a6
+; RV64IM-NEXT: sub a1, a1, t4
+; RV64IM-NEXT: sub a3, a3, t3
+; RV64IM-NEXT: sub a4, a4, t2
; RV64IM-NEXT: sh a2, 0(a0)
; RV64IM-NEXT: sh a1, 2(a0)
; RV64IM-NEXT: sh a3, 4(a0)
@@ -622,7 +622,7 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) nounwind {
; RV64IM-NEXT: andi a4, a4, 7
; RV64IM-NEXT: mulhu a5, a1, a5
; RV64IM-NEXT: mul a5, a5, a6
-; RV64IM-NEXT: subw a1, a1, a5
+; RV64IM-NEXT: sub a1, a1, a5
; RV64IM-NEXT: sh a2, 0(a0)
; RV64IM-NEXT: sh a3, 2(a0)
; RV64IM-NEXT: sh a4, 4(a0)
@@ -757,9 +757,9 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind {
; RV64IM-NEXT: addi a7, a7, 1327
; RV64IM-NEXT: mulhu a5, a1, a5
; RV64IM-NEXT: mul a5, a5, a7
-; RV64IM-NEXT: subw a2, a2, a4
-; RV64IM-NEXT: subw a3, a3, a6
-; RV64IM-NEXT: subw a1, a1, a5
+; RV64IM-NEXT: sub a2, a2, a4
+; RV64IM-NEXT: sub a3, a3, a6
+; RV64IM-NEXT: sub a1, a1, a5
; RV64IM-NEXT: sh zero, 0(a0)
; RV64IM-NEXT: sh a2, 2(a0)
; RV64IM-NEXT: sh a3, 4(a0)
diff --git a/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll
index 32753ca..cd7f30d 100644
--- a/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll
+++ b/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll
@@ -716,92 +716,101 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a4, a4, 8
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: or a4, a6, a5
+; RV32I-NEXT: lbu a5, 8(a0)
+; RV32I-NEXT: lbu a6, 9(a0)
+; RV32I-NEXT: lbu t3, 10(a0)
+; RV32I-NEXT: lbu t4, 11(a0)
; RV32I-NEXT: slli t0, t0, 8
-; RV32I-NEXT: or a4, a4, a3
-; RV32I-NEXT: or a5, a6, a5
-; RV32I-NEXT: or a3, t0, a7
-; RV32I-NEXT: lbu a6, 8(a0)
-; RV32I-NEXT: lbu a7, 9(a0)
-; RV32I-NEXT: lbu t0, 10(a0)
-; RV32I-NEXT: lbu t3, 11(a0)
; RV32I-NEXT: slli t1, t1, 16
; RV32I-NEXT: slli t2, t2, 24
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: slli t0, t0, 16
-; RV32I-NEXT: slli t3, t3, 24
-; RV32I-NEXT: or t1, t2, t1
-; RV32I-NEXT: or a6, a7, a6
-; RV32I-NEXT: or a7, t3, t0
-; RV32I-NEXT: lbu t0, 12(a0)
-; RV32I-NEXT: lbu t2, 13(a0)
-; RV32I-NEXT: lbu t3, 14(a0)
-; RV32I-NEXT: lbu t4, 15(a0)
-; RV32I-NEXT: lbu a0, 0(a1)
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: or a7, t0, a7
+; RV32I-NEXT: or t0, t2, t1
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: lbu a6, 12(a0)
+; RV32I-NEXT: lbu t1, 13(a0)
+; RV32I-NEXT: lbu t2, 14(a0)
+; RV32I-NEXT: lbu a0, 15(a0)
+; RV32I-NEXT: slli t3, t3, 16
+; RV32I-NEXT: slli t4, t4, 24
+; RV32I-NEXT: slli t1, t1, 8
+; RV32I-NEXT: slli t2, t2, 16
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: or t3, t4, t3
+; RV32I-NEXT: or a6, t1, a6
+; RV32I-NEXT: or a0, a0, t2
+; RV32I-NEXT: lbu t1, 1(a1)
+; RV32I-NEXT: lbu t2, 0(a1)
+; RV32I-NEXT: lbu t4, 2(a1)
+; RV32I-NEXT: lbu a1, 3(a1)
+; RV32I-NEXT: slli t1, t1, 8
+; RV32I-NEXT: or t1, t1, t2
; RV32I-NEXT: sw zero, 16(sp)
; RV32I-NEXT: sw zero, 20(sp)
; RV32I-NEXT: sw zero, 24(sp)
; RV32I-NEXT: sw zero, 28(sp)
-; RV32I-NEXT: slli t2, t2, 8
-; RV32I-NEXT: or a1, t2, t0
-; RV32I-NEXT: mv t0, sp
-; RV32I-NEXT: slli t3, t3, 16
-; RV32I-NEXT: slli t4, t4, 24
-; RV32I-NEXT: or t2, t4, t3
-; RV32I-NEXT: srli t3, a0, 3
-; RV32I-NEXT: or a4, a5, a4
-; RV32I-NEXT: andi a5, a0, 31
-; RV32I-NEXT: andi t3, t3, 12
-; RV32I-NEXT: xori a5, a5, 31
-; RV32I-NEXT: or a3, t1, a3
-; RV32I-NEXT: or a6, a7, a6
-; RV32I-NEXT: or a1, t2, a1
-; RV32I-NEXT: add t0, t0, t3
-; RV32I-NEXT: sw a4, 0(sp)
-; RV32I-NEXT: sw a3, 4(sp)
-; RV32I-NEXT: sw a6, 8(sp)
-; RV32I-NEXT: sw a1, 12(sp)
-; RV32I-NEXT: lw a1, 4(t0)
-; RV32I-NEXT: lw a3, 8(t0)
-; RV32I-NEXT: lw a4, 0(t0)
-; RV32I-NEXT: lw a6, 12(t0)
-; RV32I-NEXT: srl a7, a1, a0
-; RV32I-NEXT: slli t0, a3, 1
-; RV32I-NEXT: srl a4, a4, a0
-; RV32I-NEXT: slli a1, a1, 1
-; RV32I-NEXT: srl a3, a3, a0
-; RV32I-NEXT: slli t1, a6, 1
-; RV32I-NEXT: srl a0, a6, a0
-; RV32I-NEXT: sll a6, t0, a5
-; RV32I-NEXT: sll a1, a1, a5
-; RV32I-NEXT: sll a5, t1, a5
+; RV32I-NEXT: slli t4, t4, 16
+; RV32I-NEXT: slli a1, a1, 24
+; RV32I-NEXT: or a1, a1, t4
+; RV32I-NEXT: mv t2, sp
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: or a4, t0, a7
+; RV32I-NEXT: or a5, t3, a5
+; RV32I-NEXT: or a0, a0, a6
+; RV32I-NEXT: or a1, a1, t1
+; RV32I-NEXT: sw a3, 0(sp)
+; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a0, 12(sp)
+; RV32I-NEXT: srli a0, a1, 3
+; RV32I-NEXT: andi a3, a1, 31
+; RV32I-NEXT: andi a0, a0, 12
+; RV32I-NEXT: xori a3, a3, 31
+; RV32I-NEXT: add a0, t2, a0
+; RV32I-NEXT: lw a4, 4(a0)
+; RV32I-NEXT: lw a5, 8(a0)
+; RV32I-NEXT: lw a6, 0(a0)
+; RV32I-NEXT: lw a0, 12(a0)
+; RV32I-NEXT: srl a7, a4, a1
+; RV32I-NEXT: slli t0, a5, 1
+; RV32I-NEXT: srl a6, a6, a1
+; RV32I-NEXT: slli a4, a4, 1
+; RV32I-NEXT: srl a5, a5, a1
+; RV32I-NEXT: slli t1, a0, 1
+; RV32I-NEXT: srl a0, a0, a1
+; RV32I-NEXT: sll a1, t0, a3
+; RV32I-NEXT: sll a4, a4, a3
+; RV32I-NEXT: sll a3, t1, a3
; RV32I-NEXT: srli t0, a0, 16
; RV32I-NEXT: srli t1, a0, 24
; RV32I-NEXT: srli t2, a0, 8
-; RV32I-NEXT: or a6, a7, a6
-; RV32I-NEXT: or a1, a4, a1
-; RV32I-NEXT: or a3, a3, a5
+; RV32I-NEXT: or a1, a7, a1
+; RV32I-NEXT: or a4, a6, a4
+; RV32I-NEXT: or a3, a5, a3
; RV32I-NEXT: sb a0, 12(a2)
; RV32I-NEXT: sb t2, 13(a2)
; RV32I-NEXT: sb t0, 14(a2)
; RV32I-NEXT: sb t1, 15(a2)
; RV32I-NEXT: srli a0, a3, 16
-; RV32I-NEXT: srli a4, a3, 24
-; RV32I-NEXT: srli a5, a3, 8
-; RV32I-NEXT: srli a7, a1, 16
-; RV32I-NEXT: srli t0, a1, 24
-; RV32I-NEXT: srli t1, a1, 8
-; RV32I-NEXT: srli t2, a6, 16
-; RV32I-NEXT: srli t3, a6, 24
+; RV32I-NEXT: srli a5, a3, 24
+; RV32I-NEXT: srli a6, a3, 8
+; RV32I-NEXT: srli a7, a4, 16
+; RV32I-NEXT: srli t0, a4, 24
+; RV32I-NEXT: srli t1, a4, 8
+; RV32I-NEXT: srli t2, a1, 16
+; RV32I-NEXT: srli t3, a1, 24
; RV32I-NEXT: sb a3, 8(a2)
-; RV32I-NEXT: sb a5, 9(a2)
+; RV32I-NEXT: sb a6, 9(a2)
; RV32I-NEXT: sb a0, 10(a2)
-; RV32I-NEXT: sb a4, 11(a2)
-; RV32I-NEXT: srli a0, a6, 8
-; RV32I-NEXT: sb a1, 0(a2)
+; RV32I-NEXT: sb a5, 11(a2)
+; RV32I-NEXT: srli a0, a1, 8
+; RV32I-NEXT: sb a4, 0(a2)
; RV32I-NEXT: sb t1, 1(a2)
; RV32I-NEXT: sb a7, 2(a2)
; RV32I-NEXT: sb t0, 3(a2)
-; RV32I-NEXT: sb a6, 4(a2)
+; RV32I-NEXT: sb a1, 4(a2)
; RV32I-NEXT: sb a0, 5(a2)
; RV32I-NEXT: sb t2, 6(a2)
; RV32I-NEXT: sb t3, 7(a2)
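The regenerated lshr_16bytes body keeps the same lowering strategy as before; the schedule changes because the shift amount is now assembled byte-by-byte (lbu/slli/or) like the data rather than read with a single lbu. A rough C model of the strategy, assuming a little-endian target and illustrative names:

#include <stdint.h>
#include <string.h>

// Spill the 16 source bytes into the low half of a zero-padded 32-byte
// buffer, resolve the word-aligned part of the shift by indexing into the
// buffer, and the remaining 0..31 bits with a per-word funnel shift.
static void lshr_16bytes_sketch(const uint8_t *src, uint32_t bits, uint8_t *dst) {
    uint32_t buf[8] = {0};                        // sw zero, 16..28(sp)
    memcpy(buf, src, 16);                         // assembled via lbu/slli/or
    const uint32_t *w = buf + ((bits >> 5) & 3);  // andi (bits>>3), 12
    uint32_t s = bits & 31;                       // andi bits, 31
    for (int i = 0; i < 4; i++) {
        // (hi << 1) << (s ^ 31) == hi << (32 - s), but stays well defined
        // when s == 0; this is the slli-by-1 / xori-31 / sll pattern above.
        uint32_t r = (w[i] >> s) | ((w[i + 1] << 1) << (s ^ 31));
        memcpy(dst + 4 * i, &r, 4);
    }
}

The zero padding is what lets every word read its upper neighbor without a bounds check.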
@@ -943,93 +952,102 @@ define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a4, a4, 8
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: or a4, a6, a5
+; RV32I-NEXT: lbu a5, 8(a0)
+; RV32I-NEXT: lbu a6, 9(a0)
+; RV32I-NEXT: lbu t3, 10(a0)
+; RV32I-NEXT: lbu t4, 11(a0)
; RV32I-NEXT: slli t0, t0, 8
-; RV32I-NEXT: or a4, a4, a3
-; RV32I-NEXT: or a5, a6, a5
-; RV32I-NEXT: or a3, t0, a7
-; RV32I-NEXT: lbu a6, 8(a0)
-; RV32I-NEXT: lbu a7, 9(a0)
-; RV32I-NEXT: lbu t0, 10(a0)
-; RV32I-NEXT: lbu t3, 11(a0)
; RV32I-NEXT: slli t1, t1, 16
; RV32I-NEXT: slli t2, t2, 24
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: slli t0, t0, 16
-; RV32I-NEXT: slli t3, t3, 24
-; RV32I-NEXT: or t1, t2, t1
-; RV32I-NEXT: or a6, a7, a6
-; RV32I-NEXT: or a7, t3, t0
-; RV32I-NEXT: lbu t0, 12(a0)
-; RV32I-NEXT: lbu t2, 13(a0)
-; RV32I-NEXT: lbu t3, 14(a0)
-; RV32I-NEXT: lbu t4, 15(a0)
-; RV32I-NEXT: lbu a0, 0(a1)
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: or a7, t0, a7
+; RV32I-NEXT: or t0, t2, t1
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: lbu a6, 12(a0)
+; RV32I-NEXT: lbu t1, 13(a0)
+; RV32I-NEXT: lbu t2, 14(a0)
+; RV32I-NEXT: lbu a0, 15(a0)
+; RV32I-NEXT: slli t3, t3, 16
+; RV32I-NEXT: slli t4, t4, 24
+; RV32I-NEXT: slli t1, t1, 8
+; RV32I-NEXT: slli t2, t2, 16
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: or t3, t4, t3
+; RV32I-NEXT: or a6, t1, a6
+; RV32I-NEXT: or a0, a0, t2
+; RV32I-NEXT: lbu t1, 1(a1)
+; RV32I-NEXT: lbu t2, 0(a1)
+; RV32I-NEXT: lbu t4, 2(a1)
+; RV32I-NEXT: lbu a1, 3(a1)
+; RV32I-NEXT: slli t1, t1, 8
+; RV32I-NEXT: or t1, t1, t2
; RV32I-NEXT: sw zero, 0(sp)
; RV32I-NEXT: sw zero, 4(sp)
; RV32I-NEXT: sw zero, 8(sp)
; RV32I-NEXT: sw zero, 12(sp)
-; RV32I-NEXT: slli t2, t2, 8
-; RV32I-NEXT: or a1, t2, t0
-; RV32I-NEXT: addi t0, sp, 16
-; RV32I-NEXT: slli t3, t3, 16
-; RV32I-NEXT: slli t4, t4, 24
-; RV32I-NEXT: or t2, t4, t3
-; RV32I-NEXT: srli t3, a0, 3
-; RV32I-NEXT: or a4, a5, a4
-; RV32I-NEXT: andi a5, a0, 31
-; RV32I-NEXT: andi t3, t3, 12
-; RV32I-NEXT: or a3, t1, a3
-; RV32I-NEXT: or a6, a7, a6
-; RV32I-NEXT: or a1, t2, a1
-; RV32I-NEXT: sub a7, t0, t3
-; RV32I-NEXT: sw a4, 16(sp)
-; RV32I-NEXT: sw a3, 20(sp)
-; RV32I-NEXT: sw a6, 24(sp)
-; RV32I-NEXT: sw a1, 28(sp)
-; RV32I-NEXT: lw a1, 0(a7)
-; RV32I-NEXT: lw a3, 4(a7)
-; RV32I-NEXT: lw a4, 8(a7)
-; RV32I-NEXT: lw a6, 12(a7)
-; RV32I-NEXT: xori a5, a5, 31
-; RV32I-NEXT: sll a7, a3, a0
-; RV32I-NEXT: srli t0, a1, 1
-; RV32I-NEXT: sll a6, a6, a0
-; RV32I-NEXT: srli t1, a4, 1
-; RV32I-NEXT: sll a4, a4, a0
-; RV32I-NEXT: srli a3, a3, 1
-; RV32I-NEXT: sll a0, a1, a0
-; RV32I-NEXT: srl a1, t0, a5
-; RV32I-NEXT: srl t0, t1, a5
-; RV32I-NEXT: srl a3, a3, a5
-; RV32I-NEXT: srli a5, a0, 16
-; RV32I-NEXT: srli t1, a0, 24
-; RV32I-NEXT: srli t2, a0, 8
-; RV32I-NEXT: or a1, a7, a1
-; RV32I-NEXT: or a6, a6, t0
+; RV32I-NEXT: slli t4, t4, 16
+; RV32I-NEXT: slli a1, a1, 24
+; RV32I-NEXT: or a1, a1, t4
+; RV32I-NEXT: addi t2, sp, 16
; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: sb a0, 0(a2)
+; RV32I-NEXT: or a4, t0, a7
+; RV32I-NEXT: or a5, t3, a5
+; RV32I-NEXT: or a0, a0, a6
+; RV32I-NEXT: or a1, a1, t1
+; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a4, 20(sp)
+; RV32I-NEXT: sw a5, 24(sp)
+; RV32I-NEXT: sw a0, 28(sp)
+; RV32I-NEXT: srli a0, a1, 3
+; RV32I-NEXT: andi a3, a1, 31
+; RV32I-NEXT: andi a0, a0, 12
+; RV32I-NEXT: sub a0, t2, a0
+; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 4(a0)
+; RV32I-NEXT: lw a6, 8(a0)
+; RV32I-NEXT: lw a0, 12(a0)
+; RV32I-NEXT: xori a3, a3, 31
+; RV32I-NEXT: sll a7, a5, a1
+; RV32I-NEXT: srli t0, a4, 1
+; RV32I-NEXT: sll a0, a0, a1
+; RV32I-NEXT: srli t1, a6, 1
+; RV32I-NEXT: sll a6, a6, a1
+; RV32I-NEXT: srli a5, a5, 1
+; RV32I-NEXT: sll a1, a4, a1
+; RV32I-NEXT: srl a4, t0, a3
+; RV32I-NEXT: srl t0, t1, a3
+; RV32I-NEXT: srl a3, a5, a3
+; RV32I-NEXT: srli a5, a1, 16
+; RV32I-NEXT: srli t1, a1, 24
+; RV32I-NEXT: srli t2, a1, 8
+; RV32I-NEXT: or a4, a7, a4
+; RV32I-NEXT: or a0, a0, t0
+; RV32I-NEXT: or a3, a6, a3
+; RV32I-NEXT: sb a1, 0(a2)
; RV32I-NEXT: sb t2, 1(a2)
; RV32I-NEXT: sb a5, 2(a2)
; RV32I-NEXT: sb t1, 3(a2)
-; RV32I-NEXT: srli a0, a3, 16
-; RV32I-NEXT: srli a4, a3, 24
-; RV32I-NEXT: srli a5, a3, 8
-; RV32I-NEXT: srli a7, a6, 16
-; RV32I-NEXT: srli t0, a6, 24
-; RV32I-NEXT: srli t1, a6, 8
-; RV32I-NEXT: srli t2, a1, 16
-; RV32I-NEXT: srli t3, a1, 24
+; RV32I-NEXT: srli a1, a3, 16
+; RV32I-NEXT: srli a5, a3, 24
+; RV32I-NEXT: srli a6, a3, 8
+; RV32I-NEXT: srli a7, a0, 16
+; RV32I-NEXT: srli t0, a0, 24
+; RV32I-NEXT: srli t1, a0, 8
+; RV32I-NEXT: srli t2, a4, 16
+; RV32I-NEXT: srli t3, a4, 24
; RV32I-NEXT: sb a3, 8(a2)
-; RV32I-NEXT: sb a5, 9(a2)
-; RV32I-NEXT: sb a0, 10(a2)
-; RV32I-NEXT: sb a4, 11(a2)
-; RV32I-NEXT: srli a0, a1, 8
-; RV32I-NEXT: sb a6, 12(a2)
+; RV32I-NEXT: sb a6, 9(a2)
+; RV32I-NEXT: sb a1, 10(a2)
+; RV32I-NEXT: sb a5, 11(a2)
+; RV32I-NEXT: srli a1, a4, 8
+; RV32I-NEXT: sb a0, 12(a2)
; RV32I-NEXT: sb t1, 13(a2)
; RV32I-NEXT: sb a7, 14(a2)
; RV32I-NEXT: sb t0, 15(a2)
-; RV32I-NEXT: sb a1, 4(a2)
-; RV32I-NEXT: sb a0, 5(a2)
+; RV32I-NEXT: sb a4, 4(a2)
+; RV32I-NEXT: sb a1, 5(a2)
; RV32I-NEXT: sb t2, 6(a2)
; RV32I-NEXT: sb t3, 7(a2)
; RV32I-NEXT: addi sp, sp, 32
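shl_16bytes is the mirror image: the data is stored to the upper half of the buffer above a zeroed lower half, the word offset is subtracted from the buffer base (the sub a0, t2, a0 above) rather than added, and each result word takes its carry from the word below it. Same headers and assumptions as the lshr sketch:

static void shl_16bytes_sketch(const uint8_t *src, uint32_t bits, uint8_t *dst) {
    uint32_t buf[8] = {0};                            // sw zero, 0..12(sp)
    memcpy(buf + 4, src, 16);                         // data in the upper half
    const uint32_t *w = buf + 4 - ((bits >> 5) & 3);  // sub, not add
    uint32_t s = bits & 31;
    for (int i = 0; i < 4; i++) {
        // carry from below: (lo >> 1) >> (s ^ 31) == lo >> (32 - s); reading
        // w[-1] is safe because it can only land in the zeroed lower half.
        uint32_t r = (w[i] << s) | ((w[i - 1] >> 1) >> (s ^ 31));
        memcpy(dst + 4 * i, &r, 4);
    }
}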
@@ -1168,73 +1186,82 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: lbu t1, 6(a0)
; RV32I-NEXT: lbu t2, 7(a0)
; RV32I-NEXT: slli a4, a4, 8
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: lbu a4, 8(a0)
+; RV32I-NEXT: lbu t3, 9(a0)
+; RV32I-NEXT: lbu t4, 10(a0)
+; RV32I-NEXT: lbu t5, 11(a0)
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: slli t0, t0, 8
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: or a4, a6, a5
-; RV32I-NEXT: or a5, t0, a7
-; RV32I-NEXT: lbu a6, 8(a0)
-; RV32I-NEXT: lbu a7, 9(a0)
-; RV32I-NEXT: lbu t0, 10(a0)
-; RV32I-NEXT: lbu t3, 11(a0)
; RV32I-NEXT: slli t1, t1, 16
; RV32I-NEXT: slli t2, t2, 24
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: slli t0, t0, 16
-; RV32I-NEXT: slli t3, t3, 24
-; RV32I-NEXT: or t1, t2, t1
-; RV32I-NEXT: or a6, a7, a6
-; RV32I-NEXT: or a7, t3, t0
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: or a6, t0, a7
+; RV32I-NEXT: or a7, t2, t1
; RV32I-NEXT: lbu t0, 12(a0)
-; RV32I-NEXT: lbu t2, 13(a0)
-; RV32I-NEXT: lbu t3, 14(a0)
-; RV32I-NEXT: lbu t4, 15(a0)
-; RV32I-NEXT: lbu a0, 0(a1)
-; RV32I-NEXT: slli t2, t2, 8
-; RV32I-NEXT: or a1, t2, t0
-; RV32I-NEXT: mv t0, sp
-; RV32I-NEXT: slli t3, t3, 16
-; RV32I-NEXT: slli t4, t4, 24
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: srli a4, a0, 3
-; RV32I-NEXT: or a5, t1, a5
-; RV32I-NEXT: andi t1, a0, 31
-; RV32I-NEXT: or t2, t4, t3
-; RV32I-NEXT: srai t3, t4, 31
-; RV32I-NEXT: andi a4, a4, 12
-; RV32I-NEXT: xori t1, t1, 31
+; RV32I-NEXT: lbu t1, 13(a0)
+; RV32I-NEXT: lbu t2, 14(a0)
+; RV32I-NEXT: lbu a0, 15(a0)
+; RV32I-NEXT: slli t3, t3, 8
+; RV32I-NEXT: slli t4, t4, 16
+; RV32I-NEXT: slli t5, t5, 24
+; RV32I-NEXT: slli t1, t1, 8
+; RV32I-NEXT: or a4, t3, a4
+; RV32I-NEXT: or t3, t5, t4
+; RV32I-NEXT: or t0, t1, t0
+; RV32I-NEXT: lbu t1, 1(a1)
+; RV32I-NEXT: lbu t4, 0(a1)
+; RV32I-NEXT: lbu t5, 2(a1)
+; RV32I-NEXT: lbu a1, 3(a1)
+; RV32I-NEXT: slli t1, t1, 8
+; RV32I-NEXT: or t1, t1, t4
+; RV32I-NEXT: slli t5, t5, 16
+; RV32I-NEXT: slli a1, a1, 24
+; RV32I-NEXT: or a1, a1, t5
+; RV32I-NEXT: or a3, a5, a3
+; RV32I-NEXT: mv a5, sp
+; RV32I-NEXT: slli t2, t2, 16
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: or t2, a0, t2
+; RV32I-NEXT: srai a0, a0, 31
; RV32I-NEXT: or a6, a7, a6
-; RV32I-NEXT: or a1, t2, a1
-; RV32I-NEXT: sw t3, 16(sp)
-; RV32I-NEXT: sw t3, 20(sp)
-; RV32I-NEXT: sw t3, 24(sp)
-; RV32I-NEXT: sw t3, 28(sp)
-; RV32I-NEXT: add a4, t0, a4
+; RV32I-NEXT: or a4, t3, a4
+; RV32I-NEXT: or a7, t2, t0
+; RV32I-NEXT: or a1, a1, t1
+; RV32I-NEXT: sw a0, 16(sp)
+; RV32I-NEXT: sw a0, 20(sp)
+; RV32I-NEXT: sw a0, 24(sp)
+; RV32I-NEXT: sw a0, 28(sp)
; RV32I-NEXT: sw a3, 0(sp)
-; RV32I-NEXT: sw a5, 4(sp)
-; RV32I-NEXT: sw a6, 8(sp)
-; RV32I-NEXT: sw a1, 12(sp)
-; RV32I-NEXT: lw a1, 4(a4)
-; RV32I-NEXT: lw a3, 8(a4)
-; RV32I-NEXT: lw a5, 0(a4)
-; RV32I-NEXT: lw a4, 12(a4)
-; RV32I-NEXT: srl a6, a1, a0
-; RV32I-NEXT: slli a7, a3, 1
-; RV32I-NEXT: srl a5, a5, a0
-; RV32I-NEXT: slli a1, a1, 1
-; RV32I-NEXT: srl a3, a3, a0
-; RV32I-NEXT: slli t0, a4, 1
-; RV32I-NEXT: sra a0, a4, a0
-; RV32I-NEXT: sll a4, a7, t1
-; RV32I-NEXT: sll a1, a1, t1
-; RV32I-NEXT: sll a7, t0, t1
+; RV32I-NEXT: sw a6, 4(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: sw a7, 12(sp)
+; RV32I-NEXT: srli a0, a1, 3
+; RV32I-NEXT: andi a3, a1, 31
+; RV32I-NEXT: andi a0, a0, 12
+; RV32I-NEXT: xori a3, a3, 31
+; RV32I-NEXT: add a0, a5, a0
+; RV32I-NEXT: lw a4, 4(a0)
+; RV32I-NEXT: lw a5, 8(a0)
+; RV32I-NEXT: lw a6, 0(a0)
+; RV32I-NEXT: lw a0, 12(a0)
+; RV32I-NEXT: srl a7, a4, a1
+; RV32I-NEXT: slli t0, a5, 1
+; RV32I-NEXT: srl a6, a6, a1
+; RV32I-NEXT: slli a4, a4, 1
+; RV32I-NEXT: srl a5, a5, a1
+; RV32I-NEXT: slli t1, a0, 1
+; RV32I-NEXT: sra a0, a0, a1
+; RV32I-NEXT: sll a1, t0, a3
+; RV32I-NEXT: sll a4, a4, a3
+; RV32I-NEXT: sll a3, t1, a3
; RV32I-NEXT: srli t0, a0, 16
; RV32I-NEXT: srli t1, a0, 24
; RV32I-NEXT: srli t2, a0, 8
+; RV32I-NEXT: or a1, a7, a1
; RV32I-NEXT: or a4, a6, a4
-; RV32I-NEXT: or a1, a5, a1
-; RV32I-NEXT: or a3, a3, a7
+; RV32I-NEXT: or a3, a5, a3
; RV32I-NEXT: sb a0, 12(a2)
; RV32I-NEXT: sb t2, 13(a2)
; RV32I-NEXT: sb t0, 14(a2)
@@ -1242,21 +1269,21 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: srli a0, a3, 16
; RV32I-NEXT: srli a5, a3, 24
; RV32I-NEXT: srli a6, a3, 8
-; RV32I-NEXT: srli a7, a1, 16
-; RV32I-NEXT: srli t0, a1, 24
-; RV32I-NEXT: srli t1, a1, 8
-; RV32I-NEXT: srli t2, a4, 16
-; RV32I-NEXT: srli t3, a4, 24
+; RV32I-NEXT: srli a7, a4, 16
+; RV32I-NEXT: srli t0, a4, 24
+; RV32I-NEXT: srli t1, a4, 8
+; RV32I-NEXT: srli t2, a1, 16
+; RV32I-NEXT: srli t3, a1, 24
; RV32I-NEXT: sb a3, 8(a2)
; RV32I-NEXT: sb a6, 9(a2)
; RV32I-NEXT: sb a0, 10(a2)
; RV32I-NEXT: sb a5, 11(a2)
-; RV32I-NEXT: srli a0, a4, 8
-; RV32I-NEXT: sb a1, 0(a2)
+; RV32I-NEXT: srli a0, a1, 8
+; RV32I-NEXT: sb a4, 0(a2)
; RV32I-NEXT: sb t1, 1(a2)
; RV32I-NEXT: sb a7, 2(a2)
; RV32I-NEXT: sb t0, 3(a2)
-; RV32I-NEXT: sb a4, 4(a2)
+; RV32I-NEXT: sb a1, 4(a2)
; RV32I-NEXT: sb a0, 5(a2)
; RV32I-NEXT: sb t2, 6(a2)
; RV32I-NEXT: sb t3, 7(a2)
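ashr_16bytes changes only the padding: the upper buffer words hold a sign word (the srai a0, a0, 31 of the word containing byte 15) instead of zeros, and the asm extracts the top result word with sra. In a C model the sign padding alone suffices, since the funnel pulls sign bits in from the word above. Same headers and assumptions as before:

static void ashr_16bytes_sketch(const uint8_t *src, uint32_t bits, uint8_t *dst) {
    uint32_t buf[8];
    memcpy(buf, src, 16);
    uint32_t sign = (buf[3] & 0x80000000u) ? 0xffffffffu : 0u;  // srai a0, a0, 31
    for (int i = 4; i < 8; i++) buf[i] = sign;                  // sw a0, 16..28(sp)
    const uint32_t *w = buf + ((bits >> 5) & 3);
    uint32_t s = bits & 31;
    for (int i = 0; i < 4; i++) {
        // same funnel as lshr; the sign-filled padding makes it arithmetic
        uint32_t r = (w[i] >> s) | ((w[i + 1] << 1) << (s ^ 31));
        memcpy(dst + 4 * i, &r, 4);
    }
}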
@@ -1272,17 +1299,19 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-LABEL: lshr_32bytes:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -144
-; RV64I-NEXT: sd s0, 136(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 128(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 120(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 112(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s4, 104(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s5, 96(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s6, 88(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s7, 80(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s8, 72(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s9, 64(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -160
+; RV64I-NEXT: sd s0, 152(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 144(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 136(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 128(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s4, 120(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s5, 112(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s6, 104(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s7, 96(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s8, 88(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s9, 80(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s10, 72(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s11, 64(sp) # 8-byte Folded Spill
; RV64I-NEXT: lbu a3, 0(a0)
; RV64I-NEXT: lbu a4, 1(a0)
; RV64I-NEXT: lbu a5, 2(a0)
@@ -1299,122 +1328,143 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: lbu s1, 13(a0)
; RV64I-NEXT: lbu s2, 14(a0)
; RV64I-NEXT: lbu s3, 15(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: slli a5, a5, 16
-; RV64I-NEXT: slli a6, a6, 24
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: or a4, a6, a5
; RV64I-NEXT: lbu s4, 16(a0)
; RV64I-NEXT: lbu s5, 17(a0)
; RV64I-NEXT: lbu s6, 18(a0)
; RV64I-NEXT: lbu s7, 19(a0)
+; RV64I-NEXT: slli a4, a4, 8
+; RV64I-NEXT: slli s8, a5, 16
+; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: slli t0, t0, 8
; RV64I-NEXT: slli t1, t1, 16
; RV64I-NEXT: slli t2, t2, 24
+; RV64I-NEXT: or a5, a4, a3
+; RV64I-NEXT: or a6, a6, s8
+; RV64I-NEXT: or a3, t0, a7
+; RV64I-NEXT: or a4, t2, t1
+; RV64I-NEXT: lbu s8, 20(a0)
+; RV64I-NEXT: lbu s9, 21(a0)
+; RV64I-NEXT: lbu s10, 22(a0)
+; RV64I-NEXT: lbu s11, 23(a0)
; RV64I-NEXT: slli t4, t4, 8
; RV64I-NEXT: slli t5, t5, 16
; RV64I-NEXT: slli t6, t6, 24
-; RV64I-NEXT: or a5, t0, a7
-; RV64I-NEXT: or a6, t2, t1
-; RV64I-NEXT: or a7, t4, t3
-; RV64I-NEXT: or t0, t6, t5
-; RV64I-NEXT: lbu t5, 20(a0)
-; RV64I-NEXT: lbu t6, 21(a0)
-; RV64I-NEXT: lbu s8, 22(a0)
-; RV64I-NEXT: lbu s9, 23(a0)
; RV64I-NEXT: slli s1, s1, 8
; RV64I-NEXT: slli s2, s2, 16
; RV64I-NEXT: slli s3, s3, 24
+; RV64I-NEXT: or a7, t4, t3
+; RV64I-NEXT: or t0, t6, t5
+; RV64I-NEXT: or t1, s1, s0
+; RV64I-NEXT: or t2, s3, s2
+; RV64I-NEXT: lbu t6, 24(a0)
+; RV64I-NEXT: lbu s0, 25(a0)
+; RV64I-NEXT: lbu s1, 26(a0)
+; RV64I-NEXT: lbu s2, 27(a0)
; RV64I-NEXT: slli s5, s5, 8
; RV64I-NEXT: slli s6, s6, 16
; RV64I-NEXT: slli s7, s7, 24
-; RV64I-NEXT: or t1, s1, s0
-; RV64I-NEXT: or t2, s3, s2
+; RV64I-NEXT: slli s9, s9, 8
; RV64I-NEXT: or t3, s5, s4
; RV64I-NEXT: or t4, s7, s6
-; RV64I-NEXT: lbu s0, 24(a0)
-; RV64I-NEXT: lbu s1, 25(a0)
-; RV64I-NEXT: lbu s2, 26(a0)
-; RV64I-NEXT: lbu s3, 27(a0)
-; RV64I-NEXT: slli t6, t6, 8
-; RV64I-NEXT: slli s8, s8, 16
-; RV64I-NEXT: slli s9, s9, 24
-; RV64I-NEXT: slli s1, s1, 8
-; RV64I-NEXT: or t5, t6, t5
-; RV64I-NEXT: or t6, s9, s8
-; RV64I-NEXT: or s0, s1, s0
-; RV64I-NEXT: lbu s1, 28(a0)
+; RV64I-NEXT: or t5, s9, s8
+; RV64I-NEXT: lbu s3, 28(a0)
; RV64I-NEXT: lbu s4, 29(a0)
; RV64I-NEXT: lbu s5, 30(a0)
; RV64I-NEXT: lbu s6, 31(a0)
-; RV64I-NEXT: lbu a0, 0(a1)
+; RV64I-NEXT: slli s10, s10, 16
+; RV64I-NEXT: slli s11, s11, 24
+; RV64I-NEXT: slli s0, s0, 8
+; RV64I-NEXT: slli s1, s1, 16
+; RV64I-NEXT: slli s2, s2, 24
+; RV64I-NEXT: slli s4, s4, 8
+; RV64I-NEXT: or a0, s11, s10
+; RV64I-NEXT: or t6, s0, t6
+; RV64I-NEXT: or s0, s2, s1
+; RV64I-NEXT: or s1, s4, s3
+; RV64I-NEXT: lbu s2, 0(a1)
+; RV64I-NEXT: lbu s3, 1(a1)
+; RV64I-NEXT: lbu s4, 2(a1)
+; RV64I-NEXT: lbu s7, 3(a1)
+; RV64I-NEXT: slli s5, s5, 16
+; RV64I-NEXT: slli s6, s6, 24
+; RV64I-NEXT: slli s3, s3, 8
+; RV64I-NEXT: slli s4, s4, 16
+; RV64I-NEXT: slli s7, s7, 24
+; RV64I-NEXT: or s5, s6, s5
+; RV64I-NEXT: or s2, s3, s2
+; RV64I-NEXT: or s3, s7, s4
+; RV64I-NEXT: lbu s4, 5(a1)
+; RV64I-NEXT: lbu s6, 4(a1)
+; RV64I-NEXT: lbu s7, 6(a1)
+; RV64I-NEXT: lbu a1, 7(a1)
+; RV64I-NEXT: slli s4, s4, 8
+; RV64I-NEXT: or s4, s4, s6
+; RV64I-NEXT: slli s7, s7, 16
+; RV64I-NEXT: slli a1, a1, 24
+; RV64I-NEXT: or a1, a1, s7
; RV64I-NEXT: sd zero, 32(sp)
; RV64I-NEXT: sd zero, 40(sp)
; RV64I-NEXT: sd zero, 48(sp)
; RV64I-NEXT: sd zero, 56(sp)
-; RV64I-NEXT: slli s2, s2, 16
-; RV64I-NEXT: slli s3, s3, 24
-; RV64I-NEXT: or a1, s3, s2
-; RV64I-NEXT: mv s2, sp
-; RV64I-NEXT: slli s4, s4, 8
-; RV64I-NEXT: slli s5, s5, 16
-; RV64I-NEXT: slli s6, s6, 24
-; RV64I-NEXT: or s1, s4, s1
-; RV64I-NEXT: srli s3, a0, 3
-; RV64I-NEXT: or s4, s6, s5
-; RV64I-NEXT: andi s5, a0, 63
-; RV64I-NEXT: andi s3, s3, 24
-; RV64I-NEXT: xori s5, s5, 63
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a5, t0, a7
-; RV64I-NEXT: or a6, t2, t1
-; RV64I-NEXT: or a7, t4, t3
-; RV64I-NEXT: or t0, t6, t5
-; RV64I-NEXT: or a1, a1, s0
-; RV64I-NEXT: or t1, s4, s1
-; RV64I-NEXT: add s2, s2, s3
-; RV64I-NEXT: slli a4, a4, 32
-; RV64I-NEXT: slli a6, a6, 32
-; RV64I-NEXT: slli t0, t0, 32
-; RV64I-NEXT: slli t1, t1, 32
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: mv a6, sp
; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a5, t0, a7
-; RV64I-NEXT: or a1, t1, a1
+; RV64I-NEXT: or a4, t0, a7
+; RV64I-NEXT: or a7, t2, t1
+; RV64I-NEXT: or t0, t4, t3
+; RV64I-NEXT: or a0, a0, t5
+; RV64I-NEXT: or t1, s0, t6
+; RV64I-NEXT: or t2, s5, s1
+; RV64I-NEXT: or t3, s3, s2
+; RV64I-NEXT: or a1, a1, s4
+; RV64I-NEXT: slli a3, a3, 32
+; RV64I-NEXT: slli a7, a7, 32
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: slli t2, t2, 32
+; RV64I-NEXT: slli a1, a1, 32
+; RV64I-NEXT: or a3, a3, a5
+; RV64I-NEXT: or a4, a7, a4
+; RV64I-NEXT: or a0, a0, t0
+; RV64I-NEXT: or a5, t2, t1
+; RV64I-NEXT: or a1, a1, t3
; RV64I-NEXT: sd a3, 0(sp)
; RV64I-NEXT: sd a4, 8(sp)
-; RV64I-NEXT: sd a5, 16(sp)
-; RV64I-NEXT: sd a1, 24(sp)
-; RV64I-NEXT: ld a1, 8(s2)
-; RV64I-NEXT: ld a3, 16(s2)
-; RV64I-NEXT: ld a4, 0(s2)
-; RV64I-NEXT: ld a5, 24(s2)
-; RV64I-NEXT: srl a6, a1, a0
-; RV64I-NEXT: slli a7, a3, 1
-; RV64I-NEXT: srl a4, a4, a0
-; RV64I-NEXT: slli a1, a1, 1
-; RV64I-NEXT: srl a3, a3, a0
+; RV64I-NEXT: sd a0, 16(sp)
+; RV64I-NEXT: sd a5, 24(sp)
+; RV64I-NEXT: srli a0, a1, 3
+; RV64I-NEXT: andi a3, a1, 63
+; RV64I-NEXT: andi a0, a0, 24
+; RV64I-NEXT: xori a3, a3, 63
+; RV64I-NEXT: add a0, a6, a0
+; RV64I-NEXT: ld a4, 8(a0)
+; RV64I-NEXT: ld a5, 16(a0)
+; RV64I-NEXT: ld a6, 0(a0)
+; RV64I-NEXT: ld a0, 24(a0)
+; RV64I-NEXT: srl a7, a4, a1
; RV64I-NEXT: slli t0, a5, 1
-; RV64I-NEXT: srl a5, a5, a0
-; RV64I-NEXT: sll a0, a7, s5
-; RV64I-NEXT: sll a1, a1, s5
-; RV64I-NEXT: sll a7, t0, s5
-; RV64I-NEXT: srli t0, a5, 56
-; RV64I-NEXT: srli t1, a5, 48
-; RV64I-NEXT: srli t2, a5, 40
-; RV64I-NEXT: srli t3, a5, 32
-; RV64I-NEXT: srli t4, a5, 24
-; RV64I-NEXT: srli t5, a5, 16
-; RV64I-NEXT: srli t6, a5, 8
-; RV64I-NEXT: or a0, a6, a0
-; RV64I-NEXT: or a1, a4, a1
-; RV64I-NEXT: or a3, a3, a7
+; RV64I-NEXT: srl a6, a6, a1
+; RV64I-NEXT: slli a4, a4, 1
+; RV64I-NEXT: srl a5, a5, a1
+; RV64I-NEXT: slli t1, a0, 1
+; RV64I-NEXT: srl t2, a0, a1
+; RV64I-NEXT: sll a0, t0, a3
+; RV64I-NEXT: sll a1, a4, a3
+; RV64I-NEXT: sll a3, t1, a3
+; RV64I-NEXT: srli a4, t2, 56
+; RV64I-NEXT: srli t0, t2, 48
+; RV64I-NEXT: srli t1, t2, 40
+; RV64I-NEXT: srli t3, t2, 32
+; RV64I-NEXT: srli t4, t2, 24
+; RV64I-NEXT: srli t5, t2, 16
+; RV64I-NEXT: srli t6, t2, 8
+; RV64I-NEXT: or a0, a7, a0
+; RV64I-NEXT: or a1, a6, a1
+; RV64I-NEXT: or a3, a5, a3
; RV64I-NEXT: sb t3, 28(a2)
-; RV64I-NEXT: sb t2, 29(a2)
-; RV64I-NEXT: sb t1, 30(a2)
-; RV64I-NEXT: sb t0, 31(a2)
-; RV64I-NEXT: sb a5, 24(a2)
+; RV64I-NEXT: sb t1, 29(a2)
+; RV64I-NEXT: sb t0, 30(a2)
+; RV64I-NEXT: sb a4, 31(a2)
+; RV64I-NEXT: sb t2, 24(a2)
; RV64I-NEXT: sb t6, 25(a2)
; RV64I-NEXT: sb t5, 26(a2)
; RV64I-NEXT: sb t4, 27(a2)
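The RV64 body runs the same lowering at doubled granularity: eight dwords in a 64-byte buffer, the offset mask widened from 12 to 24 (andi (bits>>3), 24), and the funnel complement via xori 63. The extra s10/s11 spills and the 144-to-160 frame growth appear to come from assembling the 8-byte shift amount in registers alongside the data. Same headers and caveats as the RV32 sketches:

static void lshr_32bytes_rv64_sketch(const uint8_t *src, uint64_t bits, uint8_t *dst) {
    uint64_t buf[8] = {0};                        // sd zero, 32..56(sp)
    memcpy(buf, src, 32);
    const uint64_t *w = buf + ((bits >> 6) & 3);  // andi (bits>>3), 24
    uint64_t s = bits & 63;
    for (int i = 0; i < 4; i++) {
        uint64_t r = (w[i] >> s) | ((w[i + 1] << 1) << (s ^ 63));  // xori 63
        memcpy(dst + 8 * i, &r, 8);
    }
}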
@@ -1463,17 +1513,19 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: sb a1, 9(a2)
; RV64I-NEXT: sb a5, 10(a2)
; RV64I-NEXT: sb a3, 11(a2)
-; RV64I-NEXT: ld s0, 136(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 128(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 120(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 112(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s4, 104(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s5, 96(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s6, 88(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s7, 80(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s8, 72(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s9, 64(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 144
+; RV64I-NEXT: ld s0, 152(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 144(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 136(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 128(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s4, 120(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s5, 112(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s6, 104(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s7, 96(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s8, 88(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s9, 80(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s10, 72(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s11, 64(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 160
; RV64I-NEXT: ret
;
; RV32I-LABEL: lshr_32bytes:
@@ -1498,55 +1550,67 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: lbu a7, 3(a0)
; RV32I-NEXT: lbu a5, 4(a0)
; RV32I-NEXT: lbu t0, 5(a0)
-; RV32I-NEXT: lbu t3, 6(a0)
-; RV32I-NEXT: lbu t6, 7(a0)
-; RV32I-NEXT: lbu s2, 8(a0)
-; RV32I-NEXT: lbu s3, 9(a0)
-; RV32I-NEXT: lbu s4, 10(a0)
-; RV32I-NEXT: lbu s5, 11(a0)
-; RV32I-NEXT: lbu s7, 12(a0)
-; RV32I-NEXT: lbu s8, 13(a0)
-; RV32I-NEXT: lbu s9, 14(a0)
-; RV32I-NEXT: lbu s10, 15(a0)
-; RV32I-NEXT: lbu s11, 16(a0)
-; RV32I-NEXT: lbu ra, 17(a0)
-; RV32I-NEXT: lbu t4, 18(a0)
-; RV32I-NEXT: lbu s0, 19(a0)
+; RV32I-NEXT: lbu t1, 6(a0)
+; RV32I-NEXT: lbu t2, 7(a0)
+; RV32I-NEXT: lbu t3, 8(a0)
+; RV32I-NEXT: lbu t4, 9(a0)
+; RV32I-NEXT: lbu t5, 10(a0)
+; RV32I-NEXT: lbu t6, 11(a0)
+; RV32I-NEXT: lbu s0, 12(a0)
+; RV32I-NEXT: lbu s2, 13(a0)
+; RV32I-NEXT: lbu s4, 14(a0)
+; RV32I-NEXT: lbu s5, 15(a0)
+; RV32I-NEXT: lbu s6, 16(a0)
+; RV32I-NEXT: lbu s7, 17(a0)
+; RV32I-NEXT: lbu s8, 18(a0)
+; RV32I-NEXT: lbu s9, 19(a0)
; RV32I-NEXT: slli a4, a4, 8
; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli a7, a7, 24
; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: sw a3, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: or a4, a7, a6
-; RV32I-NEXT: lbu t1, 20(a0)
-; RV32I-NEXT: lbu t2, 21(a0)
-; RV32I-NEXT: lbu t5, 22(a0)
-; RV32I-NEXT: lbu s1, 23(a0)
+; RV32I-NEXT: lbu s10, 20(a0)
+; RV32I-NEXT: lbu s11, 21(a0)
+; RV32I-NEXT: lbu ra, 22(a0)
+; RV32I-NEXT: lbu a3, 23(a0)
; RV32I-NEXT: slli t0, t0, 8
-; RV32I-NEXT: slli t3, t3, 16
+; RV32I-NEXT: slli t1, t1, 16
+; RV32I-NEXT: slli t2, t2, 24
+; RV32I-NEXT: slli t4, t4, 8
+; RV32I-NEXT: slli t5, t5, 16
; RV32I-NEXT: slli t6, t6, 24
-; RV32I-NEXT: slli s3, s3, 8
+; RV32I-NEXT: or a5, t0, a5
+; RV32I-NEXT: or a6, t2, t1
+; RV32I-NEXT: or a7, t4, t3
+; RV32I-NEXT: or t0, t6, t5
+; RV32I-NEXT: lbu s1, 24(a0)
+; RV32I-NEXT: lbu s3, 25(a0)
+; RV32I-NEXT: lbu t4, 26(a0)
+; RV32I-NEXT: lbu t5, 27(a0)
+; RV32I-NEXT: slli s2, s2, 8
; RV32I-NEXT: slli s4, s4, 16
; RV32I-NEXT: slli s5, s5, 24
-; RV32I-NEXT: or a5, t0, a5
-; RV32I-NEXT: or a6, t6, t3
-; RV32I-NEXT: or a7, s3, s2
-; RV32I-NEXT: or t0, s5, s4
-; RV32I-NEXT: lbu t3, 24(a0)
-; RV32I-NEXT: lbu s5, 25(a0)
-; RV32I-NEXT: lbu s6, 26(a0)
-; RV32I-NEXT: lbu t6, 27(a0)
-; RV32I-NEXT: slli s8, s8, 8
-; RV32I-NEXT: slli s9, s9, 16
-; RV32I-NEXT: slli s10, s10, 24
-; RV32I-NEXT: slli ra, ra, 8
-; RV32I-NEXT: or s7, s8, s7
-; RV32I-NEXT: or s2, s10, s9
-; RV32I-NEXT: or s3, ra, s11
-; RV32I-NEXT: lbu s4, 28(a0)
-; RV32I-NEXT: lbu s8, 29(a0)
-; RV32I-NEXT: lbu s9, 30(a0)
-; RV32I-NEXT: lbu s10, 31(a0)
-; RV32I-NEXT: lbu a0, 0(a1)
+; RV32I-NEXT: slli s7, s7, 8
+; RV32I-NEXT: or t1, s2, s0
+; RV32I-NEXT: or t2, s5, s4
+; RV32I-NEXT: or t3, s7, s6
+; RV32I-NEXT: lbu t6, 28(a0)
+; RV32I-NEXT: lbu s4, 29(a0)
+; RV32I-NEXT: lbu s5, 30(a0)
+; RV32I-NEXT: lbu s6, 31(a0)
+; RV32I-NEXT: slli s8, s8, 16
+; RV32I-NEXT: slli s9, s9, 24
+; RV32I-NEXT: slli s11, s11, 8
+; RV32I-NEXT: slli ra, ra, 16
+; RV32I-NEXT: slli a3, a3, 24
+; RV32I-NEXT: or a0, s9, s8
+; RV32I-NEXT: or s0, s11, s10
+; RV32I-NEXT: or s2, a3, ra
+; RV32I-NEXT: lbu a3, 0(a1)
+; RV32I-NEXT: lbu s7, 1(a1)
+; RV32I-NEXT: lbu s8, 2(a1)
+; RV32I-NEXT: lbu a1, 3(a1)
; RV32I-NEXT: sw zero, 56(sp)
; RV32I-NEXT: sw zero, 60(sp)
; RV32I-NEXT: sw zero, 64(sp)
@@ -1555,90 +1619,89 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: sw zero, 44(sp)
; RV32I-NEXT: sw zero, 48(sp)
; RV32I-NEXT: sw zero, 52(sp)
+; RV32I-NEXT: slli s3, s3, 8
+; RV32I-NEXT: or s1, s3, s1
+; RV32I-NEXT: addi s3, sp, 8
; RV32I-NEXT: slli t4, t4, 16
-; RV32I-NEXT: slli s0, s0, 24
-; RV32I-NEXT: or t4, s0, t4
-; RV32I-NEXT: addi s0, sp, 8
-; RV32I-NEXT: slli t2, t2, 8
-; RV32I-NEXT: slli t5, t5, 16
-; RV32I-NEXT: slli s1, s1, 24
-; RV32I-NEXT: slli s5, s5, 8
-; RV32I-NEXT: slli s6, s6, 16
-; RV32I-NEXT: slli t6, t6, 24
-; RV32I-NEXT: slli s8, s8, 8
-; RV32I-NEXT: slli s9, s9, 16
-; RV32I-NEXT: slli s10, s10, 24
-; RV32I-NEXT: or t1, t2, t1
+; RV32I-NEXT: slli t5, t5, 24
+; RV32I-NEXT: slli s4, s4, 8
+; RV32I-NEXT: slli s5, s5, 16
+; RV32I-NEXT: slli s6, s6, 24
+; RV32I-NEXT: slli s7, s7, 8
+; RV32I-NEXT: slli s8, s8, 16
+; RV32I-NEXT: slli a1, a1, 24
+; RV32I-NEXT: or t4, t5, t4
+; RV32I-NEXT: or t5, s4, t6
+; RV32I-NEXT: or t6, s6, s5
+; RV32I-NEXT: or a3, s7, a3
+; RV32I-NEXT: or a1, a1, s8
+; RV32I-NEXT: lw s4, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: or a4, a4, s4
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: or a6, t0, a7
+; RV32I-NEXT: or a7, t2, t1
+; RV32I-NEXT: or t0, a0, t3
+; RV32I-NEXT: or t1, s2, s0
+; RV32I-NEXT: or t2, t4, s1
+; RV32I-NEXT: or t3, t6, t5
+; RV32I-NEXT: or a0, a1, a3
+; RV32I-NEXT: sw t0, 24(sp)
+; RV32I-NEXT: sw t1, 28(sp)
+; RV32I-NEXT: sw t2, 32(sp)
+; RV32I-NEXT: sw t3, 36(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a6, 16(sp)
+; RV32I-NEXT: sw a7, 20(sp)
; RV32I-NEXT: srli a1, a0, 3
-; RV32I-NEXT: or t2, s1, t5
-; RV32I-NEXT: andi t5, a0, 31
-; RV32I-NEXT: or t3, s5, t3
-; RV32I-NEXT: or t6, t6, s6
-; RV32I-NEXT: or s1, s8, s4
-; RV32I-NEXT: or s4, s10, s9
-; RV32I-NEXT: andi s5, a1, 28
-; RV32I-NEXT: xori a1, t5, 31
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: or a4, a6, a5
-; RV32I-NEXT: or a5, t0, a7
-; RV32I-NEXT: or a6, s2, s7
-; RV32I-NEXT: or a7, t4, s3
-; RV32I-NEXT: or t0, t2, t1
-; RV32I-NEXT: or t1, t6, t3
-; RV32I-NEXT: or t2, s4, s1
-; RV32I-NEXT: add s0, s0, s5
-; RV32I-NEXT: sw a7, 24(sp)
-; RV32I-NEXT: sw t0, 28(sp)
-; RV32I-NEXT: sw t1, 32(sp)
-; RV32I-NEXT: sw t2, 36(sp)
-; RV32I-NEXT: sw a3, 8(sp)
-; RV32I-NEXT: sw a4, 12(sp)
-; RV32I-NEXT: sw a5, 16(sp)
-; RV32I-NEXT: sw a6, 20(sp)
-; RV32I-NEXT: lw a3, 0(s0)
-; RV32I-NEXT: lw a4, 4(s0)
-; RV32I-NEXT: lw a5, 8(s0)
-; RV32I-NEXT: lw a6, 12(s0)
-; RV32I-NEXT: lw a7, 16(s0)
-; RV32I-NEXT: lw t0, 20(s0)
-; RV32I-NEXT: lw t1, 24(s0)
-; RV32I-NEXT: lw t2, 28(s0)
-; RV32I-NEXT: srl t3, a4, a0
-; RV32I-NEXT: slli t4, a5, 1
+; RV32I-NEXT: andi a3, a0, 31
+; RV32I-NEXT: andi a4, a1, 28
+; RV32I-NEXT: xori a1, a3, 31
+; RV32I-NEXT: add a4, s3, a4
+; RV32I-NEXT: lw a3, 0(a4)
+; RV32I-NEXT: lw a5, 4(a4)
+; RV32I-NEXT: lw a6, 8(a4)
+; RV32I-NEXT: lw a7, 12(a4)
+; RV32I-NEXT: lw t0, 16(a4)
+; RV32I-NEXT: lw t1, 20(a4)
+; RV32I-NEXT: lw t2, 24(a4)
+; RV32I-NEXT: lw a4, 28(a4)
+; RV32I-NEXT: srl t3, a5, a0
+; RV32I-NEXT: slli t4, a6, 1
; RV32I-NEXT: srl a3, a3, a0
-; RV32I-NEXT: slli a4, a4, 1
-; RV32I-NEXT: srl t5, a6, a0
-; RV32I-NEXT: slli t6, a7, 1
-; RV32I-NEXT: srl a5, a5, a0
-; RV32I-NEXT: slli a6, a6, 1
-; RV32I-NEXT: srl s0, t0, a0
-; RV32I-NEXT: slli s1, t1, 1
-; RV32I-NEXT: srl a7, a7, a0
-; RV32I-NEXT: slli t0, t0, 1
-; RV32I-NEXT: srl t1, t1, a0
-; RV32I-NEXT: slli s2, t2, 1
+; RV32I-NEXT: slli a5, a5, 1
+; RV32I-NEXT: srl t5, a7, a0
+; RV32I-NEXT: slli t6, t0, 1
+; RV32I-NEXT: srl a6, a6, a0
+; RV32I-NEXT: slli a7, a7, 1
+; RV32I-NEXT: srl s0, t1, a0
+; RV32I-NEXT: slli s1, t2, 1
+; RV32I-NEXT: srl t0, t0, a0
+; RV32I-NEXT: slli t1, t1, 1
; RV32I-NEXT: srl t2, t2, a0
+; RV32I-NEXT: slli s2, a4, 1
+; RV32I-NEXT: srl s3, a4, a0
; RV32I-NEXT: sll a0, t4, a1
-; RV32I-NEXT: sll a4, a4, a1
-; RV32I-NEXT: sll t4, t6, a1
-; RV32I-NEXT: sll a6, a6, a1
-; RV32I-NEXT: sll t6, s1, a1
-; RV32I-NEXT: sll t0, t0, a1
-; RV32I-NEXT: sll s1, s2, a1
-; RV32I-NEXT: srli s2, t2, 24
-; RV32I-NEXT: srli s3, t2, 16
-; RV32I-NEXT: srli s4, t2, 8
+; RV32I-NEXT: sll a4, a5, a1
+; RV32I-NEXT: sll a5, t6, a1
+; RV32I-NEXT: sll a7, a7, a1
+; RV32I-NEXT: sll t4, s1, a1
+; RV32I-NEXT: sll t1, t1, a1
+; RV32I-NEXT: sll t6, s2, a1
+; RV32I-NEXT: srli s1, s3, 24
+; RV32I-NEXT: srli s2, s3, 16
+; RV32I-NEXT: srli s4, s3, 8
; RV32I-NEXT: or a0, t3, a0
; RV32I-NEXT: or a1, a3, a4
-; RV32I-NEXT: or a3, t5, t4
-; RV32I-NEXT: or a4, a5, a6
-; RV32I-NEXT: or a5, s0, t6
-; RV32I-NEXT: or a6, a7, t0
-; RV32I-NEXT: or a7, t1, s1
-; RV32I-NEXT: sb t2, 28(a2)
+; RV32I-NEXT: or a3, t5, a5
+; RV32I-NEXT: or a4, a6, a7
+; RV32I-NEXT: or a5, s0, t4
+; RV32I-NEXT: or a6, t0, t1
+; RV32I-NEXT: or a7, t2, t6
+; RV32I-NEXT: sb s3, 28(a2)
; RV32I-NEXT: sb s4, 29(a2)
-; RV32I-NEXT: sb s3, 30(a2)
-; RV32I-NEXT: sb s2, 31(a2)
+; RV32I-NEXT: sb s2, 30(a2)
+; RV32I-NEXT: sb s1, 31(a2)
; RV32I-NEXT: srli t0, a7, 24
; RV32I-NEXT: srli t1, a7, 16
; RV32I-NEXT: srli t2, a7, 8
@@ -1712,17 +1775,19 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-LABEL: shl_32bytes:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -144
-; RV64I-NEXT: sd s0, 136(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 128(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 120(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 112(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s4, 104(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s5, 96(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s6, 88(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s7, 80(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s8, 72(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s9, 64(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -160
+; RV64I-NEXT: sd s0, 152(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 144(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 136(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 128(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s4, 120(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s5, 112(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s6, 104(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s7, 96(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s8, 88(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s9, 80(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s10, 72(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s11, 64(sp) # 8-byte Folded Spill
; RV64I-NEXT: lbu a3, 0(a0)
; RV64I-NEXT: lbu a4, 1(a0)
; RV64I-NEXT: lbu a5, 2(a0)
@@ -1739,125 +1804,146 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: lbu s1, 13(a0)
; RV64I-NEXT: lbu s2, 14(a0)
; RV64I-NEXT: lbu s3, 15(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: slli a5, a5, 16
-; RV64I-NEXT: slli a6, a6, 24
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: or a4, a6, a5
; RV64I-NEXT: lbu s4, 16(a0)
; RV64I-NEXT: lbu s5, 17(a0)
; RV64I-NEXT: lbu s6, 18(a0)
; RV64I-NEXT: lbu s7, 19(a0)
+; RV64I-NEXT: slli a4, a4, 8
+; RV64I-NEXT: slli s8, a5, 16
+; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: slli t0, t0, 8
; RV64I-NEXT: slli t1, t1, 16
; RV64I-NEXT: slli t2, t2, 24
+; RV64I-NEXT: or a5, a4, a3
+; RV64I-NEXT: or a6, a6, s8
+; RV64I-NEXT: or a3, t0, a7
+; RV64I-NEXT: or a4, t2, t1
+; RV64I-NEXT: lbu s8, 20(a0)
+; RV64I-NEXT: lbu s9, 21(a0)
+; RV64I-NEXT: lbu s10, 22(a0)
+; RV64I-NEXT: lbu s11, 23(a0)
; RV64I-NEXT: slli t4, t4, 8
; RV64I-NEXT: slli t5, t5, 16
; RV64I-NEXT: slli t6, t6, 24
-; RV64I-NEXT: or a5, t0, a7
-; RV64I-NEXT: or a6, t2, t1
-; RV64I-NEXT: or a7, t4, t3
-; RV64I-NEXT: or t0, t6, t5
-; RV64I-NEXT: lbu t5, 20(a0)
-; RV64I-NEXT: lbu t6, 21(a0)
-; RV64I-NEXT: lbu s8, 22(a0)
-; RV64I-NEXT: lbu s9, 23(a0)
; RV64I-NEXT: slli s1, s1, 8
; RV64I-NEXT: slli s2, s2, 16
; RV64I-NEXT: slli s3, s3, 24
+; RV64I-NEXT: or a7, t4, t3
+; RV64I-NEXT: or t0, t6, t5
+; RV64I-NEXT: or t1, s1, s0
+; RV64I-NEXT: or t2, s3, s2
+; RV64I-NEXT: lbu t6, 24(a0)
+; RV64I-NEXT: lbu s0, 25(a0)
+; RV64I-NEXT: lbu s1, 26(a0)
+; RV64I-NEXT: lbu s2, 27(a0)
; RV64I-NEXT: slli s5, s5, 8
; RV64I-NEXT: slli s6, s6, 16
; RV64I-NEXT: slli s7, s7, 24
-; RV64I-NEXT: or t1, s1, s0
-; RV64I-NEXT: or t2, s3, s2
+; RV64I-NEXT: slli s9, s9, 8
; RV64I-NEXT: or t3, s5, s4
; RV64I-NEXT: or t4, s7, s6
-; RV64I-NEXT: lbu s0, 24(a0)
-; RV64I-NEXT: lbu s1, 25(a0)
-; RV64I-NEXT: lbu s2, 26(a0)
-; RV64I-NEXT: lbu s3, 27(a0)
-; RV64I-NEXT: slli t6, t6, 8
-; RV64I-NEXT: slli s8, s8, 16
-; RV64I-NEXT: slli s9, s9, 24
-; RV64I-NEXT: slli s1, s1, 8
-; RV64I-NEXT: or t5, t6, t5
-; RV64I-NEXT: or t6, s9, s8
-; RV64I-NEXT: or s0, s1, s0
-; RV64I-NEXT: lbu s1, 28(a0)
+; RV64I-NEXT: or t5, s9, s8
+; RV64I-NEXT: lbu s3, 28(a0)
; RV64I-NEXT: lbu s4, 29(a0)
; RV64I-NEXT: lbu s5, 30(a0)
; RV64I-NEXT: lbu s6, 31(a0)
-; RV64I-NEXT: lbu a0, 0(a1)
+; RV64I-NEXT: slli s10, s10, 16
+; RV64I-NEXT: slli s11, s11, 24
+; RV64I-NEXT: slli s0, s0, 8
+; RV64I-NEXT: slli s1, s1, 16
+; RV64I-NEXT: slli s2, s2, 24
+; RV64I-NEXT: slli s4, s4, 8
+; RV64I-NEXT: or a0, s11, s10
+; RV64I-NEXT: or t6, s0, t6
+; RV64I-NEXT: or s0, s2, s1
+; RV64I-NEXT: or s1, s4, s3
+; RV64I-NEXT: lbu s2, 0(a1)
+; RV64I-NEXT: lbu s3, 1(a1)
+; RV64I-NEXT: lbu s4, 2(a1)
+; RV64I-NEXT: lbu s7, 3(a1)
+; RV64I-NEXT: slli s5, s5, 16
+; RV64I-NEXT: slli s6, s6, 24
+; RV64I-NEXT: slli s3, s3, 8
+; RV64I-NEXT: slli s4, s4, 16
+; RV64I-NEXT: slli s7, s7, 24
+; RV64I-NEXT: or s5, s6, s5
+; RV64I-NEXT: or s2, s3, s2
+; RV64I-NEXT: or s3, s7, s4
+; RV64I-NEXT: lbu s4, 5(a1)
+; RV64I-NEXT: lbu s6, 4(a1)
+; RV64I-NEXT: lbu s7, 6(a1)
+; RV64I-NEXT: lbu a1, 7(a1)
+; RV64I-NEXT: slli s4, s4, 8
+; RV64I-NEXT: or s4, s4, s6
+; RV64I-NEXT: slli s7, s7, 16
+; RV64I-NEXT: slli a1, a1, 24
+; RV64I-NEXT: or a1, a1, s7
; RV64I-NEXT: sd zero, 0(sp)
; RV64I-NEXT: sd zero, 8(sp)
; RV64I-NEXT: sd zero, 16(sp)
; RV64I-NEXT: sd zero, 24(sp)
-; RV64I-NEXT: slli s2, s2, 16
-; RV64I-NEXT: slli s3, s3, 24
-; RV64I-NEXT: or a1, s3, s2
-; RV64I-NEXT: addi s2, sp, 32
-; RV64I-NEXT: slli s4, s4, 8
-; RV64I-NEXT: slli s5, s5, 16
-; RV64I-NEXT: slli s6, s6, 24
-; RV64I-NEXT: or s1, s4, s1
-; RV64I-NEXT: srli s3, a0, 3
-; RV64I-NEXT: or s4, s6, s5
-; RV64I-NEXT: andi s5, a0, 63
-; RV64I-NEXT: andi s3, s3, 24
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a5, t0, a7
-; RV64I-NEXT: or a6, t2, t1
-; RV64I-NEXT: or a7, t4, t3
-; RV64I-NEXT: or t0, t6, t5
-; RV64I-NEXT: or a1, a1, s0
-; RV64I-NEXT: or t1, s4, s1
-; RV64I-NEXT: sub t2, s2, s3
-; RV64I-NEXT: slli a4, a4, 32
-; RV64I-NEXT: slli a6, a6, 32
-; RV64I-NEXT: slli t0, t0, 32
-; RV64I-NEXT: slli t1, t1, 32
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: addi a6, sp, 32
; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a5, t0, a7
-; RV64I-NEXT: or a1, t1, a1
+; RV64I-NEXT: or a4, t0, a7
+; RV64I-NEXT: or a7, t2, t1
+; RV64I-NEXT: or t0, t4, t3
+; RV64I-NEXT: or a0, a0, t5
+; RV64I-NEXT: or t1, s0, t6
+; RV64I-NEXT: or t2, s5, s1
+; RV64I-NEXT: or t3, s3, s2
+; RV64I-NEXT: or a1, a1, s4
+; RV64I-NEXT: slli a3, a3, 32
+; RV64I-NEXT: slli a7, a7, 32
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: slli t2, t2, 32
+; RV64I-NEXT: slli a1, a1, 32
+; RV64I-NEXT: or a3, a3, a5
+; RV64I-NEXT: or a4, a7, a4
+; RV64I-NEXT: or a0, a0, t0
+; RV64I-NEXT: or a5, t2, t1
+; RV64I-NEXT: or a1, a1, t3
; RV64I-NEXT: sd a3, 32(sp)
; RV64I-NEXT: sd a4, 40(sp)
-; RV64I-NEXT: sd a5, 48(sp)
-; RV64I-NEXT: sd a1, 56(sp)
-; RV64I-NEXT: ld a1, 0(t2)
-; RV64I-NEXT: ld a3, 8(t2)
-; RV64I-NEXT: ld a4, 16(t2)
-; RV64I-NEXT: ld a5, 24(t2)
-; RV64I-NEXT: xori a6, s5, 63
-; RV64I-NEXT: sll a7, a3, a0
-; RV64I-NEXT: srli t0, a1, 1
-; RV64I-NEXT: sll a5, a5, a0
-; RV64I-NEXT: srli t1, a4, 1
-; RV64I-NEXT: sll a4, a4, a0
-; RV64I-NEXT: srli a3, a3, 1
-; RV64I-NEXT: sll t2, a1, a0
-; RV64I-NEXT: srl a0, t0, a6
-; RV64I-NEXT: srl a1, t1, a6
-; RV64I-NEXT: srl a3, a3, a6
-; RV64I-NEXT: srli a6, t2, 56
-; RV64I-NEXT: srli t0, t2, 48
-; RV64I-NEXT: srli t1, t2, 40
-; RV64I-NEXT: srli t3, t2, 32
-; RV64I-NEXT: srli t4, t2, 24
-; RV64I-NEXT: srli t5, t2, 16
-; RV64I-NEXT: srli t6, t2, 8
-; RV64I-NEXT: or a0, a7, a0
-; RV64I-NEXT: or a1, a5, a1
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: sb t3, 4(a2)
-; RV64I-NEXT: sb t1, 5(a2)
-; RV64I-NEXT: sb t0, 6(a2)
-; RV64I-NEXT: sb a6, 7(a2)
-; RV64I-NEXT: sb t2, 0(a2)
-; RV64I-NEXT: sb t6, 1(a2)
-; RV64I-NEXT: sb t5, 2(a2)
-; RV64I-NEXT: sb t4, 3(a2)
+; RV64I-NEXT: sd a0, 48(sp)
+; RV64I-NEXT: sd a5, 56(sp)
+; RV64I-NEXT: srli a0, a1, 3
+; RV64I-NEXT: andi a3, a1, 63
+; RV64I-NEXT: andi a0, a0, 24
+; RV64I-NEXT: sub a0, a6, a0
+; RV64I-NEXT: ld a4, 0(a0)
+; RV64I-NEXT: ld a5, 8(a0)
+; RV64I-NEXT: ld a6, 16(a0)
+; RV64I-NEXT: ld a0, 24(a0)
+; RV64I-NEXT: xori a3, a3, 63
+; RV64I-NEXT: sll a7, a5, a1
+; RV64I-NEXT: srli t0, a4, 1
+; RV64I-NEXT: sll t1, a0, a1
+; RV64I-NEXT: srli a0, a6, 1
+; RV64I-NEXT: sll a6, a6, a1
+; RV64I-NEXT: srli a5, a5, 1
+; RV64I-NEXT: sll a4, a4, a1
+; RV64I-NEXT: srl a1, t0, a3
+; RV64I-NEXT: srl t0, a0, a3
+; RV64I-NEXT: srl a3, a5, a3
+; RV64I-NEXT: srli a5, a4, 56
+; RV64I-NEXT: srli t2, a4, 48
+; RV64I-NEXT: srli t3, a4, 40
+; RV64I-NEXT: srli t4, a4, 32
+; RV64I-NEXT: srli t5, a4, 24
+; RV64I-NEXT: srli t6, a4, 16
+; RV64I-NEXT: srli s0, a4, 8
+; RV64I-NEXT: or a0, a7, a1
+; RV64I-NEXT: or a1, t1, t0
+; RV64I-NEXT: or a3, a6, a3
+; RV64I-NEXT: sb t4, 4(a2)
+; RV64I-NEXT: sb t3, 5(a2)
+; RV64I-NEXT: sb t2, 6(a2)
+; RV64I-NEXT: sb a5, 7(a2)
+; RV64I-NEXT: sb a4, 0(a2)
+; RV64I-NEXT: sb s0, 1(a2)
+; RV64I-NEXT: sb t6, 2(a2)
+; RV64I-NEXT: sb t5, 3(a2)
; RV64I-NEXT: srli a4, a3, 56
; RV64I-NEXT: srli a5, a3, 48
; RV64I-NEXT: srli a6, a3, 40
@@ -1903,17 +1989,19 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: sb a1, 9(a2)
; RV64I-NEXT: sb a5, 10(a2)
; RV64I-NEXT: sb a3, 11(a2)
-; RV64I-NEXT: ld s0, 136(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 128(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 120(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 112(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s4, 104(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s5, 96(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s6, 88(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s7, 80(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s8, 72(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s9, 64(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 144
+; RV64I-NEXT: ld s0, 152(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 144(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 136(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 128(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s4, 120(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s5, 112(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s6, 104(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s7, 96(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s8, 88(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s9, 80(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s10, 72(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s11, 64(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 160
; RV64I-NEXT: ret
;
; RV32I-LABEL: shl_32bytes:
@@ -1938,55 +2026,67 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: lbu a7, 3(a0)
; RV32I-NEXT: lbu a5, 4(a0)
; RV32I-NEXT: lbu t0, 5(a0)
-; RV32I-NEXT: lbu t3, 6(a0)
-; RV32I-NEXT: lbu t6, 7(a0)
-; RV32I-NEXT: lbu s2, 8(a0)
-; RV32I-NEXT: lbu s3, 9(a0)
-; RV32I-NEXT: lbu s4, 10(a0)
-; RV32I-NEXT: lbu s5, 11(a0)
-; RV32I-NEXT: lbu s7, 12(a0)
-; RV32I-NEXT: lbu s8, 13(a0)
-; RV32I-NEXT: lbu s9, 14(a0)
-; RV32I-NEXT: lbu s10, 15(a0)
-; RV32I-NEXT: lbu s11, 16(a0)
-; RV32I-NEXT: lbu ra, 17(a0)
-; RV32I-NEXT: lbu t4, 18(a0)
-; RV32I-NEXT: lbu s0, 19(a0)
+; RV32I-NEXT: lbu t1, 6(a0)
+; RV32I-NEXT: lbu t2, 7(a0)
+; RV32I-NEXT: lbu t3, 8(a0)
+; RV32I-NEXT: lbu t4, 9(a0)
+; RV32I-NEXT: lbu t5, 10(a0)
+; RV32I-NEXT: lbu t6, 11(a0)
+; RV32I-NEXT: lbu s0, 12(a0)
+; RV32I-NEXT: lbu s2, 13(a0)
+; RV32I-NEXT: lbu s4, 14(a0)
+; RV32I-NEXT: lbu s5, 15(a0)
+; RV32I-NEXT: lbu s6, 16(a0)
+; RV32I-NEXT: lbu s7, 17(a0)
+; RV32I-NEXT: lbu s8, 18(a0)
+; RV32I-NEXT: lbu s9, 19(a0)
; RV32I-NEXT: slli a4, a4, 8
; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli a7, a7, 24
; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: sw a3, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: or a4, a7, a6
-; RV32I-NEXT: lbu t1, 20(a0)
-; RV32I-NEXT: lbu t2, 21(a0)
-; RV32I-NEXT: lbu t5, 22(a0)
-; RV32I-NEXT: lbu s1, 23(a0)
+; RV32I-NEXT: lbu s10, 20(a0)
+; RV32I-NEXT: lbu s11, 21(a0)
+; RV32I-NEXT: lbu ra, 22(a0)
+; RV32I-NEXT: lbu a3, 23(a0)
; RV32I-NEXT: slli t0, t0, 8
-; RV32I-NEXT: slli t3, t3, 16
+; RV32I-NEXT: slli t1, t1, 16
+; RV32I-NEXT: slli t2, t2, 24
+; RV32I-NEXT: slli t4, t4, 8
+; RV32I-NEXT: slli t5, t5, 16
; RV32I-NEXT: slli t6, t6, 24
-; RV32I-NEXT: slli s3, s3, 8
+; RV32I-NEXT: or a5, t0, a5
+; RV32I-NEXT: or a6, t2, t1
+; RV32I-NEXT: or a7, t4, t3
+; RV32I-NEXT: or t0, t6, t5
+; RV32I-NEXT: lbu s1, 24(a0)
+; RV32I-NEXT: lbu s3, 25(a0)
+; RV32I-NEXT: lbu t4, 26(a0)
+; RV32I-NEXT: lbu t5, 27(a0)
+; RV32I-NEXT: slli s2, s2, 8
; RV32I-NEXT: slli s4, s4, 16
; RV32I-NEXT: slli s5, s5, 24
-; RV32I-NEXT: or a5, t0, a5
-; RV32I-NEXT: or a6, t6, t3
-; RV32I-NEXT: or a7, s3, s2
-; RV32I-NEXT: or t0, s5, s4
-; RV32I-NEXT: lbu t3, 24(a0)
-; RV32I-NEXT: lbu s5, 25(a0)
-; RV32I-NEXT: lbu s6, 26(a0)
-; RV32I-NEXT: lbu t6, 27(a0)
-; RV32I-NEXT: slli s8, s8, 8
-; RV32I-NEXT: slli s9, s9, 16
-; RV32I-NEXT: slli s10, s10, 24
-; RV32I-NEXT: slli ra, ra, 8
-; RV32I-NEXT: or s7, s8, s7
-; RV32I-NEXT: or s2, s10, s9
-; RV32I-NEXT: or s3, ra, s11
-; RV32I-NEXT: lbu s4, 28(a0)
-; RV32I-NEXT: lbu s8, 29(a0)
-; RV32I-NEXT: lbu s9, 30(a0)
-; RV32I-NEXT: lbu s10, 31(a0)
-; RV32I-NEXT: lbu a0, 0(a1)
+; RV32I-NEXT: slli s7, s7, 8
+; RV32I-NEXT: or t1, s2, s0
+; RV32I-NEXT: or t2, s5, s4
+; RV32I-NEXT: or t3, s7, s6
+; RV32I-NEXT: lbu t6, 28(a0)
+; RV32I-NEXT: lbu s4, 29(a0)
+; RV32I-NEXT: lbu s5, 30(a0)
+; RV32I-NEXT: lbu s6, 31(a0)
+; RV32I-NEXT: slli s8, s8, 16
+; RV32I-NEXT: slli s9, s9, 24
+; RV32I-NEXT: slli s11, s11, 8
+; RV32I-NEXT: slli ra, ra, 16
+; RV32I-NEXT: slli a3, a3, 24
+; RV32I-NEXT: or a0, s9, s8
+; RV32I-NEXT: or s0, s11, s10
+; RV32I-NEXT: or s2, a3, ra
+; RV32I-NEXT: lbu a3, 0(a1)
+; RV32I-NEXT: lbu s7, 1(a1)
+; RV32I-NEXT: lbu s8, 2(a1)
+; RV32I-NEXT: lbu a1, 3(a1)
; RV32I-NEXT: sw zero, 24(sp)
; RV32I-NEXT: sw zero, 28(sp)
; RV32I-NEXT: sw zero, 32(sp)
@@ -1995,89 +2095,88 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: sw zero, 12(sp)
; RV32I-NEXT: sw zero, 16(sp)
; RV32I-NEXT: sw zero, 20(sp)
+; RV32I-NEXT: slli s3, s3, 8
+; RV32I-NEXT: or s1, s3, s1
+; RV32I-NEXT: addi s3, sp, 40
; RV32I-NEXT: slli t4, t4, 16
-; RV32I-NEXT: slli s0, s0, 24
-; RV32I-NEXT: or t4, s0, t4
-; RV32I-NEXT: addi s0, sp, 40
-; RV32I-NEXT: slli t2, t2, 8
-; RV32I-NEXT: slli t5, t5, 16
-; RV32I-NEXT: slli s1, s1, 24
-; RV32I-NEXT: slli s5, s5, 8
-; RV32I-NEXT: slli s6, s6, 16
-; RV32I-NEXT: slli t6, t6, 24
-; RV32I-NEXT: slli s8, s8, 8
-; RV32I-NEXT: slli s9, s9, 16
-; RV32I-NEXT: slli s10, s10, 24
-; RV32I-NEXT: or t1, t2, t1
+; RV32I-NEXT: slli t5, t5, 24
+; RV32I-NEXT: slli s4, s4, 8
+; RV32I-NEXT: slli s5, s5, 16
+; RV32I-NEXT: slli s6, s6, 24
+; RV32I-NEXT: slli s7, s7, 8
+; RV32I-NEXT: slli s8, s8, 16
+; RV32I-NEXT: slli a1, a1, 24
+; RV32I-NEXT: or t4, t5, t4
+; RV32I-NEXT: or t5, s4, t6
+; RV32I-NEXT: or t6, s6, s5
+; RV32I-NEXT: or a3, s7, a3
+; RV32I-NEXT: or a1, a1, s8
+; RV32I-NEXT: lw s4, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: or a4, a4, s4
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: or a6, t0, a7
+; RV32I-NEXT: or a7, t2, t1
+; RV32I-NEXT: or t0, a0, t3
+; RV32I-NEXT: or t1, s2, s0
+; RV32I-NEXT: or t2, t4, s1
+; RV32I-NEXT: or t3, t6, t5
+; RV32I-NEXT: or a0, a1, a3
+; RV32I-NEXT: sw t0, 56(sp)
+; RV32I-NEXT: sw t1, 60(sp)
+; RV32I-NEXT: sw t2, 64(sp)
+; RV32I-NEXT: sw t3, 68(sp)
+; RV32I-NEXT: sw a4, 40(sp)
+; RV32I-NEXT: sw a5, 44(sp)
+; RV32I-NEXT: sw a6, 48(sp)
+; RV32I-NEXT: sw a7, 52(sp)
; RV32I-NEXT: srli a1, a0, 3
-; RV32I-NEXT: or t2, s1, t5
-; RV32I-NEXT: andi t5, a0, 31
-; RV32I-NEXT: or t3, s5, t3
-; RV32I-NEXT: or t6, t6, s6
-; RV32I-NEXT: or s1, s8, s4
-; RV32I-NEXT: or s4, s10, s9
-; RV32I-NEXT: andi s5, a1, 28
-; RV32I-NEXT: xori a1, t5, 31
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: or a4, a6, a5
-; RV32I-NEXT: or a5, t0, a7
-; RV32I-NEXT: or a6, s2, s7
-; RV32I-NEXT: or a7, t4, s3
-; RV32I-NEXT: or t0, t2, t1
-; RV32I-NEXT: or t1, t6, t3
-; RV32I-NEXT: or t2, s4, s1
-; RV32I-NEXT: sub t3, s0, s5
-; RV32I-NEXT: sw a7, 56(sp)
-; RV32I-NEXT: sw t0, 60(sp)
-; RV32I-NEXT: sw t1, 64(sp)
-; RV32I-NEXT: sw t2, 68(sp)
-; RV32I-NEXT: sw a3, 40(sp)
-; RV32I-NEXT: sw a4, 44(sp)
-; RV32I-NEXT: sw a5, 48(sp)
-; RV32I-NEXT: sw a6, 52(sp)
-; RV32I-NEXT: lw a3, 0(t3)
-; RV32I-NEXT: lw a4, 4(t3)
-; RV32I-NEXT: lw a5, 8(t3)
-; RV32I-NEXT: lw a6, 12(t3)
-; RV32I-NEXT: lw a7, 16(t3)
-; RV32I-NEXT: lw t0, 20(t3)
-; RV32I-NEXT: lw t1, 24(t3)
-; RV32I-NEXT: lw t2, 28(t3)
-; RV32I-NEXT: sll t3, a4, a0
-; RV32I-NEXT: srli t4, a3, 1
-; RV32I-NEXT: sll t5, a6, a0
-; RV32I-NEXT: srli t6, a5, 1
-; RV32I-NEXT: sll a5, a5, a0
-; RV32I-NEXT: srli a4, a4, 1
-; RV32I-NEXT: sll s0, t0, a0
-; RV32I-NEXT: srli s1, a7, 1
-; RV32I-NEXT: sll a7, a7, a0
-; RV32I-NEXT: srli a6, a6, 1
+; RV32I-NEXT: andi a3, a0, 31
+; RV32I-NEXT: andi a4, a1, 28
+; RV32I-NEXT: xori a1, a3, 31
+; RV32I-NEXT: sub a3, s3, a4
+; RV32I-NEXT: lw a4, 0(a3)
+; RV32I-NEXT: lw a5, 4(a3)
+; RV32I-NEXT: lw a6, 8(a3)
+; RV32I-NEXT: lw a7, 12(a3)
+; RV32I-NEXT: lw t0, 16(a3)
+; RV32I-NEXT: lw t1, 20(a3)
+; RV32I-NEXT: lw t2, 24(a3)
+; RV32I-NEXT: lw a3, 28(a3)
+; RV32I-NEXT: sll t3, a5, a0
+; RV32I-NEXT: srli t4, a4, 1
+; RV32I-NEXT: sll t5, a7, a0
+; RV32I-NEXT: srli t6, a6, 1
+; RV32I-NEXT: sll a6, a6, a0
+; RV32I-NEXT: srli a5, a5, 1
+; RV32I-NEXT: sll s0, t1, a0
+; RV32I-NEXT: srli s1, t0, 1
+; RV32I-NEXT: sll t0, t0, a0
+; RV32I-NEXT: srli a7, a7, 1
+; RV32I-NEXT: sll s2, a3, a0
+; RV32I-NEXT: srli a3, t2, 1
; RV32I-NEXT: sll t2, t2, a0
-; RV32I-NEXT: srli s2, t1, 1
-; RV32I-NEXT: sll t1, t1, a0
-; RV32I-NEXT: srli t0, t0, 1
-; RV32I-NEXT: sll s3, a3, a0
+; RV32I-NEXT: srli t1, t1, 1
+; RV32I-NEXT: sll s3, a4, a0
; RV32I-NEXT: srl a0, t4, a1
-; RV32I-NEXT: srl a3, t6, a1
-; RV32I-NEXT: srl a4, a4, a1
+; RV32I-NEXT: srl a4, t6, a1
+; RV32I-NEXT: srl a5, a5, a1
; RV32I-NEXT: srl t4, s1, a1
-; RV32I-NEXT: srl a6, a6, a1
-; RV32I-NEXT: srl t6, s2, a1
-; RV32I-NEXT: srl t0, t0, a1
+; RV32I-NEXT: srl a7, a7, a1
+; RV32I-NEXT: srl t6, a3, a1
+; RV32I-NEXT: srl t1, t1, a1
; RV32I-NEXT: srli s1, s3, 24
-; RV32I-NEXT: srli s2, s3, 16
-; RV32I-NEXT: srli s4, s3, 8
+; RV32I-NEXT: srli s4, s3, 16
+; RV32I-NEXT: srli s5, s3, 8
; RV32I-NEXT: or a0, t3, a0
-; RV32I-NEXT: or a1, t5, a3
-; RV32I-NEXT: or a3, a5, a4
+; RV32I-NEXT: or a1, t5, a4
+; RV32I-NEXT: or a3, a6, a5
; RV32I-NEXT: or a4, s0, t4
-; RV32I-NEXT: or a5, a7, a6
-; RV32I-NEXT: or a6, t2, t6
-; RV32I-NEXT: or a7, t1, t0
+; RV32I-NEXT: or a5, t0, a7
+; RV32I-NEXT: or a6, s2, t6
+; RV32I-NEXT: or a7, t2, t1
; RV32I-NEXT: sb s3, 0(a2)
-; RV32I-NEXT: sb s4, 1(a2)
-; RV32I-NEXT: sb s2, 2(a2)
+; RV32I-NEXT: sb s5, 1(a2)
+; RV32I-NEXT: sb s4, 2(a2)
; RV32I-NEXT: sb s1, 3(a2)
; RV32I-NEXT: srli t0, a7, 24
; RV32I-NEXT: srli t1, a7, 16
@@ -2152,17 +2251,19 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-LABEL: ashr_32bytes:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -144
-; RV64I-NEXT: sd s0, 136(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 128(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 120(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 112(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s4, 104(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s5, 96(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s6, 88(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s7, 80(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s8, 72(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s9, 64(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -160
+; RV64I-NEXT: sd s0, 152(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 144(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 136(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 128(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s4, 120(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s5, 112(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s6, 104(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s7, 96(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s8, 88(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s9, 80(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s10, 72(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s11, 64(sp) # 8-byte Folded Spill
; RV64I-NEXT: lbu a3, 0(a0)
; RV64I-NEXT: lbu a4, 1(a0)
; RV64I-NEXT: lbu a5, 2(a0)
@@ -2179,123 +2280,144 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: lbu s1, 13(a0)
; RV64I-NEXT: lbu s2, 14(a0)
; RV64I-NEXT: lbu s3, 15(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: slli a5, a5, 16
-; RV64I-NEXT: slli a6, a6, 24
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: or a4, a6, a5
; RV64I-NEXT: lbu s4, 16(a0)
; RV64I-NEXT: lbu s5, 17(a0)
; RV64I-NEXT: lbu s6, 18(a0)
; RV64I-NEXT: lbu s7, 19(a0)
+; RV64I-NEXT: slli a4, a4, 8
+; RV64I-NEXT: slli a5, a5, 16
+; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: slli t0, t0, 8
; RV64I-NEXT: slli t1, t1, 16
; RV64I-NEXT: slli t2, t2, 24
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: or a4, a6, a5
+; RV64I-NEXT: or a5, t0, a7
+; RV64I-NEXT: or a6, t2, t1
+; RV64I-NEXT: lbu s8, 20(a0)
+; RV64I-NEXT: lbu s9, 21(a0)
+; RV64I-NEXT: lbu s10, 22(a0)
+; RV64I-NEXT: lbu s11, 23(a0)
; RV64I-NEXT: slli t4, t4, 8
; RV64I-NEXT: slli t5, t5, 16
; RV64I-NEXT: slli t6, t6, 24
-; RV64I-NEXT: or a5, t0, a7
-; RV64I-NEXT: or a6, t2, t1
-; RV64I-NEXT: or a7, t4, t3
-; RV64I-NEXT: or t0, t6, t5
-; RV64I-NEXT: lbu t5, 20(a0)
-; RV64I-NEXT: lbu t6, 21(a0)
-; RV64I-NEXT: lbu s8, 22(a0)
-; RV64I-NEXT: lbu s9, 23(a0)
; RV64I-NEXT: slli s1, s1, 8
; RV64I-NEXT: slli s2, s2, 16
; RV64I-NEXT: slli s3, s3, 24
+; RV64I-NEXT: or a7, t4, t3
+; RV64I-NEXT: or t0, t6, t5
+; RV64I-NEXT: or t1, s1, s0
+; RV64I-NEXT: or t2, s3, s2
+; RV64I-NEXT: lbu t6, 24(a0)
+; RV64I-NEXT: lbu s0, 25(a0)
+; RV64I-NEXT: lbu s1, 26(a0)
+; RV64I-NEXT: lbu s2, 27(a0)
; RV64I-NEXT: slli s5, s5, 8
; RV64I-NEXT: slli s6, s6, 16
; RV64I-NEXT: slli s7, s7, 24
-; RV64I-NEXT: or t1, s1, s0
-; RV64I-NEXT: or t2, s3, s2
+; RV64I-NEXT: slli s9, s9, 8
; RV64I-NEXT: or t3, s5, s4
; RV64I-NEXT: or t4, s7, s6
-; RV64I-NEXT: lbu s0, 24(a0)
-; RV64I-NEXT: lbu s1, 25(a0)
-; RV64I-NEXT: lbu s2, 26(a0)
-; RV64I-NEXT: lbu s3, 27(a0)
-; RV64I-NEXT: slli t6, t6, 8
-; RV64I-NEXT: slli s8, s8, 16
-; RV64I-NEXT: slli s9, s9, 24
-; RV64I-NEXT: slli s1, s1, 8
-; RV64I-NEXT: or t5, t6, t5
-; RV64I-NEXT: or t6, s9, s8
-; RV64I-NEXT: or s0, s1, s0
-; RV64I-NEXT: lbu s1, 28(a0)
+; RV64I-NEXT: or t5, s9, s8
+; RV64I-NEXT: lbu s3, 28(a0)
; RV64I-NEXT: lbu s4, 29(a0)
; RV64I-NEXT: lbu s5, 30(a0)
; RV64I-NEXT: lbu s6, 31(a0)
-; RV64I-NEXT: lbu a0, 0(a1)
-; RV64I-NEXT: slli s2, s2, 16
-; RV64I-NEXT: slli s3, s3, 24
-; RV64I-NEXT: or a1, s3, s2
-; RV64I-NEXT: mv s2, sp
+; RV64I-NEXT: slli s10, s10, 16
+; RV64I-NEXT: slli s11, s11, 24
+; RV64I-NEXT: slli s0, s0, 8
+; RV64I-NEXT: slli s1, s1, 16
+; RV64I-NEXT: slli s2, s2, 24
; RV64I-NEXT: slli s4, s4, 8
+; RV64I-NEXT: or a0, s11, s10
+; RV64I-NEXT: or t6, s0, t6
+; RV64I-NEXT: or s0, s2, s1
+; RV64I-NEXT: or s1, s4, s3
+; RV64I-NEXT: lbu s2, 0(a1)
+; RV64I-NEXT: lbu s3, 1(a1)
+; RV64I-NEXT: lbu s4, 2(a1)
+; RV64I-NEXT: lbu s7, 3(a1)
; RV64I-NEXT: slli s5, s5, 16
; RV64I-NEXT: slli s6, s6, 24
-; RV64I-NEXT: or s1, s4, s1
-; RV64I-NEXT: srli s3, a0, 3
-; RV64I-NEXT: or s4, s6, s5
-; RV64I-NEXT: andi s5, a0, 63
-; RV64I-NEXT: andi s3, s3, 24
-; RV64I-NEXT: xori s5, s5, 63
+; RV64I-NEXT: slli s3, s3, 8
+; RV64I-NEXT: slli s4, s4, 16
+; RV64I-NEXT: slli s7, s7, 24
+; RV64I-NEXT: or s5, s6, s5
+; RV64I-NEXT: or s2, s3, s2
+; RV64I-NEXT: or s3, s7, s4
+; RV64I-NEXT: lbu s4, 5(a1)
+; RV64I-NEXT: lbu s6, 4(a1)
+; RV64I-NEXT: lbu s7, 6(a1)
+; RV64I-NEXT: lbu a1, 7(a1)
+; RV64I-NEXT: slli s4, s4, 8
+; RV64I-NEXT: or s4, s4, s6
+; RV64I-NEXT: slli s7, s7, 16
+; RV64I-NEXT: slli a1, a1, 24
+; RV64I-NEXT: or a1, a1, s7
+; RV64I-NEXT: mv s6, sp
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a4, a6, a5
; RV64I-NEXT: or a5, t0, a7
; RV64I-NEXT: or a6, t2, t1
; RV64I-NEXT: or a7, t4, t3
-; RV64I-NEXT: or t0, t6, t5
-; RV64I-NEXT: or a1, a1, s0
-; RV64I-NEXT: or t1, s4, s1
-; RV64I-NEXT: add s2, s2, s3
+; RV64I-NEXT: or a0, a0, t5
+; RV64I-NEXT: or t0, s0, t6
+; RV64I-NEXT: or t1, s5, s1
+; RV64I-NEXT: or t2, s3, s2
+; RV64I-NEXT: or a1, a1, s4
; RV64I-NEXT: slli a4, a4, 32
; RV64I-NEXT: slli a6, a6, 32
-; RV64I-NEXT: slli t0, t0, 32
-; RV64I-NEXT: slli t2, t1, 32
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: slli t3, t1, 32
+; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: sraiw t1, t1, 31
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a5, t0, a7
-; RV64I-NEXT: or a1, t2, a1
+; RV64I-NEXT: or a0, a0, a7
+; RV64I-NEXT: or a5, t3, t0
+; RV64I-NEXT: or a1, a1, t2
; RV64I-NEXT: sd t1, 32(sp)
; RV64I-NEXT: sd t1, 40(sp)
; RV64I-NEXT: sd t1, 48(sp)
; RV64I-NEXT: sd t1, 56(sp)
; RV64I-NEXT: sd a3, 0(sp)
; RV64I-NEXT: sd a4, 8(sp)
-; RV64I-NEXT: sd a5, 16(sp)
-; RV64I-NEXT: sd a1, 24(sp)
-; RV64I-NEXT: ld a1, 8(s2)
-; RV64I-NEXT: ld a3, 16(s2)
-; RV64I-NEXT: ld a4, 0(s2)
-; RV64I-NEXT: ld a5, 24(s2)
-; RV64I-NEXT: srl a6, a1, a0
-; RV64I-NEXT: slli a7, a3, 1
-; RV64I-NEXT: srl a4, a4, a0
-; RV64I-NEXT: slli a1, a1, 1
-; RV64I-NEXT: srl a3, a3, a0
+; RV64I-NEXT: sd a0, 16(sp)
+; RV64I-NEXT: sd a5, 24(sp)
+; RV64I-NEXT: srli a0, a1, 3
+; RV64I-NEXT: andi a3, a1, 63
+; RV64I-NEXT: andi a0, a0, 24
+; RV64I-NEXT: xori a3, a3, 63
+; RV64I-NEXT: add a0, s6, a0
+; RV64I-NEXT: ld a4, 8(a0)
+; RV64I-NEXT: ld a5, 16(a0)
+; RV64I-NEXT: ld a6, 0(a0)
+; RV64I-NEXT: ld a0, 24(a0)
+; RV64I-NEXT: srl a7, a4, a1
; RV64I-NEXT: slli t0, a5, 1
-; RV64I-NEXT: sra a5, a5, a0
-; RV64I-NEXT: sll a0, a7, s5
-; RV64I-NEXT: sll a1, a1, s5
-; RV64I-NEXT: sll a7, t0, s5
-; RV64I-NEXT: srli t0, a5, 56
-; RV64I-NEXT: srli t1, a5, 48
-; RV64I-NEXT: srli t2, a5, 40
-; RV64I-NEXT: srli t3, a5, 32
-; RV64I-NEXT: srli t4, a5, 24
-; RV64I-NEXT: srli t5, a5, 16
-; RV64I-NEXT: srli t6, a5, 8
-; RV64I-NEXT: or a0, a6, a0
-; RV64I-NEXT: or a1, a4, a1
-; RV64I-NEXT: or a3, a3, a7
+; RV64I-NEXT: srl a6, a6, a1
+; RV64I-NEXT: slli a4, a4, 1
+; RV64I-NEXT: srl a5, a5, a1
+; RV64I-NEXT: slli t1, a0, 1
+; RV64I-NEXT: sra t2, a0, a1
+; RV64I-NEXT: sll a0, t0, a3
+; RV64I-NEXT: sll a1, a4, a3
+; RV64I-NEXT: sll a3, t1, a3
+; RV64I-NEXT: srli a4, t2, 56
+; RV64I-NEXT: srli t0, t2, 48
+; RV64I-NEXT: srli t1, t2, 40
+; RV64I-NEXT: srli t3, t2, 32
+; RV64I-NEXT: srli t4, t2, 24
+; RV64I-NEXT: srli t5, t2, 16
+; RV64I-NEXT: srli t6, t2, 8
+; RV64I-NEXT: or a0, a7, a0
+; RV64I-NEXT: or a1, a6, a1
+; RV64I-NEXT: or a3, a5, a3
; RV64I-NEXT: sb t3, 28(a2)
-; RV64I-NEXT: sb t2, 29(a2)
-; RV64I-NEXT: sb t1, 30(a2)
-; RV64I-NEXT: sb t0, 31(a2)
-; RV64I-NEXT: sb a5, 24(a2)
+; RV64I-NEXT: sb t1, 29(a2)
+; RV64I-NEXT: sb t0, 30(a2)
+; RV64I-NEXT: sb a4, 31(a2)
+; RV64I-NEXT: sb t2, 24(a2)
; RV64I-NEXT: sb t6, 25(a2)
; RV64I-NEXT: sb t5, 26(a2)
; RV64I-NEXT: sb t4, 27(a2)
@@ -2316,45 +2438,47 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: srli s3, a0, 56
; RV64I-NEXT: srli s4, a0, 48
; RV64I-NEXT: srli s5, a0, 40
+; RV64I-NEXT: srli s6, a0, 32
; RV64I-NEXT: sb a7, 20(a2)
; RV64I-NEXT: sb a6, 21(a2)
; RV64I-NEXT: sb a5, 22(a2)
; RV64I-NEXT: sb a4, 23(a2)
-; RV64I-NEXT: srli a4, a0, 32
+; RV64I-NEXT: srli a4, a0, 24
; RV64I-NEXT: sb a3, 16(a2)
; RV64I-NEXT: sb t2, 17(a2)
; RV64I-NEXT: sb t1, 18(a2)
; RV64I-NEXT: sb t0, 19(a2)
-; RV64I-NEXT: srli a3, a0, 24
+; RV64I-NEXT: srli a3, a0, 16
; RV64I-NEXT: sb t6, 4(a2)
; RV64I-NEXT: sb t5, 5(a2)
; RV64I-NEXT: sb t4, 6(a2)
; RV64I-NEXT: sb t3, 7(a2)
-; RV64I-NEXT: srli a5, a0, 16
+; RV64I-NEXT: srli a5, a0, 8
; RV64I-NEXT: sb a1, 0(a2)
; RV64I-NEXT: sb s2, 1(a2)
; RV64I-NEXT: sb s1, 2(a2)
; RV64I-NEXT: sb s0, 3(a2)
-; RV64I-NEXT: srli a1, a0, 8
-; RV64I-NEXT: sb a4, 12(a2)
+; RV64I-NEXT: sb s6, 12(a2)
; RV64I-NEXT: sb s5, 13(a2)
; RV64I-NEXT: sb s4, 14(a2)
; RV64I-NEXT: sb s3, 15(a2)
; RV64I-NEXT: sb a0, 8(a2)
-; RV64I-NEXT: sb a1, 9(a2)
-; RV64I-NEXT: sb a5, 10(a2)
-; RV64I-NEXT: sb a3, 11(a2)
-; RV64I-NEXT: ld s0, 136(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 128(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 120(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 112(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s4, 104(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s5, 96(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s6, 88(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s7, 80(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s8, 72(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s9, 64(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 144
+; RV64I-NEXT: sb a5, 9(a2)
+; RV64I-NEXT: sb a3, 10(a2)
+; RV64I-NEXT: sb a4, 11(a2)
+; RV64I-NEXT: ld s0, 152(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 144(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 136(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 128(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s4, 120(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s5, 112(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s6, 104(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s7, 96(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s8, 88(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s9, 80(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s10, 72(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s11, 64(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 160
; RV64I-NEXT: ret
;
; RV32I-LABEL: ashr_32bytes:
@@ -2379,148 +2503,159 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: lbu a7, 3(a0)
; RV32I-NEXT: lbu a5, 4(a0)
; RV32I-NEXT: lbu t0, 5(a0)
-; RV32I-NEXT: lbu t3, 6(a0)
-; RV32I-NEXT: lbu t4, 7(a0)
-; RV32I-NEXT: lbu t6, 8(a0)
-; RV32I-NEXT: lbu s0, 9(a0)
-; RV32I-NEXT: lbu s4, 10(a0)
-; RV32I-NEXT: lbu s5, 11(a0)
-; RV32I-NEXT: lbu s6, 12(a0)
-; RV32I-NEXT: lbu s7, 13(a0)
-; RV32I-NEXT: lbu s8, 14(a0)
-; RV32I-NEXT: lbu s9, 15(a0)
-; RV32I-NEXT: lbu s10, 16(a0)
-; RV32I-NEXT: lbu s11, 17(a0)
-; RV32I-NEXT: lbu s2, 18(a0)
-; RV32I-NEXT: lbu s3, 19(a0)
+; RV32I-NEXT: lbu t1, 6(a0)
+; RV32I-NEXT: lbu t2, 7(a0)
+; RV32I-NEXT: lbu t3, 8(a0)
+; RV32I-NEXT: lbu t4, 9(a0)
+; RV32I-NEXT: lbu t5, 10(a0)
+; RV32I-NEXT: lbu t6, 11(a0)
+; RV32I-NEXT: lbu s0, 12(a0)
+; RV32I-NEXT: lbu s1, 13(a0)
+; RV32I-NEXT: lbu s2, 14(a0)
+; RV32I-NEXT: lbu s3, 15(a0)
+; RV32I-NEXT: lbu s4, 16(a0)
+; RV32I-NEXT: lbu s5, 17(a0)
+; RV32I-NEXT: lbu s6, 18(a0)
+; RV32I-NEXT: lbu s7, 19(a0)
; RV32I-NEXT: slli a4, a4, 8
; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli a7, a7, 24
; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: sw a3, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: or a4, a7, a6
-; RV32I-NEXT: lbu t1, 20(a0)
-; RV32I-NEXT: lbu t2, 21(a0)
-; RV32I-NEXT: lbu t5, 22(a0)
-; RV32I-NEXT: lbu s1, 23(a0)
+; RV32I-NEXT: lbu s8, 20(a0)
+; RV32I-NEXT: lbu s9, 21(a0)
+; RV32I-NEXT: lbu s10, 22(a0)
+; RV32I-NEXT: lbu s11, 23(a0)
; RV32I-NEXT: slli t0, t0, 8
-; RV32I-NEXT: slli t3, t3, 16
-; RV32I-NEXT: slli t4, t4, 24
-; RV32I-NEXT: slli s0, s0, 8
-; RV32I-NEXT: slli s4, s4, 16
-; RV32I-NEXT: slli s5, s5, 24
+; RV32I-NEXT: slli t1, t1, 16
+; RV32I-NEXT: slli t2, t2, 24
+; RV32I-NEXT: slli t4, t4, 8
+; RV32I-NEXT: slli t5, t5, 16
+; RV32I-NEXT: slli t6, t6, 24
; RV32I-NEXT: or a5, t0, a5
-; RV32I-NEXT: or a6, t4, t3
-; RV32I-NEXT: or a7, s0, t6
-; RV32I-NEXT: or t0, s5, s4
-; RV32I-NEXT: lbu t3, 24(a0)
-; RV32I-NEXT: lbu s4, 25(a0)
-; RV32I-NEXT: lbu s5, 26(a0)
-; RV32I-NEXT: lbu ra, 27(a0)
-; RV32I-NEXT: slli s7, s7, 8
-; RV32I-NEXT: slli s8, s8, 16
-; RV32I-NEXT: slli s9, s9, 24
-; RV32I-NEXT: slli s11, s11, 8
-; RV32I-NEXT: or t4, s7, s6
-; RV32I-NEXT: or t6, s9, s8
-; RV32I-NEXT: or s0, s11, s10
-; RV32I-NEXT: lbu s6, 28(a0)
-; RV32I-NEXT: lbu s7, 29(a0)
-; RV32I-NEXT: lbu s8, 30(a0)
-; RV32I-NEXT: lbu s9, 31(a0)
-; RV32I-NEXT: lbu a0, 0(a1)
+; RV32I-NEXT: or a6, t2, t1
+; RV32I-NEXT: or a7, t4, t3
+; RV32I-NEXT: or t0, t6, t5
+; RV32I-NEXT: lbu ra, 24(a0)
+; RV32I-NEXT: lbu a3, 25(a0)
+; RV32I-NEXT: lbu t4, 26(a0)
+; RV32I-NEXT: lbu t5, 27(a0)
+; RV32I-NEXT: slli s1, s1, 8
; RV32I-NEXT: slli s2, s2, 16
; RV32I-NEXT: slli s3, s3, 24
-; RV32I-NEXT: or s2, s3, s2
-; RV32I-NEXT: addi s3, sp, 8
-; RV32I-NEXT: slli t2, t2, 8
-; RV32I-NEXT: slli t5, t5, 16
-; RV32I-NEXT: slli s1, s1, 24
-; RV32I-NEXT: slli s4, s4, 8
-; RV32I-NEXT: slli s5, s5, 16
-; RV32I-NEXT: slli ra, ra, 24
-; RV32I-NEXT: slli s7, s7, 8
-; RV32I-NEXT: slli s8, s8, 16
-; RV32I-NEXT: slli s9, s9, 24
-; RV32I-NEXT: or t1, t2, t1
-; RV32I-NEXT: srli a1, a0, 3
+; RV32I-NEXT: slli s5, s5, 8
+; RV32I-NEXT: or t1, s1, s0
+; RV32I-NEXT: or t2, s3, s2
+; RV32I-NEXT: or t3, s5, s4
+; RV32I-NEXT: lbu t6, 28(a0)
+; RV32I-NEXT: lbu s0, 29(a0)
+; RV32I-NEXT: lbu s1, 30(a0)
+; RV32I-NEXT: lbu a0, 31(a0)
+; RV32I-NEXT: slli s6, s6, 16
+; RV32I-NEXT: slli s7, s7, 24
+; RV32I-NEXT: slli s9, s9, 8
+; RV32I-NEXT: slli s10, s10, 16
+; RV32I-NEXT: slli s11, s11, 24
+; RV32I-NEXT: or s2, s7, s6
+; RV32I-NEXT: or s3, s9, s8
+; RV32I-NEXT: or s4, s11, s10
+; RV32I-NEXT: lbu s5, 0(a1)
+; RV32I-NEXT: lbu s6, 1(a1)
+; RV32I-NEXT: lbu s7, 2(a1)
+; RV32I-NEXT: lbu a1, 3(a1)
+; RV32I-NEXT: slli a3, a3, 8
+; RV32I-NEXT: or a3, a3, ra
+; RV32I-NEXT: addi s8, sp, 8
+; RV32I-NEXT: slli t4, t4, 16
+; RV32I-NEXT: slli t5, t5, 24
+; RV32I-NEXT: slli s0, s0, 8
+; RV32I-NEXT: slli s1, s1, 16
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: slli s6, s6, 8
+; RV32I-NEXT: slli s7, s7, 16
+; RV32I-NEXT: slli a1, a1, 24
+; RV32I-NEXT: or t4, t5, t4
+; RV32I-NEXT: or t5, s0, t6
+; RV32I-NEXT: or s1, a0, s1
+; RV32I-NEXT: or t6, s6, s5
+; RV32I-NEXT: or a1, a1, s7
+; RV32I-NEXT: srai s0, a0, 31
+; RV32I-NEXT: lw a0, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: or a4, a4, a0
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: or a6, t0, a7
+; RV32I-NEXT: or a7, t2, t1
+; RV32I-NEXT: or t0, s2, t3
+; RV32I-NEXT: or t1, s4, s3
+; RV32I-NEXT: or a3, t4, a3
; RV32I-NEXT: or t2, s1, t5
-; RV32I-NEXT: andi t5, a0, 31
-; RV32I-NEXT: or t3, s4, t3
-; RV32I-NEXT: or s1, ra, s5
-; RV32I-NEXT: or s4, s7, s6
-; RV32I-NEXT: or s5, s9, s8
-; RV32I-NEXT: srai s6, s9, 31
-; RV32I-NEXT: andi s7, a1, 28
-; RV32I-NEXT: xori a1, t5, 31
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: or a4, a6, a5
-; RV32I-NEXT: or a5, t0, a7
-; RV32I-NEXT: or a6, t6, t4
-; RV32I-NEXT: or a7, s2, s0
-; RV32I-NEXT: or t0, t2, t1
-; RV32I-NEXT: or t1, s1, t3
-; RV32I-NEXT: or t2, s5, s4
-; RV32I-NEXT: sw s6, 56(sp)
-; RV32I-NEXT: sw s6, 60(sp)
-; RV32I-NEXT: sw s6, 64(sp)
-; RV32I-NEXT: sw s6, 68(sp)
-; RV32I-NEXT: sw s6, 40(sp)
-; RV32I-NEXT: sw s6, 44(sp)
-; RV32I-NEXT: sw s6, 48(sp)
-; RV32I-NEXT: sw s6, 52(sp)
-; RV32I-NEXT: add s3, s3, s7
-; RV32I-NEXT: sw a7, 24(sp)
-; RV32I-NEXT: sw t0, 28(sp)
-; RV32I-NEXT: sw t1, 32(sp)
+; RV32I-NEXT: or a0, a1, t6
+; RV32I-NEXT: sw s0, 56(sp)
+; RV32I-NEXT: sw s0, 60(sp)
+; RV32I-NEXT: sw s0, 64(sp)
+; RV32I-NEXT: sw s0, 68(sp)
+; RV32I-NEXT: sw s0, 40(sp)
+; RV32I-NEXT: sw s0, 44(sp)
+; RV32I-NEXT: sw s0, 48(sp)
+; RV32I-NEXT: sw s0, 52(sp)
+; RV32I-NEXT: sw t0, 24(sp)
+; RV32I-NEXT: sw t1, 28(sp)
+; RV32I-NEXT: sw a3, 32(sp)
; RV32I-NEXT: sw t2, 36(sp)
-; RV32I-NEXT: sw a3, 8(sp)
-; RV32I-NEXT: sw a4, 12(sp)
-; RV32I-NEXT: sw a5, 16(sp)
-; RV32I-NEXT: sw a6, 20(sp)
-; RV32I-NEXT: lw a3, 0(s3)
-; RV32I-NEXT: lw a4, 4(s3)
-; RV32I-NEXT: lw a5, 8(s3)
-; RV32I-NEXT: lw a6, 12(s3)
-; RV32I-NEXT: lw a7, 16(s3)
-; RV32I-NEXT: lw t0, 20(s3)
-; RV32I-NEXT: lw t1, 24(s3)
-; RV32I-NEXT: lw t2, 28(s3)
-; RV32I-NEXT: srl t3, a4, a0
-; RV32I-NEXT: slli t4, a5, 1
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a6, 16(sp)
+; RV32I-NEXT: sw a7, 20(sp)
+; RV32I-NEXT: srli a1, a0, 3
+; RV32I-NEXT: andi a3, a0, 31
+; RV32I-NEXT: andi a4, a1, 28
+; RV32I-NEXT: xori a1, a3, 31
+; RV32I-NEXT: add a4, s8, a4
+; RV32I-NEXT: lw a3, 0(a4)
+; RV32I-NEXT: lw a5, 4(a4)
+; RV32I-NEXT: lw a6, 8(a4)
+; RV32I-NEXT: lw a7, 12(a4)
+; RV32I-NEXT: lw t0, 16(a4)
+; RV32I-NEXT: lw t1, 20(a4)
+; RV32I-NEXT: lw t2, 24(a4)
+; RV32I-NEXT: lw a4, 28(a4)
+; RV32I-NEXT: srl t3, a5, a0
+; RV32I-NEXT: slli t4, a6, 1
; RV32I-NEXT: srl a3, a3, a0
-; RV32I-NEXT: slli a4, a4, 1
-; RV32I-NEXT: srl t5, a6, a0
-; RV32I-NEXT: slli t6, a7, 1
-; RV32I-NEXT: srl a5, a5, a0
-; RV32I-NEXT: slli a6, a6, 1
-; RV32I-NEXT: srl s0, t0, a0
-; RV32I-NEXT: slli s1, t1, 1
-; RV32I-NEXT: srl a7, a7, a0
-; RV32I-NEXT: slli t0, t0, 1
-; RV32I-NEXT: srl t1, t1, a0
-; RV32I-NEXT: slli s2, t2, 1
-; RV32I-NEXT: sra t2, t2, a0
+; RV32I-NEXT: slli a5, a5, 1
+; RV32I-NEXT: srl t5, a7, a0
+; RV32I-NEXT: slli t6, t0, 1
+; RV32I-NEXT: srl a6, a6, a0
+; RV32I-NEXT: slli a7, a7, 1
+; RV32I-NEXT: srl s0, t1, a0
+; RV32I-NEXT: slli s1, t2, 1
+; RV32I-NEXT: srl t0, t0, a0
+; RV32I-NEXT: slli t1, t1, 1
+; RV32I-NEXT: srl t2, t2, a0
+; RV32I-NEXT: slli s2, a4, 1
+; RV32I-NEXT: sra s3, a4, a0
; RV32I-NEXT: sll a0, t4, a1
-; RV32I-NEXT: sll a4, a4, a1
-; RV32I-NEXT: sll t4, t6, a1
-; RV32I-NEXT: sll a6, a6, a1
-; RV32I-NEXT: sll t6, s1, a1
-; RV32I-NEXT: sll t0, t0, a1
-; RV32I-NEXT: sll s1, s2, a1
-; RV32I-NEXT: srli s2, t2, 24
-; RV32I-NEXT: srli s3, t2, 16
-; RV32I-NEXT: srli s4, t2, 8
+; RV32I-NEXT: sll a4, a5, a1
+; RV32I-NEXT: sll a5, t6, a1
+; RV32I-NEXT: sll a7, a7, a1
+; RV32I-NEXT: sll t4, s1, a1
+; RV32I-NEXT: sll t1, t1, a1
+; RV32I-NEXT: sll t6, s2, a1
+; RV32I-NEXT: srli s1, s3, 24
+; RV32I-NEXT: srli s2, s3, 16
+; RV32I-NEXT: srli s4, s3, 8
; RV32I-NEXT: or a0, t3, a0
; RV32I-NEXT: or a1, a3, a4
-; RV32I-NEXT: or a3, t5, t4
-; RV32I-NEXT: or a4, a5, a6
-; RV32I-NEXT: or a5, s0, t6
-; RV32I-NEXT: or a6, a7, t0
-; RV32I-NEXT: or a7, t1, s1
-; RV32I-NEXT: sb t2, 28(a2)
+; RV32I-NEXT: or a3, t5, a5
+; RV32I-NEXT: or a4, a6, a7
+; RV32I-NEXT: or a5, s0, t4
+; RV32I-NEXT: or a6, t0, t1
+; RV32I-NEXT: or a7, t2, t6
+; RV32I-NEXT: sb s3, 28(a2)
; RV32I-NEXT: sb s4, 29(a2)
-; RV32I-NEXT: sb s3, 30(a2)
-; RV32I-NEXT: sb s2, 31(a2)
+; RV32I-NEXT: sb s2, 30(a2)
+; RV32I-NEXT: sb s1, 31(a2)
; RV32I-NEXT: srli t0, a7, 24
; RV32I-NEXT: srli t1, a7, 16
; RV32I-NEXT: srli t2, a7, 8
diff --git a/llvm/test/CodeGen/RISCV/xqciac.ll b/llvm/test/CodeGen/RISCV/xqciac.ll
index a3b4e78..4c77b39 100644
--- a/llvm/test/CodeGen/RISCV/xqciac.ll
+++ b/llvm/test/CodeGen/RISCV/xqciac.ll
@@ -231,12 +231,12 @@ define dso_local i32 @pow2(i32 %a, i32 %b) local_unnamed_addr #0 {
;
; RV32IMXQCIAC-LABEL: pow2:
; RV32IMXQCIAC: # %bb.0: # %entry
-; RV32IMXQCIAC-NEXT: qc.shladd a0, a0, a1, 5
+; RV32IMXQCIAC-NEXT: qc.shladd a0, a1, a0, 5
; RV32IMXQCIAC-NEXT: ret
;
; RV32IZBAMXQCIAC-LABEL: pow2:
; RV32IZBAMXQCIAC: # %bb.0: # %entry
-; RV32IZBAMXQCIAC-NEXT: qc.shladd a0, a0, a1, 5
+; RV32IZBAMXQCIAC-NEXT: qc.shladd a0, a1, a0, 5
; RV32IZBAMXQCIAC-NEXT: ret
entry:
%mul = mul nsw i32 %b, 32
@@ -276,12 +276,12 @@ define dso_local i32 @shladd(i32 %a, i32 %b) local_unnamed_addr #0 {
;
; RV32IMXQCIAC-LABEL: shladd:
; RV32IMXQCIAC: # %bb.0: # %entry
-; RV32IMXQCIAC-NEXT: qc.shladd a0, a0, a1, 31
+; RV32IMXQCIAC-NEXT: qc.shladd a0, a1, a0, 31
; RV32IMXQCIAC-NEXT: ret
;
; RV32IZBAMXQCIAC-LABEL: shladd:
; RV32IZBAMXQCIAC: # %bb.0: # %entry
-; RV32IZBAMXQCIAC-NEXT: qc.shladd a0, a0, a1, 31
+; RV32IZBAMXQCIAC-NEXT: qc.shladd a0, a1, a0, 31
; RV32IZBAMXQCIAC-NEXT: ret
entry:
%shl = shl nsw i32 %b, 31
@@ -305,9 +305,9 @@ define dso_local i64 @shladd64(i64 %a, i64 %b) local_unnamed_addr #0 {
; RV32IMXQCIAC-LABEL: shladd64:
; RV32IMXQCIAC: # %bb.0: # %entry
; RV32IMXQCIAC-NEXT: srli a4, a2, 1
-; RV32IMXQCIAC-NEXT: qc.shladd a0, a0, a2, 31
+; RV32IMXQCIAC-NEXT: qc.shladd a0, a2, a0, 31
; RV32IMXQCIAC-NEXT: slli a2, a2, 31
-; RV32IMXQCIAC-NEXT: qc.shladd a3, a4, a3, 31
+; RV32IMXQCIAC-NEXT: qc.shladd a3, a3, a4, 31
; RV32IMXQCIAC-NEXT: sltu a2, a0, a2
; RV32IMXQCIAC-NEXT: add a1, a1, a3
; RV32IMXQCIAC-NEXT: add a1, a1, a2
@@ -316,9 +316,9 @@ define dso_local i64 @shladd64(i64 %a, i64 %b) local_unnamed_addr #0 {
; RV32IZBAMXQCIAC-LABEL: shladd64:
; RV32IZBAMXQCIAC: # %bb.0: # %entry
; RV32IZBAMXQCIAC-NEXT: srli a4, a2, 1
-; RV32IZBAMXQCIAC-NEXT: qc.shladd a0, a0, a2, 31
+; RV32IZBAMXQCIAC-NEXT: qc.shladd a0, a2, a0, 31
; RV32IZBAMXQCIAC-NEXT: slli a2, a2, 31
-; RV32IZBAMXQCIAC-NEXT: qc.shladd a3, a4, a3, 31
+; RV32IZBAMXQCIAC-NEXT: qc.shladd a3, a3, a4, 31
; RV32IZBAMXQCIAC-NEXT: sltu a2, a0, a2
; RV32IZBAMXQCIAC-NEXT: add a1, a1, a3
; RV32IZBAMXQCIAC-NEXT: add a1, a1, a2
@@ -338,12 +338,12 @@ define dso_local i32 @shladd_ordisjoint(i32 %a, i32 %b) local_unnamed_addr #0 {
;
; RV32IMXQCIAC-LABEL: shladd_ordisjoint:
; RV32IMXQCIAC: # %bb.0: # %entry
-; RV32IMXQCIAC-NEXT: qc.shladd a0, a0, a1, 22
+; RV32IMXQCIAC-NEXT: qc.shladd a0, a1, a0, 22
; RV32IMXQCIAC-NEXT: ret
;
; RV32IZBAMXQCIAC-LABEL: shladd_ordisjoint:
; RV32IZBAMXQCIAC: # %bb.0: # %entry
-; RV32IZBAMXQCIAC-NEXT: qc.shladd a0, a0, a1, 22
+; RV32IZBAMXQCIAC-NEXT: qc.shladd a0, a1, a0, 22
; RV32IZBAMXQCIAC-NEXT: ret
entry:
%shl = shl nsw i32 %b, 22
@@ -361,13 +361,13 @@ define dso_local i32 @shladdc1c2(i32 %a, i32 %b) local_unnamed_addr #0 {
;
; RV32IMXQCIAC-LABEL: shladdc1c2:
; RV32IMXQCIAC: # %bb.0: # %entry
-; RV32IMXQCIAC-NEXT: qc.shladd a0, a1, a0, 5
+; RV32IMXQCIAC-NEXT: qc.shladd a0, a0, a1, 5
; RV32IMXQCIAC-NEXT: slli a0, a0, 26
; RV32IMXQCIAC-NEXT: ret
;
; RV32IZBAMXQCIAC-LABEL: shladdc1c2:
; RV32IZBAMXQCIAC: # %bb.0: # %entry
-; RV32IZBAMXQCIAC-NEXT: qc.shladd a0, a1, a0, 5
+; RV32IZBAMXQCIAC-NEXT: qc.shladd a0, a0, a1, 5
; RV32IZBAMXQCIAC-NEXT: slli a0, a0, 26
; RV32IZBAMXQCIAC-NEXT: ret
entry:
@@ -388,7 +388,7 @@ define dso_local i32 @shxaddc1c2(i32 %a, i32 %b) local_unnamed_addr #0 {
; RV32IMXQCIAC-LABEL: shxaddc1c2:
; RV32IMXQCIAC: # %bb.0: # %entry
; RV32IMXQCIAC-NEXT: slli a1, a1, 28
-; RV32IMXQCIAC-NEXT: qc.shladd a0, a1, a0, 31
+; RV32IMXQCIAC-NEXT: qc.shladd a0, a0, a1, 31
; RV32IMXQCIAC-NEXT: ret
;
; RV32IZBAMXQCIAC-LABEL: shxaddc1c2:
@@ -417,18 +417,18 @@ define dso_local i64 @shladdc1c264(i64 %a, i64 %b) local_unnamed_addr #0 {
; RV32IMXQCIAC-LABEL: shladdc1c264:
; RV32IMXQCIAC: # %bb.0: # %entry
; RV32IMXQCIAC-NEXT: srli a1, a2, 12
-; RV32IMXQCIAC-NEXT: qc.shladd a1, a1, a3, 20
+; RV32IMXQCIAC-NEXT: qc.shladd a1, a3, a1, 20
; RV32IMXQCIAC-NEXT: slli a2, a2, 20
-; RV32IMXQCIAC-NEXT: qc.shladd a1, a1, a0, 23
+; RV32IMXQCIAC-NEXT: qc.shladd a1, a0, a1, 23
; RV32IMXQCIAC-NEXT: mv a0, a2
; RV32IMXQCIAC-NEXT: ret
;
; RV32IZBAMXQCIAC-LABEL: shladdc1c264:
; RV32IZBAMXQCIAC: # %bb.0: # %entry
; RV32IZBAMXQCIAC-NEXT: srli a1, a2, 12
-; RV32IZBAMXQCIAC-NEXT: qc.shladd a1, a1, a3, 20
+; RV32IZBAMXQCIAC-NEXT: qc.shladd a1, a3, a1, 20
; RV32IZBAMXQCIAC-NEXT: slli a2, a2, 20
-; RV32IZBAMXQCIAC-NEXT: qc.shladd a1, a1, a0, 23
+; RV32IZBAMXQCIAC-NEXT: qc.shladd a1, a0, a1, 23
; RV32IZBAMXQCIAC-NEXT: mv a0, a2
; RV32IZBAMXQCIAC-NEXT: ret
entry:
@@ -449,13 +449,13 @@ define dso_local i32 @shladdc1equalc2(i32 %a, i32 %b) local_unnamed_addr #0 {
; RV32IMXQCIAC-LABEL: shladdc1equalc2:
; RV32IMXQCIAC: # %bb.0: # %entry
; RV32IMXQCIAC-NEXT: slli a1, a1, 12
-; RV32IMXQCIAC-NEXT: qc.shladd a0, a1, a0, 12
+; RV32IMXQCIAC-NEXT: qc.shladd a0, a0, a1, 12
; RV32IMXQCIAC-NEXT: ret
;
; RV32IZBAMXQCIAC-LABEL: shladdc1equalc2:
; RV32IZBAMXQCIAC: # %bb.0: # %entry
; RV32IZBAMXQCIAC-NEXT: slli a1, a1, 12
-; RV32IZBAMXQCIAC-NEXT: qc.shladd a0, a1, a0, 12
+; RV32IZBAMXQCIAC-NEXT: qc.shladd a0, a0, a1, 12
; RV32IZBAMXQCIAC-NEXT: ret
entry:
%shlc1 = shl nsw i32 %a, 12
diff --git a/llvm/test/CodeGen/RISCV/xtheadfmemidx.ll b/llvm/test/CodeGen/RISCV/xtheadfmemidx.ll
index cdaae23..5724c4f 100644
--- a/llvm/test/CodeGen/RISCV/xtheadfmemidx.ll
+++ b/llvm/test/CodeGen/RISCV/xtheadfmemidx.ll
@@ -1,33 +1,27 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d -mattr=+xtheadfmemidx -mattr=+m -verify-machineinstrs < %s \
-; RUN: | FileCheck %s -check-prefix=RV32XTHEADMEMIDX
-; RUN: llc -mtriple=riscv64 -mattr=+d -mattr=+xtheadfmemidx -verify-machineinstrs < %s \
-; RUN: | FileCheck %s -check-prefix=RV64XTHEADFMEMIDX
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+d,+xtheadfmemidx \
+; RUN: -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,RV32XTHEADFMEMIDX
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+d,+xtheadfmemidx \
+; RUN: -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,RV64XTHEADFMEMIDX
 
-define float @flrw(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: flrw:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.flrw fa5, a0, a1, 2
-; RV32XTHEADMEMIDX-NEXT: fadd.s fa0, fa5, fa5
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADFMEMIDX-LABEL: flrw:
-; RV64XTHEADFMEMIDX: # %bb.0:
-; RV64XTHEADFMEMIDX-NEXT: th.flrw fa5, a0, a1, 2
-; RV64XTHEADFMEMIDX-NEXT: fadd.s fa0, fa5, fa5
-; RV64XTHEADFMEMIDX-NEXT: ret
- %1 = getelementptr float, ptr %a, i64 %b
+define float @flrw(ptr %a, iXLen %b) {
+; CHECK-LABEL: flrw:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.flrw fa5, a0, a1, 2
+; CHECK-NEXT: fadd.s fa0, fa5, fa5
+; CHECK-NEXT: ret
+ %1 = getelementptr float, ptr %a, iXLen %b
%2 = load float, ptr %1, align 4
%3 = fadd float %2, %2
ret float %3
}
 
define float @flurw(ptr %a, i32 %b) {
-; RV32XTHEADMEMIDX-LABEL: flurw:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.flrw fa5, a0, a1, 2
-; RV32XTHEADMEMIDX-NEXT: fadd.s fa0, fa5, fa5
-; RV32XTHEADMEMIDX-NEXT: ret
+; RV32XTHEADFMEMIDX-LABEL: flurw:
+; RV32XTHEADFMEMIDX: # %bb.0:
+; RV32XTHEADFMEMIDX-NEXT: th.flrw fa5, a0, a1, 2
+; RV32XTHEADFMEMIDX-NEXT: fadd.s fa0, fa5, fa5
+; RV32XTHEADFMEMIDX-NEXT: ret
;
; RV64XTHEADFMEMIDX-LABEL: flurw:
; RV64XTHEADFMEMIDX: # %bb.0:
@@ -41,30 +35,24 @@ define float @flurw(ptr %a, i32 %b) {
ret float %4
}
 
-define void @fsrw(ptr %a, i64 %b, float %c) {
-; RV32XTHEADMEMIDX-LABEL: fsrw:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: fadd.s fa5, fa0, fa0
-; RV32XTHEADMEMIDX-NEXT: th.fsrw fa5, a0, a1, 2
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADFMEMIDX-LABEL: fsrw:
-; RV64XTHEADFMEMIDX: # %bb.0:
-; RV64XTHEADFMEMIDX-NEXT: fadd.s fa5, fa0, fa0
-; RV64XTHEADFMEMIDX-NEXT: th.fsrw fa5, a0, a1, 2
-; RV64XTHEADFMEMIDX-NEXT: ret
+define void @fsrw(ptr %a, iXLen %b, float %c) {
+; CHECK-LABEL: fsrw:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fadd.s fa5, fa0, fa0
+; CHECK-NEXT: th.fsrw fa5, a0, a1, 2
+; CHECK-NEXT: ret
%1 = fadd float %c, %c
- %2 = getelementptr float, ptr %a, i64 %b
+ %2 = getelementptr float, ptr %a, iXLen %b
store float %1, ptr %2, align 4
ret void
}
 
define void @fsurw(ptr %a, i32 %b, float %c) {
-; RV32XTHEADMEMIDX-LABEL: fsurw:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: fadd.s fa5, fa0, fa0
-; RV32XTHEADMEMIDX-NEXT: th.fsrw fa5, a0, a1, 2
-; RV32XTHEADMEMIDX-NEXT: ret
+; RV32XTHEADFMEMIDX-LABEL: fsurw:
+; RV32XTHEADFMEMIDX: # %bb.0:
+; RV32XTHEADFMEMIDX-NEXT: fadd.s fa5, fa0, fa0
+; RV32XTHEADFMEMIDX-NEXT: th.fsrw fa5, a0, a1, 2
+; RV32XTHEADFMEMIDX-NEXT: ret
;
; RV64XTHEADFMEMIDX-LABEL: fsurw:
; RV64XTHEADFMEMIDX: # %bb.0:
@@ -78,30 +66,24 @@ define void @fsurw(ptr %a, i32 %b, float %c) {
ret void
}
 
-define double @flrd(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: flrd:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.flrd fa5, a0, a1, 3
-; RV32XTHEADMEMIDX-NEXT: fadd.d fa0, fa5, fa5
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADFMEMIDX-LABEL: flrd:
-; RV64XTHEADFMEMIDX: # %bb.0:
-; RV64XTHEADFMEMIDX-NEXT: th.flrd fa5, a0, a1, 3
-; RV64XTHEADFMEMIDX-NEXT: fadd.d fa0, fa5, fa5
-; RV64XTHEADFMEMIDX-NEXT: ret
- %1 = getelementptr double, ptr %a, i64 %b
+define double @flrd(ptr %a, iXLen %b) {
+; CHECK-LABEL: flrd:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.flrd fa5, a0, a1, 3
+; CHECK-NEXT: fadd.d fa0, fa5, fa5
+; CHECK-NEXT: ret
+ %1 = getelementptr double, ptr %a, iXLen %b
%2 = load double, ptr %1, align 8
%3 = fadd double %2, %2
ret double %3
}
 
define double @flurd(ptr %a, i32 %b) {
-; RV32XTHEADMEMIDX-LABEL: flurd:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.flrd fa5, a0, a1, 3
-; RV32XTHEADMEMIDX-NEXT: fadd.d fa0, fa5, fa5
-; RV32XTHEADMEMIDX-NEXT: ret
+; RV32XTHEADFMEMIDX-LABEL: flurd:
+; RV32XTHEADFMEMIDX: # %bb.0:
+; RV32XTHEADFMEMIDX-NEXT: th.flrd fa5, a0, a1, 3
+; RV32XTHEADFMEMIDX-NEXT: fadd.d fa0, fa5, fa5
+; RV32XTHEADFMEMIDX-NEXT: ret
;
; RV64XTHEADFMEMIDX-LABEL: flurd:
; RV64XTHEADFMEMIDX: # %bb.0:
@@ -115,30 +97,24 @@ define double @flurd(ptr %a, i32 %b) {
ret double %4
}
 
-define void @fsrd(ptr %a, i64 %b, double %c) {
-; RV32XTHEADMEMIDX-LABEL: fsrd:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: fadd.d fa5, fa0, fa0
-; RV32XTHEADMEMIDX-NEXT: th.fsrd fa5, a0, a1, 3
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADFMEMIDX-LABEL: fsrd:
-; RV64XTHEADFMEMIDX: # %bb.0:
-; RV64XTHEADFMEMIDX-NEXT: fadd.d fa5, fa0, fa0
-; RV64XTHEADFMEMIDX-NEXT: th.fsrd fa5, a0, a1, 3
-; RV64XTHEADFMEMIDX-NEXT: ret
+define void @fsrd(ptr %a, iXLen %b, double %c) {
+; CHECK-LABEL: fsrd:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fadd.d fa5, fa0, fa0
+; CHECK-NEXT: th.fsrd fa5, a0, a1, 3
+; CHECK-NEXT: ret
%1 = fadd double %c, %c
- %2 = getelementptr double, ptr %a, i64 %b
+ %2 = getelementptr double, ptr %a, iXLen %b
store double %1, ptr %2, align 8
ret void
}
 
define void @fsurd(ptr %a, i32 %b, double %c) {
-; RV32XTHEADMEMIDX-LABEL: fsurd:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: fadd.d fa5, fa0, fa0
-; RV32XTHEADMEMIDX-NEXT: th.fsrd fa5, a0, a1, 3
-; RV32XTHEADMEMIDX-NEXT: ret
+; RV32XTHEADFMEMIDX-LABEL: fsurd:
+; RV32XTHEADFMEMIDX: # %bb.0:
+; RV32XTHEADFMEMIDX-NEXT: fadd.d fa5, fa0, fa0
+; RV32XTHEADFMEMIDX-NEXT: th.fsrd fa5, a0, a1, 3
+; RV32XTHEADFMEMIDX-NEXT: ret
;
; RV64XTHEADFMEMIDX-LABEL: fsurd:
; RV64XTHEADFMEMIDX: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/xtheadmemidx.ll b/llvm/test/CodeGen/RISCV/xtheadmemidx.ll
index fc20fcb..a20b08a 100644
--- a/llvm/test/CodeGen/RISCV/xtheadmemidx.ll
+++ b/llvm/test/CodeGen/RISCV/xtheadmemidx.ll
@@ -1,238 +1,156 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d -mattr=+xtheadmemidx -mattr=+m -verify-machineinstrs < %s \
-; RUN: | FileCheck %s -check-prefix=RV32XTHEADMEMIDX
-; RUN: llc -mtriple=riscv64 -mattr=+d -mattr=+xtheadmemidx -mattr=+m -verify-machineinstrs < %s \
-; RUN: | FileCheck %s -check-prefix=RV64XTHEADMEMIDX
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+d,+xtheadmemidx \
+; RUN: -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,RV32XTHEADMEMIDX
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+d,+xtheadmemidx \
+; RUN: -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,RV64XTHEADMEMIDX
 
define ptr @lbia(ptr %base, ptr %addr.2, i8 %a) {
-; RV32XTHEADMEMIDX-LABEL: lbia:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lbia a3, (a0), -1, 0
-; RV32XTHEADMEMIDX-NEXT: add a2, a3, a2
-; RV32XTHEADMEMIDX-NEXT: sb a2, 0(a1)
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lbia:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lbia a3, (a0), -1, 0
-; RV64XTHEADMEMIDX-NEXT: add a2, a3, a2
-; RV64XTHEADMEMIDX-NEXT: sb a2, 0(a1)
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i8, ptr %base, i8 0
+; CHECK-LABEL: lbia:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lbia a3, (a0), -1, 0
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: sb a2, 0(a1)
+; CHECK-NEXT: ret
+ %addr = getelementptr i8, ptr %base, iXLen 0
%ld = load i8, ptr %addr
- %addr.1 = getelementptr i8, ptr %base, i8 -1
+ %addr.1 = getelementptr i8, ptr %base, iXLen -1
%res = add i8 %ld, %a
store i8 %res, ptr %addr.2
ret ptr %addr.1
}
 
define ptr @lbib(ptr %base, i8 %a) {
-; RV32XTHEADMEMIDX-LABEL: lbib:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lbib a2, (a0), 1, 0
-; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1
-; RV32XTHEADMEMIDX-NEXT: sb a1, 1(a0)
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lbib:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lbib a2, (a0), 1, 0
-; RV64XTHEADMEMIDX-NEXT: add a1, a2, a1
-; RV64XTHEADMEMIDX-NEXT: sb a1, 1(a0)
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i8, ptr %base, i8 1
+; CHECK-LABEL: lbib:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lbib a2, (a0), 1, 0
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: sb a1, 1(a0)
+; CHECK-NEXT: ret
+ %addr = getelementptr i8, ptr %base, iXLen 1
%ld = load i8, ptr %addr
- %addr.1 = getelementptr i8, ptr %base, i8 2
+ %addr.1 = getelementptr i8, ptr %base, iXLen 2
%res = add i8 %ld, %a
store i8 %res, ptr %addr.1
ret ptr %addr
}
 
-define ptr @lbuia(ptr %base, ptr %addr.2, i64 %a) {
-; RV32XTHEADMEMIDX-LABEL: lbuia:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lbuia a4, (a0), -1, 0
-; RV32XTHEADMEMIDX-NEXT: add a2, a4, a2
-; RV32XTHEADMEMIDX-NEXT: sltu a4, a2, a4
-; RV32XTHEADMEMIDX-NEXT: add a3, a3, a4
-; RV32XTHEADMEMIDX-NEXT: sw a2, 0(a1)
-; RV32XTHEADMEMIDX-NEXT: sw a3, 4(a1)
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lbuia:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lbuia a3, (a0), -1, 0
-; RV64XTHEADMEMIDX-NEXT: add a2, a3, a2
-; RV64XTHEADMEMIDX-NEXT: sd a2, 0(a1)
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i8, ptr %base, i8 0
+define ptr @lbuia(ptr %base, ptr %addr.2, i32 %a) {
+; CHECK-LABEL: lbuia:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lbuia a3, (a0), -1, 0
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: sw a2, 0(a1)
+; CHECK-NEXT: ret
+ %addr = getelementptr i8, ptr %base, iXLen 0
%ld = load i8, ptr %addr
- %zext = zext i8 %ld to i64
- %addr.1 = getelementptr i8, ptr %base, i8 -1
- %res = add i64 %zext, %a
- store i64 %res, ptr %addr.2
+ %zext = zext i8 %ld to i32
+ %addr.1 = getelementptr i8, ptr %base, iXLen -1
+ %res = add i32 %zext, %a
+ store i32 %res, ptr %addr.2
ret ptr %addr.1
}
 
-define ptr @lbuib(ptr %base, i64 %a, ptr %addr.1) {
-; RV32XTHEADMEMIDX-LABEL: lbuib:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lbuib a4, (a0), 1, 0
-; RV32XTHEADMEMIDX-NEXT: add a1, a4, a1
-; RV32XTHEADMEMIDX-NEXT: sltu a4, a1, a4
-; RV32XTHEADMEMIDX-NEXT: add a2, a2, a4
-; RV32XTHEADMEMIDX-NEXT: sw a1, 0(a3)
-; RV32XTHEADMEMIDX-NEXT: sw a2, 4(a3)
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lbuib:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lbuib a3, (a0), 1, 0
-; RV64XTHEADMEMIDX-NEXT: add a1, a3, a1
-; RV64XTHEADMEMIDX-NEXT: sd a1, 0(a2)
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i8, ptr %base, i8 1
+define ptr @lbuib(ptr %base, i32 %a, ptr %addr.1) {
+; CHECK-LABEL: lbuib:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lbuib a3, (a0), 1, 0
+; CHECK-NEXT: add a1, a3, a1
+; CHECK-NEXT: sw a1, 0(a2)
+; CHECK-NEXT: ret
+ %addr = getelementptr i8, ptr %base, iXLen 1
%ld = load i8, ptr %addr
- %zext = zext i8 %ld to i64
- %res = add i64 %zext, %a
- store i64 %res, ptr %addr.1
+ %zext = zext i8 %ld to i32
+ %res = add i32 %zext, %a
+ store i32 %res, ptr %addr.1
ret ptr %addr
}
 
define ptr @lhia(ptr %base, ptr %addr.2, i16 %a) {
-; RV32XTHEADMEMIDX-LABEL: lhia:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lhia a3, (a0), -16, 1
-; RV32XTHEADMEMIDX-NEXT: add a2, a3, a2
-; RV32XTHEADMEMIDX-NEXT: sh a2, 0(a1)
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lhia:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lhia a3, (a0), -16, 1
-; RV64XTHEADMEMIDX-NEXT: add a2, a3, a2
-; RV64XTHEADMEMIDX-NEXT: sh a2, 0(a1)
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i16, ptr %base, i16 0
+; CHECK-LABEL: lhia:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lhia a3, (a0), -16, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: sh a2, 0(a1)
+; CHECK-NEXT: ret
+ %addr = getelementptr i16, ptr %base, iXLen 0
%ld = load i16, ptr %addr
- %addr.1 = getelementptr i16, ptr %base, i16 -16
+ %addr.1 = getelementptr i16, ptr %base, iXLen -16
%res = add i16 %ld, %a
store i16 %res, ptr %addr.2
ret ptr %addr.1
}
 
define ptr @lhib(ptr %base, i16 %a) {
-; RV32XTHEADMEMIDX-LABEL: lhib:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lhib a2, (a0), 2, 0
-; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1
-; RV32XTHEADMEMIDX-NEXT: sh a1, 2(a0)
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lhib:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lhib a2, (a0), 2, 0
-; RV64XTHEADMEMIDX-NEXT: add a1, a2, a1
-; RV64XTHEADMEMIDX-NEXT: sh a1, 2(a0)
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i16, ptr %base, i16 1
+; CHECK-LABEL: lhib:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lhib a2, (a0), 2, 0
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: sh a1, 2(a0)
+; CHECK-NEXT: ret
+ %addr = getelementptr i16, ptr %base, iXLen 1
%ld = load i16, ptr %addr
- %addr.1 = getelementptr i16, ptr %base, i16 2
+ %addr.1 = getelementptr i16, ptr %base, iXLen 2
%res = add i16 %ld, %a
store i16 %res, ptr %addr.1
ret ptr %addr
}
 
-define ptr @lhuia(ptr %base, ptr %addr.2, i64 %a) {
-; RV32XTHEADMEMIDX-LABEL: lhuia:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lhuia a4, (a0), -16, 1
-; RV32XTHEADMEMIDX-NEXT: add a2, a4, a2
-; RV32XTHEADMEMIDX-NEXT: sltu a4, a2, a4
-; RV32XTHEADMEMIDX-NEXT: add a3, a3, a4
-; RV32XTHEADMEMIDX-NEXT: sw a2, 0(a1)
-; RV32XTHEADMEMIDX-NEXT: sw a3, 4(a1)
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lhuia:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lhuia a3, (a0), -16, 1
-; RV64XTHEADMEMIDX-NEXT: add a2, a3, a2
-; RV64XTHEADMEMIDX-NEXT: sd a2, 0(a1)
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i16, ptr %base, i16 0
+define ptr @lhuia(ptr %base, ptr %addr.2, i32 %a) {
+; CHECK-LABEL: lhuia:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lhuia a3, (a0), -16, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: sw a2, 0(a1)
+; CHECK-NEXT: ret
+ %addr = getelementptr i16, ptr %base, iXLen 0
%ld = load i16, ptr %addr
- %zext = zext i16 %ld to i64
- %addr.1 = getelementptr i16, ptr %base, i16 -16
- %res = add i64 %zext, %a
- store i64 %res, ptr %addr.2
+ %zext = zext i16 %ld to i32
+ %addr.1 = getelementptr i16, ptr %base, iXLen -16
+ %res = add i32 %zext, %a
+ store i32 %res, ptr %addr.2
ret ptr %addr.1
}
 
-define ptr @lhuib(ptr %base, i64 %a, ptr %addr.1) {
-; RV32XTHEADMEMIDX-LABEL: lhuib:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lhuib a4, (a0), 2, 0
-; RV32XTHEADMEMIDX-NEXT: add a1, a4, a1
-; RV32XTHEADMEMIDX-NEXT: sltu a4, a1, a4
-; RV32XTHEADMEMIDX-NEXT: add a2, a2, a4
-; RV32XTHEADMEMIDX-NEXT: sw a1, 0(a3)
-; RV32XTHEADMEMIDX-NEXT: sw a2, 4(a3)
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lhuib:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lhuib a3, (a0), 2, 0
-; RV64XTHEADMEMIDX-NEXT: add a1, a3, a1
-; RV64XTHEADMEMIDX-NEXT: sd a1, 0(a2)
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i16, ptr %base, i16 1
+define ptr @lhuib(ptr %base, i32 %a, ptr %addr.1) {
+; CHECK-LABEL: lhuib:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lhuib a3, (a0), 2, 0
+; CHECK-NEXT: add a1, a3, a1
+; CHECK-NEXT: sw a1, 0(a2)
+; CHECK-NEXT: ret
+ %addr = getelementptr i16, ptr %base, iXLen 1
%ld = load i16, ptr %addr
- %zext = zext i16 %ld to i64
- %res = add i64 %zext, %a
- store i64 %res, ptr %addr.1
+ %zext = zext i16 %ld to i32
+ %res = add i32 %zext, %a
+ store i32 %res, ptr %addr.1
ret ptr %addr
}
 
define ptr @lwia(ptr %base, ptr %addr.2, i32 %a) {
-; RV32XTHEADMEMIDX-LABEL: lwia:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lwia a3, (a0), -16, 2
-; RV32XTHEADMEMIDX-NEXT: add a2, a3, a2
-; RV32XTHEADMEMIDX-NEXT: sw a2, 0(a1)
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lwia:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lwia a3, (a0), -16, 2
-; RV64XTHEADMEMIDX-NEXT: add a2, a3, a2
-; RV64XTHEADMEMIDX-NEXT: sw a2, 0(a1)
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i32, ptr %base, i32 0
+; CHECK-LABEL: lwia:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lwia a3, (a0), -16, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: sw a2, 0(a1)
+; CHECK-NEXT: ret
+ %addr = getelementptr i32, ptr %base, iXLen 0
%ld = load i32, ptr %addr
- %addr.1 = getelementptr i32, ptr %base, i32 -16
+ %addr.1 = getelementptr i32, ptr %base, iXLen -16
%res = add i32 %ld, %a
store i32 %res, ptr %addr.2
ret ptr %addr.1
}
 
define ptr @lwib(ptr %base, i32 %a) {
-; RV32XTHEADMEMIDX-LABEL: lwib:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lwib a2, (a0), 4, 0
-; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1
-; RV32XTHEADMEMIDX-NEXT: sw a1, 4(a0)
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lwib:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lwib a2, (a0), 4, 0
-; RV64XTHEADMEMIDX-NEXT: add a1, a2, a1
-; RV64XTHEADMEMIDX-NEXT: sw a1, 4(a0)
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i32, ptr %base, i32 1
+; CHECK-LABEL: lwib:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lwib a2, (a0), 4, 0
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: sw a1, 4(a0)
+; CHECK-NEXT: ret
+ %addr = getelementptr i32, ptr %base, iXLen 1
%ld = load i32, ptr %addr
- %addr.1 = getelementptr i32, ptr %base, i32 2
+ %addr.1 = getelementptr i32, ptr %base, iXLen 2
%res = add i32 %ld, %a
store i32 %res, ptr %addr.1
ret ptr %addr
@@ -255,10 +173,10 @@ define ptr @lwuia(ptr %base, ptr %addr.2, i64 %a) {
; RV64XTHEADMEMIDX-NEXT: add a2, a3, a2
; RV64XTHEADMEMIDX-NEXT: sd a2, 0(a1)
; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i32, ptr %base, i32 0
+ %addr = getelementptr i32, ptr %base, iXLen 0
%ld = load i32, ptr %addr
%zext = zext i32 %ld to i64
- %addr.1 = getelementptr i32, ptr %base, i32 -16
+ %addr.1 = getelementptr i32, ptr %base, iXLen -16
%res = add i64 %zext, %a
store i64 %res, ptr %addr.2
ret ptr %addr.1
@@ -281,7 +199,7 @@ define ptr @lwuib(ptr %base, i64 %a, ptr %addr.1) {
; RV64XTHEADMEMIDX-NEXT: add a1, a3, a1
; RV64XTHEADMEMIDX-NEXT: sd a1, 0(a2)
; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i32, ptr %base, i32 1
+ %addr = getelementptr i32, ptr %base, iXLen 1
%ld = load i32, ptr %addr
%zext = zext i32 %ld to i64
%res = add i64 %zext, %a
@@ -309,9 +227,9 @@ define ptr @ldia(ptr %base, ptr %addr.2, i64 %a) {
; RV64XTHEADMEMIDX-NEXT: add a2, a3, a2
; RV64XTHEADMEMIDX-NEXT: sd a2, 0(a1)
; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i64, ptr %base, i64 0
+ %addr = getelementptr i64, ptr %base, iXLen 0
%ld = load i64, ptr %addr
- %addr.1 = getelementptr i64, ptr %base, i64 -16
+ %addr.1 = getelementptr i64, ptr %base, iXLen -16
%res = add i64 %ld, %a
store i64 %res, ptr %addr.2
ret ptr %addr.1
@@ -336,117 +254,81 @@ define ptr @ldib(ptr %base, i64 %a) {
; RV64XTHEADMEMIDX-NEXT: add a1, a2, a1
; RV64XTHEADMEMIDX-NEXT: sd a1, 8(a0)
; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i64, ptr %base, i64 1
+ %addr = getelementptr i64, ptr %base, iXLen 1
%ld = load i64, ptr %addr
- %addr.1 = getelementptr i64, ptr %base, i64 2
+ %addr.1 = getelementptr i64, ptr %base, iXLen 2
%res = add i64 %ld, %a
store i64 %res, ptr %addr.1
ret ptr %addr
}
 
define ptr @sbia(ptr %base, i8 %a, i8 %b) {
-; RV32XTHEADMEMIDX-LABEL: sbia:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV32XTHEADMEMIDX-NEXT: th.sbia a1, (a0), 1, 0
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: sbia:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV64XTHEADMEMIDX-NEXT: th.sbia a1, (a0), 1, 0
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr.1 = getelementptr i8, ptr %base, i8 1
+; CHECK-LABEL: sbia:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: th.sbia a1, (a0), 1, 0
+; CHECK-NEXT: ret
+ %addr.1 = getelementptr i8, ptr %base, iXLen 1
%res = add i8 %a, %b
store i8 %res, ptr %base
ret ptr %addr.1
}
 
define ptr @sbib(ptr %base, i8 %a, i8 %b) {
-; RV32XTHEADMEMIDX-LABEL: sbib:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV32XTHEADMEMIDX-NEXT: th.sbib a1, (a0), 1, 0
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: sbib:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV64XTHEADMEMIDX-NEXT: th.sbib a1, (a0), 1, 0
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr.1 = getelementptr i8, ptr %base, i8 1
+; CHECK-LABEL: sbib:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: th.sbib a1, (a0), 1, 0
+; CHECK-NEXT: ret
+ %addr.1 = getelementptr i8, ptr %base, iXLen 1
%res = add i8 %a, %b
store i8 %res, ptr %addr.1
ret ptr %addr.1
}
 
define ptr @shia(ptr %base, i16 %a, i16 %b) {
-; RV32XTHEADMEMIDX-LABEL: shia:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV32XTHEADMEMIDX-NEXT: th.shia a1, (a0), -9, 1
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: shia:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV64XTHEADMEMIDX-NEXT: th.shia a1, (a0), -9, 1
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr.1 = getelementptr i16, ptr %base, i16 -9
+; CHECK-LABEL: shia:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: th.shia a1, (a0), -9, 1
+; CHECK-NEXT: ret
+ %addr.1 = getelementptr i16, ptr %base, iXLen -9
%res = add i16 %a, %b
store i16 %res, ptr %base
ret ptr %addr.1
}
 
define ptr @shib(ptr %base, i16 %a, i16 %b) {
-; RV32XTHEADMEMIDX-LABEL: shib:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV32XTHEADMEMIDX-NEXT: th.shib a1, (a0), 2, 0
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: shib:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV64XTHEADMEMIDX-NEXT: th.shib a1, (a0), 2, 0
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr.1 = getelementptr i16, ptr %base, i16 1
+; CHECK-LABEL: shib:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: th.shib a1, (a0), 2, 0
+; CHECK-NEXT: ret
+ %addr.1 = getelementptr i16, ptr %base, iXLen 1
%res = add i16 %a, %b
store i16 %res, ptr %addr.1
ret ptr %addr.1
}
 
define ptr @swia(ptr %base, i32 %a, i32 %b) {
-; RV32XTHEADMEMIDX-LABEL: swia:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV32XTHEADMEMIDX-NEXT: th.swia a1, (a0), 8, 2
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: swia:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV64XTHEADMEMIDX-NEXT: th.swia a1, (a0), 8, 2
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr.1 = getelementptr i32, ptr %base, i32 8
+; CHECK-LABEL: swia:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: th.swia a1, (a0), 8, 2
+; CHECK-NEXT: ret
+ %addr.1 = getelementptr i32, ptr %base, iXLen 8
%res = add i32 %a, %b
store i32 %res, ptr %base
ret ptr %addr.1
}
 
define ptr @swib(ptr %base, i32 %a, i32 %b) {
-; RV32XTHEADMEMIDX-LABEL: swib:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV32XTHEADMEMIDX-NEXT: th.swib a1, (a0), -13, 3
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: swib:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV64XTHEADMEMIDX-NEXT: th.swib a1, (a0), -13, 3
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr.1 = getelementptr i32, ptr %base, i32 -26
+; CHECK-LABEL: swib:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: th.swib a1, (a0), -13, 3
+; CHECK-NEXT: ret
+ %addr.1 = getelementptr i32, ptr %base, iXLen -26
%res = add i32 %a, %b
store i32 %res, ptr %addr.1
ret ptr %addr.1
@@ -470,7 +352,7 @@ define ptr @sdia(ptr %base, i64 %a, i64 %b) {
; RV64XTHEADMEMIDX-NEXT: add a1, a1, a2
; RV64XTHEADMEMIDX-NEXT: th.sdia a1, (a0), 8, 3
; RV64XTHEADMEMIDX-NEXT: ret
- %addr.1 = getelementptr i64, ptr %base, i64 8
+ %addr.1 = getelementptr i64, ptr %base, iXLen 8
%res = add i64 %a, %b
store i64 %res, ptr %base
ret ptr %addr.1
@@ -492,48 +374,33 @@ define ptr @sdib(ptr %base, i64 %a, i64 %b) {
; RV64XTHEADMEMIDX-NEXT: add a1, a1, a2
; RV64XTHEADMEMIDX-NEXT: th.sdib a1, (a0), 8, 0
; RV64XTHEADMEMIDX-NEXT: ret
- %addr.1 = getelementptr i64, ptr %base, i64 1
+ %addr.1 = getelementptr i64, ptr %base, iXLen 1
%res = add i64 %a, %b
store i64 %res, ptr %addr.1
ret ptr %addr.1
}
 
-define i8 @lrb_anyext(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: lrb_anyext:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lrb a0, a0, a1, 0
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lrb_anyext:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lrb a0, a0, a1, 0
-; RV64XTHEADMEMIDX-NEXT: ret
- %1 = getelementptr i8, ptr %a, i64 %b
+define i8 @lrb_anyext(ptr %a, iXLen %b) {
+; CHECK-LABEL: lrb_anyext:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lrb a0, a0, a1, 0
+; CHECK-NEXT: ret
+ %1 = getelementptr i8, ptr %a, iXLen %b
%2 = load i8, ptr %1, align 1
ret i8 %2
}
 
-define i64 @lrb(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: lrb:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lrb a1, a0, a1, 0
-; RV32XTHEADMEMIDX-NEXT: srai a2, a1, 31
-; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1
-; RV32XTHEADMEMIDX-NEXT: add a2, a2, a2
-; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lrb:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lrb a0, a0, a1, 0
-; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0
-; RV64XTHEADMEMIDX-NEXT: ret
- %1 = getelementptr i8, ptr %a, i64 %b
+define i32 @lrb(ptr %a, iXLen %b) {
+; CHECK-LABEL: lrb:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lrb a0, a0, a1, 0
+; CHECK-NEXT: add a0, a0, a0
+; CHECK-NEXT: ret
+ %1 = getelementptr i8, ptr %a, iXLen %b
%2 = load i8, ptr %1, align 1
- %3 = sext i8 %2 to i64
- %4 = add i64 %3, %3
- ret i64 %4
+ %3 = sext i8 %2 to i32
+ %4 = add i32 %3, %3
+ ret i32 %4
}
 
define i8 @lurb_anyext(ptr %a, i32 %b) {
@@ -552,15 +419,11 @@ define i8 @lurb_anyext(ptr %a, i32 %b) {
ret i8 %3
}
 
-define i64 @lurb(ptr %a, i32 %b) {
+define i32 @lurb(ptr %a, i32 %b) {
; RV32XTHEADMEMIDX-LABEL: lurb:
; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lrb a1, a0, a1, 0
-; RV32XTHEADMEMIDX-NEXT: srai a2, a1, 31
-; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1
-; RV32XTHEADMEMIDX-NEXT: add a2, a2, a2
-; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1
+; RV32XTHEADMEMIDX-NEXT: th.lrb a0, a0, a1, 0
+; RV32XTHEADMEMIDX-NEXT: add a0, a0, a0
; RV32XTHEADMEMIDX-NEXT: ret
;
; RV64XTHEADMEMIDX-LABEL: lurb:
@@ -571,37 +434,29 @@ define i64 @lurb(ptr %a, i32 %b) {
%1 = zext i32 %b to i64
%2 = getelementptr i8, ptr %a, i64 %1
%3 = load i8, ptr %2, align 1
- %4 = sext i8 %3 to i64
- %5 = add i64 %4, %4
- ret i64 %5
-}
-
-define i64 @lrbu(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: lrbu:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lrbu a1, a0, a1, 0
-; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lrbu:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lrbu a0, a0, a1, 0
-; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0
-; RV64XTHEADMEMIDX-NEXT: ret
- %1 = getelementptr i8, ptr %a, i64 %b
+ %4 = sext i8 %3 to i32
+ %5 = add i32 %4, %4
+ ret i32 %5
+}
+
+define i32 @lrbu(ptr %a, iXLen %b) {
+; CHECK-LABEL: lrbu:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lrbu a0, a0, a1, 0
+; CHECK-NEXT: add a0, a0, a0
+; CHECK-NEXT: ret
+ %1 = getelementptr i8, ptr %a, iXLen %b
%2 = load i8, ptr %1, align 1
- %3 = zext i8 %2 to i64
- %4 = add i64 %3, %3
- ret i64 %4
+ %3 = zext i8 %2 to i32
+ %4 = add i32 %3, %3
+ ret i32 %4
}
 
-define i64 @lurbu(ptr %a, i32 %b) {
+define i32 @lurbu(ptr %a, i32 %b) {
; RV32XTHEADMEMIDX-LABEL: lurbu:
; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lrbu a1, a0, a1, 0
-; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1
+; RV32XTHEADMEMIDX-NEXT: th.lrbu a0, a0, a1, 0
+; RV32XTHEADMEMIDX-NEXT: add a0, a0, a0
; RV32XTHEADMEMIDX-NEXT: ret
;
; RV64XTHEADMEMIDX-LABEL: lurbu:
@@ -612,47 +467,32 @@ define i64 @lurbu(ptr %a, i32 %b) {
%1 = zext i32 %b to i64
%2 = getelementptr i8, ptr %a, i64 %1
%3 = load i8, ptr %2, align 1
- %4 = zext i8 %3 to i64
- %5 = add i64 %4, %4
- ret i64 %5
+ %4 = zext i8 %3 to i32
+ %5 = add i32 %4, %4
+ ret i32 %5
}
 
-define i16 @lrh_anyext(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: lrh_anyext:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lrh a0, a0, a1, 1
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lrh_anyext:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lrh a0, a0, a1, 1
-; RV64XTHEADMEMIDX-NEXT: ret
- %1 = getelementptr i16, ptr %a, i64 %b
+define i16 @lrh_anyext(ptr %a, iXLen %b) {
+; CHECK-LABEL: lrh_anyext:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lrh a0, a0, a1, 1
+; CHECK-NEXT: ret
+ %1 = getelementptr i16, ptr %a, iXLen %b
%2 = load i16, ptr %1, align 2
ret i16 %2
}
 
-define i64 @lrh(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: lrh:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lrh a1, a0, a1, 1
-; RV32XTHEADMEMIDX-NEXT: srai a2, a1, 31
-; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1
-; RV32XTHEADMEMIDX-NEXT: add a2, a2, a2
-; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lrh:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lrh a0, a0, a1, 1
-; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0
-; RV64XTHEADMEMIDX-NEXT: ret
- %1 = getelementptr i16, ptr %a, i64 %b
+define i32 @lrh(ptr %a, iXLen %b) {
+; CHECK-LABEL: lrh:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lrh a0, a0, a1, 1
+; CHECK-NEXT: add a0, a0, a0
+; CHECK-NEXT: ret
+ %1 = getelementptr i16, ptr %a, iXLen %b
%2 = load i16, ptr %1, align 2
- %3 = sext i16 %2 to i64
- %4 = add i64 %3, %3
- ret i64 %4
+ %3 = sext i16 %2 to i32
+ %4 = add i32 %3, %3
+ ret i32 %4
}
 
define i16 @lurh_anyext(ptr %a, i32 %b) {
@@ -671,15 +511,11 @@ define i16 @lurh_anyext(ptr %a, i32 %b) {
ret i16 %3
}
 
-define i64 @lurh(ptr %a, i32 %b) {
+define i32 @lurh(ptr %a, i32 %b) {
; RV32XTHEADMEMIDX-LABEL: lurh:
; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lrh a1, a0, a1, 1
-; RV32XTHEADMEMIDX-NEXT: srai a2, a1, 31
-; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1
-; RV32XTHEADMEMIDX-NEXT: add a2, a2, a2
-; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1
+; RV32XTHEADMEMIDX-NEXT: th.lrh a0, a0, a1, 1
+; RV32XTHEADMEMIDX-NEXT: add a0, a0, a0
; RV32XTHEADMEMIDX-NEXT: ret
;
; RV64XTHEADMEMIDX-LABEL: lurh:
@@ -690,37 +526,29 @@ define i64 @lurh(ptr %a, i32 %b) {
%1 = zext i32 %b to i64
%2 = getelementptr i16, ptr %a, i64 %1
%3 = load i16, ptr %2, align 2
- %4 = sext i16 %3 to i64
- %5 = add i64 %4, %4
- ret i64 %5
-}
-
-define i64 @lrhu(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: lrhu:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lrhu a1, a0, a1, 1
-; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lrhu:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lrhu a0, a0, a1, 1
-; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0
-; RV64XTHEADMEMIDX-NEXT: ret
- %1 = getelementptr i16, ptr %a, i64 %b
+ %4 = sext i16 %3 to i32
+ %5 = add i32 %4, %4
+ ret i32 %5
+}
+
+define i32 @lrhu(ptr %a, iXLen %b) {
+; CHECK-LABEL: lrhu:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lrhu a0, a0, a1, 1
+; CHECK-NEXT: add a0, a0, a0
+; CHECK-NEXT: ret
+ %1 = getelementptr i16, ptr %a, iXLen %b
%2 = load i16, ptr %1, align 2
- %3 = zext i16 %2 to i64
- %4 = add i64 %3, %3
- ret i64 %4
+ %3 = zext i16 %2 to i32
+ %4 = add i32 %3, %3
+ ret i32 %4
}
 
-define i64 @lurhu(ptr %a, i32 %b) {
+define i32 @lurhu(ptr %a, i32 %b) {
; RV32XTHEADMEMIDX-LABEL: lurhu:
; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lrhu a1, a0, a1, 1
-; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1
+; RV32XTHEADMEMIDX-NEXT: th.lrhu a0, a0, a1, 1
+; RV32XTHEADMEMIDX-NEXT: add a0, a0, a0
; RV32XTHEADMEMIDX-NEXT: ret
;
; RV64XTHEADMEMIDX-LABEL: lurhu:
@@ -731,27 +559,22 @@ define i64 @lurhu(ptr %a, i32 %b) {
%1 = zext i32 %b to i64
%2 = getelementptr i16, ptr %a, i64 %1
%3 = load i16, ptr %2, align 2
- %4 = zext i16 %3 to i64
- %5 = add i64 %4, %4
- ret i64 %5
+ %4 = zext i16 %3 to i32
+ %5 = add i32 %4, %4
+ ret i32 %5
}
 
-define i32 @lrw_anyext(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: lrw_anyext:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lrw a0, a0, a1, 2
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lrw_anyext:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lrw a0, a0, a1, 2
-; RV64XTHEADMEMIDX-NEXT: ret
- %1 = getelementptr i32, ptr %a, i64 %b
+define i32 @lrw_anyext(ptr %a, iXLen %b) {
+; CHECK-LABEL: lrw_anyext:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lrw a0, a0, a1, 2
+; CHECK-NEXT: ret
+ %1 = getelementptr i32, ptr %a, iXLen %b
%2 = load i32, ptr %1, align 4
ret i32 %2
}
 
-define i64 @lrw(ptr %a, i64 %b) {
+define i64 @lrw(ptr %a, iXLen %b) {
; RV32XTHEADMEMIDX-LABEL: lrw:
; RV32XTHEADMEMIDX: # %bb.0:
; RV32XTHEADMEMIDX-NEXT: th.lrw a1, a0, a1, 2
@@ -767,7 +590,7 @@ define i64 @lrw(ptr %a, i64 %b) {
; RV64XTHEADMEMIDX-NEXT: th.lrw a0, a0, a1, 2
; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0
; RV64XTHEADMEMIDX-NEXT: ret
- %1 = getelementptr i32, ptr %a, i64 %b
+ %1 = getelementptr i32, ptr %a, iXLen %b
%2 = load i32, ptr %1, align 4
%3 = sext i32 %2 to i64
%4 = add i64 %3, %3
@@ -814,7 +637,7 @@ define i64 @lurw(ptr %a, i32 %b) {
ret i64 %5
}
 
-define i64 @lrwu(ptr %a, i64 %b) {
+define i64 @lrwu(ptr %a, iXLen %b) {
; RV32XTHEADMEMIDX-LABEL: lrwu:
; RV32XTHEADMEMIDX: # %bb.0:
; RV32XTHEADMEMIDX-NEXT: th.lrw a1, a0, a1, 2
@@ -827,7 +650,7 @@ define i64 @lrwu(ptr %a, i64 %b) {
; RV64XTHEADMEMIDX-NEXT: th.lrwu a0, a0, a1, 2
; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0
; RV64XTHEADMEMIDX-NEXT: ret
- %1 = getelementptr i32, ptr %a, i64 %b
+ %1 = getelementptr i32, ptr %a, iXLen %b
%2 = load i32, ptr %1, align 4
%3 = zext i32 %2 to i64
%4 = add i64 %3, %3
@@ -855,7 +678,7 @@ define i64 @lurwu(ptr %a, i32 %b) {
ret i64 %5
}
 
-define i64 @lrd(ptr %a, i64 %b) {
+define i64 @lrd(ptr %a, iXLen %b) {
; RV32XTHEADMEMIDX-LABEL: lrd:
; RV32XTHEADMEMIDX: # %bb.0:
; RV32XTHEADMEMIDX-NEXT: th.lrw a2, a0, a1, 3
@@ -872,13 +695,13 @@ define i64 @lrd(ptr %a, i64 %b) {
; RV64XTHEADMEMIDX-NEXT: th.lrd a0, a0, a1, 3
; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0
; RV64XTHEADMEMIDX-NEXT: ret
- %1 = getelementptr i64, ptr %a, i64 %b
+ %1 = getelementptr i64, ptr %a, iXLen %b
%2 = load i64, ptr %1, align 8
%3 = add i64 %2, %2
ret i64 %3
}
 
-define i64 @lrd_2(ptr %a, i64 %b) {
+define i64 @lrd_2(ptr %a, iXLen %b) {
; RV32XTHEADMEMIDX-LABEL: lrd_2:
; RV32XTHEADMEMIDX: # %bb.0:
; RV32XTHEADMEMIDX-NEXT: addi a2, a0, 96
@@ -897,8 +720,8 @@ define i64 @lrd_2(ptr %a, i64 %b) {
; RV64XTHEADMEMIDX-NEXT: th.lrd a0, a0, a1, 3
; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0
; RV64XTHEADMEMIDX-NEXT: ret
- %1 = add i64 %b, 12
- %2 = getelementptr i64, ptr %a, i64 %1
+ %1 = add iXLen %b, 12
+ %2 = getelementptr i64, ptr %a, iXLen %1
%3 = load i64, ptr %2, align 8
%4 = add i64 %3, %3
ret i64 %4
@@ -928,20 +751,14 @@ define i64 @lurd(ptr %a, i32 %b) {
ret i64 %4
}
 
-define void @srb(ptr %a, i64 %b, i8 %c) {
-; RV32XTHEADMEMIDX-LABEL: srb:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: add a3, a3, a3
-; RV32XTHEADMEMIDX-NEXT: th.srb a3, a0, a1, 0
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: srb:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: add a2, a2, a2
-; RV64XTHEADMEMIDX-NEXT: th.srb a2, a0, a1, 0
-; RV64XTHEADMEMIDX-NEXT: ret
+define void @srb(ptr %a, iXLen %b, i8 %c) {
+; CHECK-LABEL: srb:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a2, a2, a2
+; CHECK-NEXT: th.srb a2, a0, a1, 0
+; CHECK-NEXT: ret
%1 = add i8 %c, %c
- %2 = getelementptr i8, ptr %a, i64 %b
+ %2 = getelementptr i8, ptr %a, iXLen %b
store i8 %1, ptr %2, align 1
ret void
}
@@ -965,20 +782,14 @@ define void @surb(ptr %a, i32 %b, i8 %c) {
ret void
}
 
-define void @srh(ptr %a, i64 %b, i16 %c) {
-; RV32XTHEADMEMIDX-LABEL: srh:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: add a3, a3, a3
-; RV32XTHEADMEMIDX-NEXT: th.srh a3, a0, a1, 1
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: srh:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: add a2, a2, a2
-; RV64XTHEADMEMIDX-NEXT: th.srh a2, a0, a1, 1
-; RV64XTHEADMEMIDX-NEXT: ret
+define void @srh(ptr %a, iXLen %b, i16 %c) {
+; CHECK-LABEL: srh:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a2, a2, a2
+; CHECK-NEXT: th.srh a2, a0, a1, 1
+; CHECK-NEXT: ret
%1 = add i16 %c, %c
- %2 = getelementptr i16, ptr %a, i64 %b
+ %2 = getelementptr i16, ptr %a, iXLen %b
store i16 %1, ptr %2, align 2
ret void
}
@@ -1002,20 +813,14 @@ define void @surh(ptr %a, i32 %b, i16 %c) {
ret void
}
 
-define void @srw(ptr %a, i64 %b, i32 %c) {
-; RV32XTHEADMEMIDX-LABEL: srw:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: add a3, a3, a3
-; RV32XTHEADMEMIDX-NEXT: th.srw a3, a0, a1, 2
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: srw:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: add a2, a2, a2
-; RV64XTHEADMEMIDX-NEXT: th.srw a2, a0, a1, 2
-; RV64XTHEADMEMIDX-NEXT: ret
+define void @srw(ptr %a, iXLen %b, i32 %c) {
+; CHECK-LABEL: srw:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a2, a2, a2
+; CHECK-NEXT: th.srw a2, a0, a1, 2
+; CHECK-NEXT: ret
%1 = add i32 %c, %c
- %2 = getelementptr i32, ptr %a, i64 %b
+ %2 = getelementptr i32, ptr %a, iXLen %b
store i32 %1, ptr %2, align 4
ret void
}
@@ -1039,16 +844,16 @@ define void @surw(ptr %a, i32 %b, i32 %c) {
ret void
}
 
-define void @srd(ptr %a, i64 %b, i64 %c) {
+define void @srd(ptr %a, iXLen %b, i64 %c) {
; RV32XTHEADMEMIDX-LABEL: srd:
; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: add a2, a3, a3
-; RV32XTHEADMEMIDX-NEXT: add a4, a4, a4
-; RV32XTHEADMEMIDX-NEXT: sltu a3, a2, a3
-; RV32XTHEADMEMIDX-NEXT: th.srw a2, a0, a1, 3
-; RV32XTHEADMEMIDX-NEXT: add a3, a4, a3
+; RV32XTHEADMEMIDX-NEXT: add a4, a2, a2
+; RV32XTHEADMEMIDX-NEXT: add a3, a3, a3
+; RV32XTHEADMEMIDX-NEXT: sltu a2, a4, a2
+; RV32XTHEADMEMIDX-NEXT: th.srw a4, a0, a1, 3
+; RV32XTHEADMEMIDX-NEXT: add a2, a3, a2
; RV32XTHEADMEMIDX-NEXT: addi a0, a0, 4
-; RV32XTHEADMEMIDX-NEXT: th.srw a3, a0, a1, 3
+; RV32XTHEADMEMIDX-NEXT: th.srw a2, a0, a1, 3
; RV32XTHEADMEMIDX-NEXT: ret
;
; RV64XTHEADMEMIDX-LABEL: srd:
@@ -1057,7 +862,7 @@ define void @srd(ptr %a, i64 %b, i64 %c) {
; RV64XTHEADMEMIDX-NEXT: th.srd a2, a0, a1, 3
; RV64XTHEADMEMIDX-NEXT: ret
%1 = add i64 %c, %c
- %2 = getelementptr i64, ptr %a, i64 %b
+ %2 = getelementptr i64, ptr %a, iXLen %b
store i64 %1, ptr %2, align 8
ret void
}
@@ -1087,24 +892,18 @@ define void @surd(ptr %a, i32 %b, i64 %c) {
}
 
define ptr @test_simm5(ptr %base, i32 %a, i32 %b) {
-; RV32XTHEADMEMIDX-LABEL: test_simm5:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV32XTHEADMEMIDX-NEXT: th.swia a1, (a0), -12, 2
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: test_simm5:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV64XTHEADMEMIDX-NEXT: th.swia a1, (a0), -12, 2
-; RV64XTHEADMEMIDX-NEXT: ret
+; CHECK-LABEL: test_simm5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: th.swia a1, (a0), -12, 2
+; CHECK-NEXT: ret
%addr.1 = getelementptr i32, ptr %base, i32 -12
%res = add i32 %a, %b
store i32 %res, ptr %base
ret ptr %addr.1
}
 
-define i64 @lrd_large_shift(ptr %a, i64 %b) {
+define i64 @lrd_large_shift(ptr %a, iXLen %b) {
; RV32XTHEADMEMIDX-LABEL: lrd_large_shift:
; RV32XTHEADMEMIDX: # %bb.0:
; RV32XTHEADMEMIDX-NEXT: slli a1, a1, 5
@@ -1119,14 +918,14 @@ define i64 @lrd_large_shift(ptr %a, i64 %b) {
; RV64XTHEADMEMIDX-NEXT: add a0, a1, a0
; RV64XTHEADMEMIDX-NEXT: ld a0, 384(a0)
; RV64XTHEADMEMIDX-NEXT: ret
- %1 = add i64 %b, 12
- %2 = shl i64 %1, 2
- %3 = getelementptr i64, ptr %a, i64 %2
+ %1 = add iXLen %b, 12
+ %2 = shl iXLen %1, 2
+ %3 = getelementptr i64, ptr %a, iXLen %2
%4 = load i64, ptr %3, align 8
ret i64 %4
}
 
-define i64 @lrd_large_offset(ptr %a, i64 %b) {
+define i64 @lrd_large_offset(ptr %a, iXLen %b) {
; RV32XTHEADMEMIDX-LABEL: lrd_large_offset:
; RV32XTHEADMEMIDX: # %bb.0:
; RV32XTHEADMEMIDX-NEXT: slli a1, a1, 3
@@ -1145,8 +944,8 @@ define i64 @lrd_large_offset(ptr %a, i64 %b) {
; RV64XTHEADMEMIDX-NEXT: add a0, a0, a1
; RV64XTHEADMEMIDX-NEXT: ld a0, 1792(a0)
; RV64XTHEADMEMIDX-NEXT: ret
- %1 = add i64 %b, 12000
- %2 = getelementptr i64, ptr %a, i64 %1
+ %1 = add iXLen %b, 12000
+ %2 = getelementptr i64, ptr %a, iXLen %1
%3 = load i64, ptr %2, align 8
ret i64 %3
}
diff --git a/llvm/test/CodeGen/RISCV/zdinx-boundary-check.ll b/llvm/test/CodeGen/RISCV/zdinx-boundary-check.ll
index f9db686..1ef37f7 100644
--- a/llvm/test/CodeGen/RISCV/zdinx-boundary-check.ll
+++ b/llvm/test/CodeGen/RISCV/zdinx-boundary-check.ll
@@ -242,7 +242,7 @@ define void @foo7(ptr nocapture %p) nounwind {
; RV64ZDINX: # %bb.0: # %entry
; RV64ZDINX-NEXT: lui a1, %hi(d)
; RV64ZDINX-NEXT: addi a2, a1, %lo(d)
-; RV64ZDINX-NEXT: lwu a2, 8(a2)
+; RV64ZDINX-NEXT: lw a2, 8(a2)
; RV64ZDINX-NEXT: lwu a1, %lo(d+4)(a1)
; RV64ZDINX-NEXT: slli a2, a2, 32
; RV64ZDINX-NEXT: or a1, a2, a1
@@ -337,7 +337,7 @@ define void @foo9(ptr nocapture %p) nounwind {
; RV64ZDINX: # %bb.0: # %entry
; RV64ZDINX-NEXT: lui a1, %hi(e)
; RV64ZDINX-NEXT: addi a2, a1, %lo(e)
-; RV64ZDINX-NEXT: lwu a2, 4(a2)
+; RV64ZDINX-NEXT: lw a2, 4(a2)
; RV64ZDINX-NEXT: lwu a1, %lo(e)(a1)
; RV64ZDINX-NEXT: slli a2, a2, 32
; RV64ZDINX-NEXT: or a1, a2, a1
@@ -480,7 +480,7 @@ define double @foo13(ptr nocapture %p) nounwind {
; RV64ZDINX-LABEL: foo13:
; RV64ZDINX: # %bb.0: # %entry
; RV64ZDINX-NEXT: lui a0, %hi(f)
-; RV64ZDINX-NEXT: lwu a1, %lo(f+8)(a0)
+; RV64ZDINX-NEXT: lw a1, %lo(f+8)(a0)
; RV64ZDINX-NEXT: lwu a0, %lo(f+4)(a0)
; RV64ZDINX-NEXT: slli a1, a1, 32
; RV64ZDINX-NEXT: or a0, a1, a0