Diffstat (limited to 'llvm/test/CodeGen/RISCV')
-rw-r--r-- llvm/test/CodeGen/RISCV/fpclamptosat.ll | 88
-rw-r--r-- llvm/test/CodeGen/RISCV/interrupt-attr.ll | 5616
-rw-r--r-- llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll | 36
-rw-r--r-- llvm/test/CodeGen/RISCV/pr148084.ll | 279
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll | 144
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll | 104
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/interrupt-attr-nocall.ll | 344
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir | 50
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/rvv-vmerge-to-vmv.ll | 35
-rw-r--r-- llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll | 1901
-rw-r--r-- llvm/test/CodeGen/RISCV/xqcisls.ll | 47
-rw-r--r-- llvm/test/CodeGen/RISCV/xtheadfmemidx.ll | 128
-rw-r--r-- llvm/test/CodeGen/RISCV/xtheadmemidx.ll | 775
13 files changed, 2112 insertions(+), 7435 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
index 246e6a6..117e3e4 100644
--- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll
+++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
@@ -3292,30 +3292,30 @@ define i64 @ustest_f64i64_mm(double %x) {
; RV32IF-NEXT: mv a1, a0
; RV32IF-NEXT: addi a0, sp, 8
; RV32IF-NEXT: call __fixdfti
-; RV32IF-NEXT: lw a0, 8(sp)
-; RV32IF-NEXT: lw a1, 12(sp)
-; RV32IF-NEXT: lw a2, 20(sp)
+; RV32IF-NEXT: lw a0, 20(sp)
+; RV32IF-NEXT: lw a1, 8(sp)
+; RV32IF-NEXT: lw a2, 12(sp)
; RV32IF-NEXT: lw a3, 16(sp)
-; RV32IF-NEXT: beqz a2, .LBB47_2
+; RV32IF-NEXT: beqz a0, .LBB47_2
; RV32IF-NEXT: # %bb.1: # %entry
-; RV32IF-NEXT: slti a4, a2, 0
+; RV32IF-NEXT: slti a4, a0, 0
; RV32IF-NEXT: j .LBB47_3
; RV32IF-NEXT: .LBB47_2:
; RV32IF-NEXT: seqz a4, a3
; RV32IF-NEXT: .LBB47_3: # %entry
; RV32IF-NEXT: xori a3, a3, 1
-; RV32IF-NEXT: or a3, a3, a2
+; RV32IF-NEXT: or a3, a3, a0
; RV32IF-NEXT: seqz a3, a3
; RV32IF-NEXT: addi a3, a3, -1
; RV32IF-NEXT: and a3, a3, a4
; RV32IF-NEXT: neg a3, a3
+; RV32IF-NEXT: and a2, a3, a2
; RV32IF-NEXT: and a1, a3, a1
; RV32IF-NEXT: and a0, a3, a0
-; RV32IF-NEXT: and a2, a3, a2
-; RV32IF-NEXT: slti a2, a2, 0
-; RV32IF-NEXT: addi a2, a2, -1
-; RV32IF-NEXT: and a0, a2, a0
-; RV32IF-NEXT: and a1, a2, a1
+; RV32IF-NEXT: slti a0, a0, 0
+; RV32IF-NEXT: addi a3, a0, -1
+; RV32IF-NEXT: and a0, a3, a1
+; RV32IF-NEXT: and a1, a3, a2
; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IF-NEXT: .cfi_restore ra
; RV32IF-NEXT: addi sp, sp, 32
@@ -3354,30 +3354,30 @@ define i64 @ustest_f64i64_mm(double %x) {
; RV32IFD-NEXT: .cfi_offset ra, -4
; RV32IFD-NEXT: addi a0, sp, 8
; RV32IFD-NEXT: call __fixdfti
-; RV32IFD-NEXT: lw a0, 8(sp)
-; RV32IFD-NEXT: lw a1, 12(sp)
-; RV32IFD-NEXT: lw a2, 20(sp)
+; RV32IFD-NEXT: lw a0, 20(sp)
+; RV32IFD-NEXT: lw a1, 8(sp)
+; RV32IFD-NEXT: lw a2, 12(sp)
; RV32IFD-NEXT: lw a3, 16(sp)
-; RV32IFD-NEXT: beqz a2, .LBB47_2
+; RV32IFD-NEXT: beqz a0, .LBB47_2
; RV32IFD-NEXT: # %bb.1: # %entry
-; RV32IFD-NEXT: slti a4, a2, 0
+; RV32IFD-NEXT: slti a4, a0, 0
; RV32IFD-NEXT: j .LBB47_3
; RV32IFD-NEXT: .LBB47_2:
; RV32IFD-NEXT: seqz a4, a3
; RV32IFD-NEXT: .LBB47_3: # %entry
; RV32IFD-NEXT: xori a3, a3, 1
-; RV32IFD-NEXT: or a3, a3, a2
+; RV32IFD-NEXT: or a3, a3, a0
; RV32IFD-NEXT: seqz a3, a3
; RV32IFD-NEXT: addi a3, a3, -1
; RV32IFD-NEXT: and a3, a3, a4
; RV32IFD-NEXT: neg a3, a3
+; RV32IFD-NEXT: and a2, a3, a2
; RV32IFD-NEXT: and a1, a3, a1
; RV32IFD-NEXT: and a0, a3, a0
-; RV32IFD-NEXT: and a2, a3, a2
-; RV32IFD-NEXT: slti a2, a2, 0
-; RV32IFD-NEXT: addi a2, a2, -1
-; RV32IFD-NEXT: and a0, a2, a0
-; RV32IFD-NEXT: and a1, a2, a1
+; RV32IFD-NEXT: slti a0, a0, 0
+; RV32IFD-NEXT: addi a3, a0, -1
+; RV32IFD-NEXT: and a0, a3, a1
+; RV32IFD-NEXT: and a1, a3, a2
; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: .cfi_restore ra
; RV32IFD-NEXT: addi sp, sp, 32
@@ -3530,30 +3530,30 @@ define i64 @ustest_f32i64_mm(float %x) {
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: call __fixsfti
-; RV32-NEXT: lw a0, 8(sp)
-; RV32-NEXT: lw a1, 12(sp)
-; RV32-NEXT: lw a2, 20(sp)
+; RV32-NEXT: lw a0, 20(sp)
+; RV32-NEXT: lw a1, 8(sp)
+; RV32-NEXT: lw a2, 12(sp)
; RV32-NEXT: lw a3, 16(sp)
-; RV32-NEXT: beqz a2, .LBB50_2
+; RV32-NEXT: beqz a0, .LBB50_2
; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: slti a4, a2, 0
+; RV32-NEXT: slti a4, a0, 0
; RV32-NEXT: j .LBB50_3
; RV32-NEXT: .LBB50_2:
; RV32-NEXT: seqz a4, a3
; RV32-NEXT: .LBB50_3: # %entry
; RV32-NEXT: xori a3, a3, 1
-; RV32-NEXT: or a3, a3, a2
+; RV32-NEXT: or a3, a3, a0
; RV32-NEXT: seqz a3, a3
; RV32-NEXT: addi a3, a3, -1
; RV32-NEXT: and a3, a3, a4
; RV32-NEXT: neg a3, a3
+; RV32-NEXT: and a2, a3, a2
; RV32-NEXT: and a1, a3, a1
; RV32-NEXT: and a0, a3, a0
-; RV32-NEXT: and a2, a3, a2
-; RV32-NEXT: slti a2, a2, 0
-; RV32-NEXT: addi a2, a2, -1
-; RV32-NEXT: and a0, a2, a0
-; RV32-NEXT: and a1, a2, a1
+; RV32-NEXT: slti a0, a0, 0
+; RV32-NEXT: addi a3, a0, -1
+; RV32-NEXT: and a0, a3, a1
+; RV32-NEXT: and a1, a3, a2
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: .cfi_restore ra
; RV32-NEXT: addi sp, sp, 32
@@ -3767,30 +3767,30 @@ define i64 @ustest_f16i64_mm(half %x) {
; RV32-NEXT: call __extendhfsf2
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: call __fixsfti
-; RV32-NEXT: lw a0, 8(sp)
-; RV32-NEXT: lw a1, 12(sp)
-; RV32-NEXT: lw a2, 20(sp)
+; RV32-NEXT: lw a0, 20(sp)
+; RV32-NEXT: lw a1, 8(sp)
+; RV32-NEXT: lw a2, 12(sp)
; RV32-NEXT: lw a3, 16(sp)
-; RV32-NEXT: beqz a2, .LBB53_2
+; RV32-NEXT: beqz a0, .LBB53_2
; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: slti a4, a2, 0
+; RV32-NEXT: slti a4, a0, 0
; RV32-NEXT: j .LBB53_3
; RV32-NEXT: .LBB53_2:
; RV32-NEXT: seqz a4, a3
; RV32-NEXT: .LBB53_3: # %entry
; RV32-NEXT: xori a3, a3, 1
-; RV32-NEXT: or a3, a3, a2
+; RV32-NEXT: or a3, a3, a0
; RV32-NEXT: seqz a3, a3
; RV32-NEXT: addi a3, a3, -1
; RV32-NEXT: and a3, a3, a4
; RV32-NEXT: neg a3, a3
+; RV32-NEXT: and a2, a3, a2
; RV32-NEXT: and a1, a3, a1
; RV32-NEXT: and a0, a3, a0
-; RV32-NEXT: and a2, a3, a2
-; RV32-NEXT: slti a2, a2, 0
-; RV32-NEXT: addi a2, a2, -1
-; RV32-NEXT: and a0, a2, a0
-; RV32-NEXT: and a1, a2, a1
+; RV32-NEXT: slti a0, a0, 0
+; RV32-NEXT: addi a3, a0, -1
+; RV32-NEXT: and a0, a3, a1
+; RV32-NEXT: and a1, a3, a2
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: .cfi_restore ra
; RV32-NEXT: addi sp, sp, 32
diff --git a/llvm/test/CodeGen/RISCV/interrupt-attr.ll b/llvm/test/CodeGen/RISCV/interrupt-attr.ll
index e278b8d..472b903 100644
--- a/llvm/test/CodeGen/RISCV/interrupt-attr.ll
+++ b/llvm/test/CodeGen/RISCV/interrupt-attr.ll
@@ -794,498 +794,46 @@ define void @foo_with_call() #1 {
; CHECK-RV32-V-NEXT: slli a0, a0, 5
; CHECK-RV32-V-NEXT: sub sp, sp, a0
; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 5
-; CHECK-RV32-V-NEXT: sub a0, a1, a0
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 3
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
; CHECK-RV32-V-NEXT: slli a0, a0, 3
; CHECK-RV32-V-NEXT: mv a1, a0
; CHECK-RV32-V-NEXT: slli a0, a0, 1
; CHECK-RV32-V-NEXT: add a0, a0, a1
; CHECK-RV32-V-NEXT: add a0, sp, a0
; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 3
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 3
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 4
-; CHECK-RV32-V-NEXT: add a0, a1, a0
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-V-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-V-NEXT: csrr a0, vlenb
; CHECK-RV32-V-NEXT: slli a0, a0, 4
; CHECK-RV32-V-NEXT: add a0, sp, a0
; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 4
-; CHECK-RV32-V-NEXT: sub a0, a1, a0
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 3
-; CHECK-RV32-V-NEXT: add a0, a1, a0
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-V-NEXT: csrr a0, vlenb
; CHECK-RV32-V-NEXT: slli a0, a0, 3
; CHECK-RV32-V-NEXT: add a0, sp, a0
; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 3
-; CHECK-RV32-V-NEXT: sub a0, a1, a0
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a1, a0
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a1, a0
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-V-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-V-NEXT: addi a0, sp, 16
-; CHECK-RV32-V-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-V-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-V-NEXT: call otherfoo
; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 5
-; CHECK-RV32-V-NEXT: sub a0, a1, a0
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
; CHECK-RV32-V-NEXT: slli a0, a0, 3
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 3
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
; CHECK-RV32-V-NEXT: mv a1, a0
; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 3
; CHECK-RV32-V-NEXT: add a0, a0, a1
; CHECK-RV32-V-NEXT: add a0, sp, a0
; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 3
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 4
-; CHECK-RV32-V-NEXT: add a0, a1, a0
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-V-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-V-NEXT: csrr a0, vlenb
; CHECK-RV32-V-NEXT: slli a0, a0, 4
; CHECK-RV32-V-NEXT: add a0, sp, a0
; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 4
-; CHECK-RV32-V-NEXT: sub a0, a1, a0
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 3
-; CHECK-RV32-V-NEXT: add a0, a1, a0
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-V-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-V-NEXT: csrr a0, vlenb
; CHECK-RV32-V-NEXT: slli a0, a0, 3
; CHECK-RV32-V-NEXT: add a0, sp, a0
; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 3
-; CHECK-RV32-V-NEXT: sub a0, a1, a0
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a1, a0
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a1, a0
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-V-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-V-NEXT: addi a0, sp, 16
-; CHECK-RV32-V-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-V-NEXT: csrr a0, vlenb
; CHECK-RV32-V-NEXT: slli a0, a0, 5
; CHECK-RV32-V-NEXT: add sp, sp, a0
@@ -1351,498 +899,46 @@ define void @foo_with_call() #1 {
; CHECK-RV32-FV-NEXT: slli a0, a0, 5
; CHECK-RV32-FV-NEXT: sub sp, sp, a0
; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 5
-; CHECK-RV32-FV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
; CHECK-RV32-FV-NEXT: slli a0, a0, 3
; CHECK-RV32-FV-NEXT: mv a1, a0
; CHECK-RV32-FV-NEXT: slli a0, a0, 1
; CHECK-RV32-FV-NEXT: add a0, a0, a1
; CHECK-RV32-FV-NEXT: add a0, sp, a0
; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FV-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FV-NEXT: csrr a0, vlenb
; CHECK-RV32-FV-NEXT: slli a0, a0, 4
; CHECK-RV32-FV-NEXT: add a0, sp, a0
; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FV-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FV-NEXT: csrr a0, vlenb
; CHECK-RV32-FV-NEXT: slli a0, a0, 3
; CHECK-RV32-FV-NEXT: add a0, sp, a0
; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FV-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FV-NEXT: addi a0, sp, 16
-; CHECK-RV32-FV-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FV-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FV-NEXT: call otherfoo
; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 5
-; CHECK-RV32-FV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
; CHECK-RV32-FV-NEXT: slli a0, a0, 3
; CHECK-RV32-FV-NEXT: mv a1, a0
; CHECK-RV32-FV-NEXT: slli a0, a0, 1
; CHECK-RV32-FV-NEXT: add a0, a0, a1
; CHECK-RV32-FV-NEXT: add a0, sp, a0
; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FV-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FV-NEXT: csrr a0, vlenb
; CHECK-RV32-FV-NEXT: slli a0, a0, 4
; CHECK-RV32-FV-NEXT: add a0, sp, a0
; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FV-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FV-NEXT: csrr a0, vlenb
; CHECK-RV32-FV-NEXT: slli a0, a0, 3
; CHECK-RV32-FV-NEXT: add a0, sp, a0
; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: add a0, sp, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FV-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FV-NEXT: addi a0, sp, 16
-; CHECK-RV32-FV-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FV-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FV-NEXT: csrr a0, vlenb
; CHECK-RV32-FV-NEXT: slli a0, a0, 5
; CHECK-RV32-FV-NEXT: add sp, sp, a0
@@ -1928,498 +1024,46 @@ define void @foo_with_call() #1 {
; CHECK-RV32-FDV-NEXT: slli a0, a0, 5
; CHECK-RV32-FDV-NEXT: sub sp, sp, a0
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 5
-; CHECK-RV32-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: mv a1, a0
; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
; CHECK-RV32-FDV-NEXT: add a0, a0, a1
; CHECK-RV32-FDV-NEXT: add a0, sp, a0
; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FDV-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: slli a0, a0, 4
; CHECK-RV32-FDV-NEXT: add a0, sp, a0
; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FDV-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
; CHECK-RV32-FDV-NEXT: add a0, sp, a0
; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FDV-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FDV-NEXT: addi a0, sp, 16
-; CHECK-RV32-FDV-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FDV-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FDV-NEXT: call otherfoo
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 5
-; CHECK-RV32-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
; CHECK-RV32-FDV-NEXT: mv a1, a0
; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
; CHECK-RV32-FDV-NEXT: add a0, a0, a1
; CHECK-RV32-FDV-NEXT: add a0, sp, a0
; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FDV-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: slli a0, a0, 4
; CHECK-RV32-FDV-NEXT: add a0, sp, a0
; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FDV-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
; CHECK-RV32-FDV-NEXT: add a0, sp, a0
; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: add a0, sp, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FDV-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FDV-NEXT: addi a0, sp, 16
-; CHECK-RV32-FDV-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FDV-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: slli a0, a0, 5
; CHECK-RV32-FDV-NEXT: add sp, sp, a0
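Each hunk in this file makes the same change for its check prefix: the handler previously saved v0-v31 with thirty-two individual vs1r.v/vl1r.v whole-register spills, each behind its own shift-and-add offset computation, and now saves the four register groups starting at v0, v8, v16 and v24 with single vs8r.v/vl8r.v accesses. A minimal sketch of the surviving offset arithmetic for the v0 group, reconstructed from the context lines above (the frame-layout comments are assumptions, not taken from the test):

    csrr   a0, vlenb      # a0 = vlenb, bytes per vector register
    slli   a0, a0, 3      # a0 = 8 * vlenb
    mv     a1, a0         # keep 8 * vlenb
    slli   a0, a0, 1      # a0 = 16 * vlenb
    add    a0, a0, a1     # a0 = 24 * vlenb
    add    a0, sp, a0
    addi   a0, a0, 16     # assumed fixed 16-byte area at the bottom of the frame
    vs8r.v v0, (a0)       # one store now covers v0-v7

The v8, v16 and v24 groups land at 16 * vlenb, 8 * vlenb and 0 within the same area, which is why the remaining offset computations reduce to a single slli by 4, a single slli by 3, and a plain addi.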
@@ -3259,498 +1903,46 @@ define void @foo_with_call() #1 {
; CHECK-RV64-V-NEXT: slli a0, a0, 5
; CHECK-RV64-V-NEXT: sub sp, sp, a0
; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 5
-; CHECK-RV64-V-NEXT: sub a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 3
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
; CHECK-RV64-V-NEXT: slli a0, a0, 3
; CHECK-RV64-V-NEXT: mv a1, a0
; CHECK-RV64-V-NEXT: slli a0, a0, 1
; CHECK-RV64-V-NEXT: add a0, a0, a1
; CHECK-RV64-V-NEXT: add a0, sp, a0
; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 3
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 3
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 4
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-V-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-V-NEXT: csrr a0, vlenb
; CHECK-RV64-V-NEXT: slli a0, a0, 4
; CHECK-RV64-V-NEXT: add a0, sp, a0
; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 4
-; CHECK-RV64-V-NEXT: sub a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 3
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-V-NEXT: csrr a0, vlenb
; CHECK-RV64-V-NEXT: slli a0, a0, 3
; CHECK-RV64-V-NEXT: add a0, sp, a0
; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 3
-; CHECK-RV64-V-NEXT: sub a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-V-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-V-NEXT: addi a0, sp, 16
-; CHECK-RV64-V-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-V-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-V-NEXT: call otherfoo
; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 5
-; CHECK-RV64-V-NEXT: sub a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
; CHECK-RV64-V-NEXT: slli a0, a0, 3
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 3
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
; CHECK-RV64-V-NEXT: mv a1, a0
; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 3
; CHECK-RV64-V-NEXT: add a0, a0, a1
; CHECK-RV64-V-NEXT: add a0, sp, a0
; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 3
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 4
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-V-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-V-NEXT: csrr a0, vlenb
; CHECK-RV64-V-NEXT: slli a0, a0, 4
; CHECK-RV64-V-NEXT: add a0, sp, a0
; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 4
-; CHECK-RV64-V-NEXT: sub a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 3
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-V-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-V-NEXT: csrr a0, vlenb
; CHECK-RV64-V-NEXT: slli a0, a0, 3
; CHECK-RV64-V-NEXT: add a0, sp, a0
; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 3
-; CHECK-RV64-V-NEXT: sub a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: add a0, sp, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, 16
-; CHECK-RV64-V-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-V-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-V-NEXT: addi a0, sp, 16
-; CHECK-RV64-V-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-V-NEXT: csrr a0, vlenb
; CHECK-RV64-V-NEXT: slli a0, a0, 5
; CHECK-RV64-V-NEXT: add sp, sp, a0
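The updated "# vscale x 64-byte Folded Spill/Reload" annotations follow from the slot size: one RVV register holds vscale x 8 bytes, so a single eight-register group store covers vscale x 64 bytes and replaces eight of the old vscale x 8-byte slots.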
@@ -3816,498 +2008,46 @@ define void @foo_with_call() #1 {
; CHECK-RV64-FV-NEXT: slli a0, a0, 5
; CHECK-RV64-FV-NEXT: sub sp, sp, a0
; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 5
-; CHECK-RV64-FV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
; CHECK-RV64-FV-NEXT: slli a0, a0, 3
; CHECK-RV64-FV-NEXT: mv a1, a0
; CHECK-RV64-FV-NEXT: slli a0, a0, 1
; CHECK-RV64-FV-NEXT: add a0, a0, a1
; CHECK-RV64-FV-NEXT: add a0, sp, a0
; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FV-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-FV-NEXT: csrr a0, vlenb
; CHECK-RV64-FV-NEXT: slli a0, a0, 4
; CHECK-RV64-FV-NEXT: add a0, sp, a0
; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FV-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-FV-NEXT: csrr a0, vlenb
; CHECK-RV64-FV-NEXT: slli a0, a0, 3
; CHECK-RV64-FV-NEXT: add a0, sp, a0
; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FV-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-FV-NEXT: addi a0, sp, 16
-; CHECK-RV64-FV-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FV-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-FV-NEXT: call otherfoo
; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 5
-; CHECK-RV64-FV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
; CHECK-RV64-FV-NEXT: slli a0, a0, 3
; CHECK-RV64-FV-NEXT: mv a1, a0
; CHECK-RV64-FV-NEXT: slli a0, a0, 1
; CHECK-RV64-FV-NEXT: add a0, a0, a1
; CHECK-RV64-FV-NEXT: add a0, sp, a0
; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FV-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-FV-NEXT: csrr a0, vlenb
; CHECK-RV64-FV-NEXT: slli a0, a0, 4
; CHECK-RV64-FV-NEXT: add a0, sp, a0
; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FV-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-FV-NEXT: csrr a0, vlenb
; CHECK-RV64-FV-NEXT: slli a0, a0, 3
; CHECK-RV64-FV-NEXT: add a0, sp, a0
; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: add a0, sp, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FV-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-FV-NEXT: addi a0, sp, 16
-; CHECK-RV64-FV-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FV-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-FV-NEXT: csrr a0, vlenb
; CHECK-RV64-FV-NEXT: slli a0, a0, 5
; CHECK-RV64-FV-NEXT: add sp, sp, a0
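Note that the overall frame reservation is untouched in every hunk: the prologue and epilogue still adjust sp by 32 * vlenb (the unchanged slli a0, a0, 5 context lines); only the addressing of the spill slots inside that area is simplified.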
@@ -4393,498 +2133,46 @@ define void @foo_with_call() #1 {
; CHECK-RV64-FDV-NEXT: slli a0, a0, 5
; CHECK-RV64-FDV-NEXT: sub sp, sp, a0
; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 5
-; CHECK-RV64-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
; CHECK-RV64-FDV-NEXT: mv a1, a0
; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
; CHECK-RV64-FDV-NEXT: add a0, a0, a1
; CHECK-RV64-FDV-NEXT: add a0, sp, a0
; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FDV-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
; CHECK-RV64-FDV-NEXT: slli a0, a0, 4
; CHECK-RV64-FDV-NEXT: add a0, sp, a0
; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FDV-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
; CHECK-RV64-FDV-NEXT: add a0, sp, a0
; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FDV-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-FDV-NEXT: addi a0, sp, 16
-; CHECK-RV64-FDV-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FDV-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-FDV-NEXT: call otherfoo
; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 5
-; CHECK-RV64-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
; CHECK-RV64-FDV-NEXT: mv a1, a0
; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
; CHECK-RV64-FDV-NEXT: add a0, a0, a1
; CHECK-RV64-FDV-NEXT: add a0, sp, a0
; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FDV-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
; CHECK-RV64-FDV-NEXT: slli a0, a0, 4
; CHECK-RV64-FDV-NEXT: add a0, sp, a0
; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FDV-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
; CHECK-RV64-FDV-NEXT: add a0, sp, a0
; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: add a0, sp, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FDV-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-FDV-NEXT: addi a0, sp, 16
-; CHECK-RV64-FDV-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FDV-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
; CHECK-RV64-FDV-NEXT: slli a0, a0, 5
; CHECK-RV64-FDV-NEXT: add sp, sp, a0
@@ -5670,422 +2958,39 @@ define void @foo_fp_with_call() #2 {
; CHECK-RV32-V-NEXT: slli a0, a0, 5
; CHECK-RV32-V-NEXT: sub sp, sp, a0
; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a1, a0
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a1, a0
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 3
-; CHECK-RV32-V-NEXT: sub a0, a1, a0
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
; CHECK-RV32-V-NEXT: slli a0, a0, 3
; CHECK-RV32-V-NEXT: sub a0, s0, a0
; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 3
-; CHECK-RV32-V-NEXT: add a0, a1, a0
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 4
-; CHECK-RV32-V-NEXT: sub a0, a1, a0
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-V-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-V-NEXT: csrr a0, vlenb
; CHECK-RV32-V-NEXT: slli a0, a0, 4
; CHECK-RV32-V-NEXT: sub a0, s0, a0
; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 4
-; CHECK-RV32-V-NEXT: add a0, a1, a0
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
; CHECK-RV32-V-NEXT: slli a0, a0, 3
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 3
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 3
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 3
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
; CHECK-RV32-V-NEXT: slli a0, a0, 1
; CHECK-RV32-V-NEXT: add a0, a0, a1
; CHECK-RV32-V-NEXT: sub a0, s0, a0
; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 5
-; CHECK-RV32-V-NEXT: sub a0, a1, a0
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-V-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-V-NEXT: csrr a0, vlenb
; CHECK-RV32-V-NEXT: slli a0, a0, 5
; CHECK-RV32-V-NEXT: sub a0, s0, a0
; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-V-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-V-NEXT: call otherfoo
; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a1, a0
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a1, a0
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 3
-; CHECK-RV32-V-NEXT: sub a0, a1, a0
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
; CHECK-RV32-V-NEXT: slli a0, a0, 3
; CHECK-RV32-V-NEXT: sub a0, s0, a0
; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 3
-; CHECK-RV32-V-NEXT: add a0, a1, a0
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 4
-; CHECK-RV32-V-NEXT: sub a0, a1, a0
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-V-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-V-NEXT: csrr a0, vlenb
; CHECK-RV32-V-NEXT: slli a0, a0, 4
; CHECK-RV32-V-NEXT: sub a0, s0, a0
; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 4
-; CHECK-RV32-V-NEXT: add a0, a1, a0
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 3
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 3
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-V-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-V-NEXT: csrr a0, vlenb
; CHECK-RV32-V-NEXT: slli a0, a0, 3
; CHECK-RV32-V-NEXT: mv a1, a0
@@ -6093,81 +2998,12 @@ define void @foo_fp_with_call() #2 {
; CHECK-RV32-V-NEXT: add a0, a0, a1
; CHECK-RV32-V-NEXT: sub a0, s0, a0
; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 3
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 5
-; CHECK-RV32-V-NEXT: sub a0, a1, a0
-; CHECK-RV32-V-NEXT: sub a0, s0, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-V-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-V-NEXT: csrr a0, vlenb
; CHECK-RV32-V-NEXT: slli a0, a0, 5
; CHECK-RV32-V-NEXT: sub a0, s0, a0
; CHECK-RV32-V-NEXT: addi a0, a0, -80
-; CHECK-RV32-V-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-V-NEXT: addi sp, s0, -80
; CHECK-RV32-V-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
; CHECK-RV32-V-NEXT: lw t0, 72(sp) # 4-byte Folded Reload
@@ -6234,172 +3070,15 @@ define void @foo_fp_with_call() #2 {
; CHECK-RV32-FV-NEXT: slli a0, a0, 5
; CHECK-RV32-FV-NEXT: sub sp, sp, a0
; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
; CHECK-RV32-FV-NEXT: slli a0, a0, 3
; CHECK-RV32-FV-NEXT: sub a0, s0, a0
; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FV-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FV-NEXT: csrr a0, vlenb
; CHECK-RV32-FV-NEXT: slli a0, a0, 4
; CHECK-RV32-FV-NEXT: sub a0, s0, a0
; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FV-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FV-NEXT: csrr a0, vlenb
; CHECK-RV32-FV-NEXT: slli a0, a0, 3
; CHECK-RV32-FV-NEXT: mv a1, a0
@@ -6407,331 +3086,36 @@ define void @foo_fp_with_call() #2 {
; CHECK-RV32-FV-NEXT: add a0, a0, a1
; CHECK-RV32-FV-NEXT: sub a0, s0, a0
; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 5
-; CHECK-RV32-FV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FV-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FV-NEXT: csrr a0, vlenb
; CHECK-RV32-FV-NEXT: slli a0, a0, 5
; CHECK-RV32-FV-NEXT: sub a0, s0, a0
; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FV-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FV-NEXT: call otherfoo
; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
; CHECK-RV32-FV-NEXT: slli a0, a0, 3
; CHECK-RV32-FV-NEXT: sub a0, s0, a0
; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FV-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FV-NEXT: csrr a0, vlenb
; CHECK-RV32-FV-NEXT: slli a0, a0, 4
; CHECK-RV32-FV-NEXT: sub a0, s0, a0
; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FV-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FV-NEXT: add a0, a1, a0
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
; CHECK-RV32-FV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
; CHECK-RV32-FV-NEXT: slli a0, a0, 1
; CHECK-RV32-FV-NEXT: add a0, a0, a1
; CHECK-RV32-FV-NEXT: sub a0, s0, a0
; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: mv a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a1, a1, a0
-; CHECK-RV32-FV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FV-NEXT: add a0, a0, a1
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FV-NEXT: slli a1, a0, 5
-; CHECK-RV32-FV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FV-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FV-NEXT: csrr a0, vlenb
; CHECK-RV32-FV-NEXT: slli a0, a0, 5
; CHECK-RV32-FV-NEXT: sub a0, s0, a0
; CHECK-RV32-FV-NEXT: addi a0, a0, -160
-; CHECK-RV32-FV-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FV-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FV-NEXT: addi sp, s0, -160
; CHECK-RV32-FV-NEXT: lw ra, 156(sp) # 4-byte Folded Reload
; CHECK-RV32-FV-NEXT: lw t0, 152(sp) # 4-byte Folded Reload
@@ -6818,172 +3202,15 @@ define void @foo_fp_with_call() #2 {
; CHECK-RV32-FDV-NEXT: slli a0, a0, 5
; CHECK-RV32-FDV-NEXT: sub sp, sp, a0
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FDV-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: slli a0, a0, 4
; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FDV-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
; CHECK-RV32-FDV-NEXT: mv a1, a0
@@ -6991,249 +3218,23 @@ define void @foo_fp_with_call() #2 {
; CHECK-RV32-FDV-NEXT: add a0, a0, a1
; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 5
-; CHECK-RV32-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FDV-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: slli a0, a0, 5
; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-FDV-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-FDV-NEXT: call otherfoo
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FDV-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: slli a0, a0, 4
; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV32-FDV-NEXT: add a0, a1, a0
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FDV-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
; CHECK-RV32-FDV-NEXT: mv a1, a0
@@ -7241,81 +3242,12 @@ define void @foo_fp_with_call() #2 {
; CHECK-RV32-FDV-NEXT: add a0, a0, a1
; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: mv a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a1, a1, a0
-; CHECK-RV32-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV32-FDV-NEXT: add a0, a0, a1
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV32-FDV-NEXT: slli a1, a0, 5
-; CHECK-RV32-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FDV-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FDV-NEXT: csrr a0, vlenb
; CHECK-RV32-FDV-NEXT: slli a0, a0, 5
; CHECK-RV32-FDV-NEXT: sub a0, s0, a0
; CHECK-RV32-FDV-NEXT: addi a0, a0, -240
-; CHECK-RV32-FDV-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-FDV-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-FDV-NEXT: addi sp, s0, -240
; CHECK-RV32-FDV-NEXT: lw ra, 236(sp) # 4-byte Folded Reload
; CHECK-RV32-FDV-NEXT: lw t0, 232(sp) # 4-byte Folded Reload
@@ -8186,422 +4118,39 @@ define void @foo_fp_with_call() #2 {
; CHECK-RV64-V-NEXT: slli a0, a0, 5
; CHECK-RV64-V-NEXT: sub sp, sp, a0
; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 3
-; CHECK-RV64-V-NEXT: sub a0, a1, a0
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
; CHECK-RV64-V-NEXT: slli a0, a0, 3
; CHECK-RV64-V-NEXT: sub a0, s0, a0
; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 3
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 4
-; CHECK-RV64-V-NEXT: sub a0, a1, a0
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-V-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-V-NEXT: csrr a0, vlenb
; CHECK-RV64-V-NEXT: slli a0, a0, 4
; CHECK-RV64-V-NEXT: sub a0, s0, a0
; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 4
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
; CHECK-RV64-V-NEXT: slli a0, a0, 3
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 3
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 3
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 3
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
; CHECK-RV64-V-NEXT: slli a0, a0, 1
; CHECK-RV64-V-NEXT: add a0, a0, a1
; CHECK-RV64-V-NEXT: sub a0, s0, a0
; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 5
-; CHECK-RV64-V-NEXT: sub a0, a1, a0
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-V-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-V-NEXT: csrr a0, vlenb
; CHECK-RV64-V-NEXT: slli a0, a0, 5
; CHECK-RV64-V-NEXT: sub a0, s0, a0
; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-V-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-V-NEXT: call otherfoo
; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 3
-; CHECK-RV64-V-NEXT: sub a0, a1, a0
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
; CHECK-RV64-V-NEXT: slli a0, a0, 3
; CHECK-RV64-V-NEXT: sub a0, s0, a0
; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 3
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 4
-; CHECK-RV64-V-NEXT: sub a0, a1, a0
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-V-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-V-NEXT: csrr a0, vlenb
; CHECK-RV64-V-NEXT: slli a0, a0, 4
; CHECK-RV64-V-NEXT: sub a0, s0, a0
; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 4
-; CHECK-RV64-V-NEXT: add a0, a1, a0
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 3
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 3
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-V-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-V-NEXT: csrr a0, vlenb
; CHECK-RV64-V-NEXT: slli a0, a0, 3
; CHECK-RV64-V-NEXT: mv a1, a0
@@ -8609,81 +4158,12 @@ define void @foo_fp_with_call() #2 {
; CHECK-RV64-V-NEXT: add a0, a0, a1
; CHECK-RV64-V-NEXT: sub a0, s0, a0
; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 3
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 2
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: mv a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a1, a1, a0
-; CHECK-RV64-V-NEXT: slli a0, a0, 1
-; CHECK-RV64-V-NEXT: add a0, a0, a1
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-V-NEXT: csrr a0, vlenb
-; CHECK-RV64-V-NEXT: slli a1, a0, 5
-; CHECK-RV64-V-NEXT: sub a0, a1, a0
-; CHECK-RV64-V-NEXT: sub a0, s0, a0
-; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-V-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-V-NEXT: csrr a0, vlenb
; CHECK-RV64-V-NEXT: slli a0, a0, 5
; CHECK-RV64-V-NEXT: sub a0, s0, a0
; CHECK-RV64-V-NEXT: addi a0, a0, -160
-; CHECK-RV64-V-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-V-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-V-NEXT: addi sp, s0, -160
; CHECK-RV64-V-NEXT: ld ra, 152(sp) # 8-byte Folded Reload
; CHECK-RV64-V-NEXT: ld t0, 144(sp) # 8-byte Folded Reload
@@ -8750,172 +4230,15 @@ define void @foo_fp_with_call() #2 {
; CHECK-RV64-FV-NEXT: slli a0, a0, 5
; CHECK-RV64-FV-NEXT: sub sp, sp, a0
; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
; CHECK-RV64-FV-NEXT: slli a0, a0, 3
; CHECK-RV64-FV-NEXT: sub a0, s0, a0
; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FV-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-FV-NEXT: csrr a0, vlenb
; CHECK-RV64-FV-NEXT: slli a0, a0, 4
; CHECK-RV64-FV-NEXT: sub a0, s0, a0
; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FV-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-FV-NEXT: csrr a0, vlenb
; CHECK-RV64-FV-NEXT: slli a0, a0, 3
; CHECK-RV64-FV-NEXT: mv a1, a0
@@ -8923,331 +4246,36 @@ define void @foo_fp_with_call() #2 {
; CHECK-RV64-FV-NEXT: add a0, a0, a1
; CHECK-RV64-FV-NEXT: sub a0, s0, a0
; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 5
-; CHECK-RV64-FV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FV-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-FV-NEXT: csrr a0, vlenb
; CHECK-RV64-FV-NEXT: slli a0, a0, 5
; CHECK-RV64-FV-NEXT: sub a0, s0, a0
; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FV-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-FV-NEXT: call otherfoo
; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
; CHECK-RV64-FV-NEXT: slli a0, a0, 3
; CHECK-RV64-FV-NEXT: sub a0, s0, a0
; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FV-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-FV-NEXT: csrr a0, vlenb
; CHECK-RV64-FV-NEXT: slli a0, a0, 4
; CHECK-RV64-FV-NEXT: sub a0, s0, a0
; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FV-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FV-NEXT: add a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
; CHECK-RV64-FV-NEXT: slli a0, a0, 1
; CHECK-RV64-FV-NEXT: add a0, a0, a1
; CHECK-RV64-FV-NEXT: sub a0, s0, a0
; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: mv a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a1, a1, a0
-; CHECK-RV64-FV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FV-NEXT: add a0, a0, a1
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FV-NEXT: slli a1, a0, 5
-; CHECK-RV64-FV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FV-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-FV-NEXT: csrr a0, vlenb
; CHECK-RV64-FV-NEXT: slli a0, a0, 5
; CHECK-RV64-FV-NEXT: sub a0, s0, a0
; CHECK-RV64-FV-NEXT: addi a0, a0, -240
-; CHECK-RV64-FV-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FV-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-FV-NEXT: addi sp, s0, -240
; CHECK-RV64-FV-NEXT: ld ra, 232(sp) # 8-byte Folded Reload
; CHECK-RV64-FV-NEXT: ld t0, 224(sp) # 8-byte Folded Reload
@@ -9334,172 +4362,15 @@ define void @foo_fp_with_call() #2 {
; CHECK-RV64-FDV-NEXT: slli a0, a0, 5
; CHECK-RV64-FDV-NEXT: sub sp, sp, a0
; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FDV-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
; CHECK-RV64-FDV-NEXT: slli a0, a0, 4
; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FDV-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
; CHECK-RV64-FDV-NEXT: mv a1, a0
@@ -9507,249 +4378,23 @@ define void @foo_fp_with_call() #2 {
; CHECK-RV64-FDV-NEXT: add a0, a0, a1
; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 5
-; CHECK-RV64-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FDV-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
; CHECK-RV64-FDV-NEXT: slli a0, a0, 5
; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV64-FDV-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV64-FDV-NEXT: call otherfoo
; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 3
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FDV-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
; CHECK-RV64-FDV-NEXT: slli a0, a0, 4
; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 4
-; CHECK-RV64-FDV-NEXT: add a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FDV-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
; CHECK-RV64-FDV-NEXT: mv a1, a0
@@ -9757,81 +4402,12 @@ define void @foo_fp_with_call() #2 {
; CHECK-RV64-FDV-NEXT: add a0, a0, a1
; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 3
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v25, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v26, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v27, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 2
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v28, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: mv a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a1, a1, a0
-; CHECK-RV64-FDV-NEXT: slli a0, a0, 1
-; CHECK-RV64-FDV-NEXT: add a0, a0, a1
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v29, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
-; CHECK-RV64-FDV-NEXT: slli a1, a0, 5
-; CHECK-RV64-FDV-NEXT: sub a0, a1, a0
-; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
-; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v30, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FDV-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-FDV-NEXT: csrr a0, vlenb
; CHECK-RV64-FDV-NEXT: slli a0, a0, 5
; CHECK-RV64-FDV-NEXT: sub a0, s0, a0
; CHECK-RV64-FDV-NEXT: addi a0, a0, -320
-; CHECK-RV64-FDV-NEXT: vl1r.v v31, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV64-FDV-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV64-FDV-NEXT: addi sp, s0, -320
; CHECK-RV64-FDV-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
; CHECK-RV64-FDV-NEXT: ld t0, 304(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll
index 87c8343..a06c750 100644
--- a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll
+++ b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll
@@ -7,18 +7,18 @@
define i32 @ctz_nxv4i32(<vscale x 4 x i32> %a) #0 {
; RV32-LABEL: ctz_nxv4i32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV32-NEXT: vid.v v10
-; RV32-NEXT: vmv.v.i v11, -1
; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; RV32-NEXT: vid.v v10
+; RV32-NEXT: li a1, -1
; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32-NEXT: vmsne.vi v0, v8, 0
; RV32-NEXT: srli a0, a0, 1
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: vmacc.vv v8, v10, v11
-; RV32-NEXT: vmv.v.i v9, 0
-; RV32-NEXT: vmerge.vvm v8, v9, v8, v0
+; RV32-NEXT: vmadd.vx v10, a1, v8
+; RV32-NEXT: vmv.v.i v8, 0
+; RV32-NEXT: vmerge.vvm v8, v8, v10, v0
; RV32-NEXT: vredmaxu.vs v8, v8, v8
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: sub a0, a0, a1
@@ -28,18 +28,18 @@ define i32 @ctz_nxv4i32(<vscale x 4 x i32> %a) #0 {
;
; RV64-LABEL: ctz_nxv4i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; RV64-NEXT: vid.v v10
-; RV64-NEXT: vmv.v.i v11, -1
; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; RV64-NEXT: vid.v v10
+; RV64-NEXT: li a1, -1
; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64-NEXT: vmsne.vi v0, v8, 0
; RV64-NEXT: srli a0, a0, 1
; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vmacc.vv v8, v10, v11
-; RV64-NEXT: vmv.v.i v9, 0
-; RV64-NEXT: vmerge.vvm v8, v9, v8, v0
+; RV64-NEXT: vmadd.vx v10, a1, v8
+; RV64-NEXT: vmv.v.i v8, 0
+; RV64-NEXT: vmerge.vvm v8, v8, v10, v0
; RV64-NEXT: vredmaxu.vs v8, v8, v8
; RV64-NEXT: vmv.x.s a1, v8
; RV64-NEXT: sub a0, a0, a1
@@ -109,17 +109,17 @@ define i64 @ctz_nxv8i1_no_range(<vscale x 8 x i16> %a) {
;
; RV64-LABEL: ctz_nxv8i1_no_range:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, ma
-; RV64-NEXT: vid.v v16
-; RV64-NEXT: vmv.v.i v24, -1
; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; RV64-NEXT: vid.v v16
+; RV64-NEXT: li a1, -1
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT: vmsne.vi v0, v8, 0
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vmacc.vv v8, v16, v24
-; RV64-NEXT: vmv.v.i v16, 0
-; RV64-NEXT: vmerge.vvm v8, v16, v8, v0
+; RV64-NEXT: vmadd.vx v16, a1, v8
+; RV64-NEXT: vmv.v.i v8, 0
+; RV64-NEXT: vmerge.vvm v8, v8, v16, v0
; RV64-NEXT: vredmaxu.vs v8, v8, v8
; RV64-NEXT: vmv.x.s a1, v8
; RV64-NEXT: sub a0, a0, a1
diff --git a/llvm/test/CodeGen/RISCV/pr148084.ll b/llvm/test/CodeGen/RISCV/pr148084.ll
new file mode 100644
index 0000000..9fa26c7
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/pr148084.ll
@@ -0,0 +1,279 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s | FileCheck %s
+
+source_filename = "external/libaom/av1/encoder/tx_search.c"
+target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
+target triple = "riscv64-unknown-linux-android10000"
+
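+; Reproducer from libaom's av1/encoder/tx_search.c (per source_filename): the
+; IR below keeps a running signed maximum over loaded i32 values, with each
+; candidate gated by one bit of a masked i16. The autogenerated CHECK lines
+; pin down the branchy sext.w/blt lowering of that select chain.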
+define fastcc void @search_tx_type() #0 {
+; CHECK-LABEL: search_tx_type:
+; CHECK: # %bb.0: # %._crit_edge.i
+; CHECK-NEXT: # %bb.1: # %bb
+; CHECK-NEXT: lbu a1, 0(zero)
+; CHECK-NEXT: lw a0, 0(zero)
+; CHECK-NEXT: lh a2, 0(zero)
+; CHECK-NEXT: seqz a1, a1
+; CHECK-NEXT: srai a3, a0, 63
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: and a1, a1, a2
+; CHECK-NEXT: andi a2, a1, 1
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: or a3, a3, a0
+; CHECK-NEXT: or a2, a2, a3
+; CHECK-NEXT: bgez a2, .LBB0_3
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: bexti a3, a1, 1
+; CHECK-NEXT: addi a3, a3, -1
+; CHECK-NEXT: and a2, a2, a3
+; CHECK-NEXT: .LBB0_3: # %bb
+; CHECK-NEXT: andi a4, a1, 4
+; CHECK-NEXT: mv a3, a2
+; CHECK-NEXT: beqz a4, .LBB0_5
+; CHECK-NEXT: # %bb.4: # %bb
+; CHECK-NEXT: mv a3, a0
+; CHECK-NEXT: .LBB0_5: # %bb
+; CHECK-NEXT: blt a2, a0, .LBB0_7
+; CHECK-NEXT: # %bb.6: # %bb
+; CHECK-NEXT: mv a3, a2
+; CHECK-NEXT: .LBB0_7: # %bb
+; CHECK-NEXT: andi a5, a1, 8
+; CHECK-NEXT: sext.w a4, a3
+; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: beqz a5, .LBB0_9
+; CHECK-NEXT: # %bb.8: # %bb
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: .LBB0_9: # %bb
+; CHECK-NEXT: blt a4, a0, .LBB0_11
+; CHECK-NEXT: # %bb.10: # %bb
+; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: .LBB0_11: # %bb
+; CHECK-NEXT: andi a5, a1, 16
+; CHECK-NEXT: sext.w a4, a2
+; CHECK-NEXT: mv a3, a2
+; CHECK-NEXT: beqz a5, .LBB0_13
+; CHECK-NEXT: # %bb.12: # %bb
+; CHECK-NEXT: mv a3, a0
+; CHECK-NEXT: .LBB0_13: # %bb
+; CHECK-NEXT: blt a4, a0, .LBB0_15
+; CHECK-NEXT: # %bb.14: # %bb
+; CHECK-NEXT: mv a3, a2
+; CHECK-NEXT: .LBB0_15: # %bb
+; CHECK-NEXT: andi a5, a1, 32
+; CHECK-NEXT: sext.w a4, a3
+; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: beqz a5, .LBB0_17
+; CHECK-NEXT: # %bb.16: # %bb
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: .LBB0_17: # %bb
+; CHECK-NEXT: blt a4, a0, .LBB0_19
+; CHECK-NEXT: # %bb.18: # %bb
+; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: .LBB0_19: # %bb
+; CHECK-NEXT: andi a5, a1, 64
+; CHECK-NEXT: sext.w a4, a2
+; CHECK-NEXT: mv a3, a2
+; CHECK-NEXT: beqz a5, .LBB0_21
+; CHECK-NEXT: # %bb.20: # %bb
+; CHECK-NEXT: mv a3, a0
+; CHECK-NEXT: .LBB0_21: # %bb
+; CHECK-NEXT: blt a4, a0, .LBB0_23
+; CHECK-NEXT: # %bb.22: # %bb
+; CHECK-NEXT: mv a3, a2
+; CHECK-NEXT: .LBB0_23: # %bb
+; CHECK-NEXT: andi a5, a1, 128
+; CHECK-NEXT: sext.w a4, a3
+; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: beqz a5, .LBB0_25
+; CHECK-NEXT: # %bb.24: # %bb
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: .LBB0_25: # %bb
+; CHECK-NEXT: blt a4, a0, .LBB0_27
+; CHECK-NEXT: # %bb.26: # %bb
+; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: .LBB0_27: # %bb
+; CHECK-NEXT: andi a5, a1, 256
+; CHECK-NEXT: sext.w a4, a2
+; CHECK-NEXT: mv a3, a2
+; CHECK-NEXT: beqz a5, .LBB0_29
+; CHECK-NEXT: # %bb.28: # %bb
+; CHECK-NEXT: mv a3, a0
+; CHECK-NEXT: .LBB0_29: # %bb
+; CHECK-NEXT: blt a4, a0, .LBB0_31
+; CHECK-NEXT: # %bb.30: # %bb
+; CHECK-NEXT: mv a3, a2
+; CHECK-NEXT: .LBB0_31: # %bb
+; CHECK-NEXT: andi a5, a1, 512
+; CHECK-NEXT: sext.w a4, a3
+; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: beqz a5, .LBB0_33
+; CHECK-NEXT: # %bb.32: # %bb
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: .LBB0_33: # %bb
+; CHECK-NEXT: blt a4, a0, .LBB0_35
+; CHECK-NEXT: # %bb.34: # %bb
+; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: .LBB0_35: # %bb
+; CHECK-NEXT: andi a5, a1, 1024
+; CHECK-NEXT: sext.w a4, a2
+; CHECK-NEXT: mv a3, a2
+; CHECK-NEXT: beqz a5, .LBB0_37
+; CHECK-NEXT: # %bb.36: # %bb
+; CHECK-NEXT: mv a3, a0
+; CHECK-NEXT: .LBB0_37: # %bb
+; CHECK-NEXT: blt a4, a0, .LBB0_39
+; CHECK-NEXT: # %bb.38: # %bb
+; CHECK-NEXT: mv a3, a2
+; CHECK-NEXT: .LBB0_39: # %bb
+; CHECK-NEXT: slli a5, a1, 52
+; CHECK-NEXT: sext.w a4, a3
+; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: bgez a5, .LBB0_41
+; CHECK-NEXT: # %bb.40: # %bb
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: .LBB0_41: # %bb
+; CHECK-NEXT: blt a4, a0, .LBB0_43
+; CHECK-NEXT: # %bb.42: # %bb
+; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: .LBB0_43: # %bb
+; CHECK-NEXT: slli a4, a1, 51
+; CHECK-NEXT: sext.w a3, a2
+; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: bltz a4, .LBB0_49
+; CHECK-NEXT: # %bb.44: # %bb
+; CHECK-NEXT: bge a3, a0, .LBB0_50
+; CHECK-NEXT: .LBB0_45: # %bb
+; CHECK-NEXT: sext.w a2, a1
+; CHECK-NEXT: blt a2, a0, .LBB0_47
+; CHECK-NEXT: .LBB0_46: # %bb
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .LBB0_47: # %bb
+; CHECK-NEXT: sext.w a0, a0
+; CHECK-NEXT: # %bb.48: # %get_tx_mask.exit
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB0_49: # %bb
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: blt a3, a0, .LBB0_45
+; CHECK-NEXT: .LBB0_50: # %bb
+; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: sext.w a2, a2
+; CHECK-NEXT: bge a2, a0, .LBB0_46
+; CHECK-NEXT: j .LBB0_47
+._crit_edge.i:
+ %.in196.i = load i16, ptr null, align 2
+ %i2 = load i16, ptr null, align 2
+ %i3 = and i16 %i2, %.in196.i
+ %i9 = trunc nuw i8 0 to i1
+ br i1 %i9, label %get_tx_mask.exit, label %bb
+
+bb: ; preds = %._crit_edge.i
+ %i13 = load i8, ptr null, align 1
+ %i14 = icmp eq i8 %i13, 0
+ %spec.select211.i = select i1 %i14, i16 0, i16 %i3
+ %i19 = load i32, ptr null, align 4
+ %i20 = zext i16 %spec.select211.i to i32
+ %i21 = load i32, ptr null, align 4
+ %i22 = icmp sgt i32 %i21, -1
+ %i23 = and i32 %i20, 1
+ %.not203.i = icmp eq i32 %i23, 0
+ %spec.select212.i = select i1 %.not203.i, i32 -1, i32 %i21
+ %.1174.i = select i1 %i22, i32 %spec.select212.i, i32 -1
+ %i28 = icmp sgt i32 0, %.1174.i
+ %i29 = and i32 %i20, 2
+ %.not203.1.not.i = icmp eq i32 %i29, 0
+ %spec.select212.1.i = select i1 %.not203.1.not.i, i32 %.1174.i, i32 0
+ %.1174.1.i = select i1 %i28, i32 %spec.select212.1.i, i32 %.1174.i
+ %i30 = load i32, ptr null, align 4
+ %i31 = icmp sgt i32 %i30, %.1174.1.i
+ %i32 = and i32 %i20, 4
+ %.not203.2.i = icmp eq i32 %i32, 0
+ %spec.select212.2.i = select i1 %.not203.2.i, i32 %.1174.1.i, i32 %i30
+ %.1174.2.i = select i1 %i31, i32 %spec.select212.2.i, i32 %.1174.1.i
+ %i36 = load i32, ptr null, align 4
+ %i37 = icmp sgt i32 %i36, %.1174.2.i
+ %i38 = and i32 %i20, 8
+ %.not203.3.i = icmp eq i32 %i38, 0
+ %spec.select212.3.i = select i1 %.not203.3.i, i32 %.1174.2.i, i32 %i36
+ %.1174.3.i = select i1 %i37, i32 %spec.select212.3.i, i32 %.1174.2.i
+ %i42 = load i32, ptr null, align 4
+ %i43 = icmp sgt i32 %i42, %.1174.3.i
+ %i44 = and i32 %i20, 16
+ %.not203.4.i = icmp eq i32 %i44, 0
+ %spec.select212.4.i = select i1 %.not203.4.i, i32 %.1174.3.i, i32 %i42
+ %.1174.4.i = select i1 %i43, i32 %spec.select212.4.i, i32 %.1174.3.i
+ %i48 = load i32, ptr null, align 4
+ %i49 = icmp sgt i32 %i48, %.1174.4.i
+ %i50 = and i32 %i20, 32
+ %.not203.5.i = icmp eq i32 %i50, 0
+ %spec.select212.5.i = select i1 %.not203.5.i, i32 %.1174.4.i, i32 %i48
+ %.1174.5.i = select i1 %i49, i32 %spec.select212.5.i, i32 %.1174.4.i
+ %i51 = load i32, ptr null, align 4
+ %i52 = icmp sgt i32 %i51, %.1174.5.i
+ %i53 = and i32 %i20, 64
+ %.not203.6.i = icmp eq i32 %i53, 0
+ %spec.select212.6.i = select i1 %.not203.6.i, i32 %.1174.5.i, i32 %i51
+ %.1174.6.i = select i1 %i52, i32 %spec.select212.6.i, i32 %.1174.5.i
+ %i56 = load i32, ptr null, align 4
+ %i57 = icmp sgt i32 %i56, %.1174.6.i
+ %i58 = and i32 %i20, 128
+ %.not203.7.i = icmp eq i32 %i58, 0
+ %spec.select212.7.i = select i1 %.not203.7.i, i32 %.1174.6.i, i32 %i56
+ %.1174.7.i = select i1 %i57, i32 %spec.select212.7.i, i32 %.1174.6.i
+ %i60 = load i32, ptr null, align 4
+ %i61 = icmp sgt i32 %i60, %.1174.7.i
+ %i62 = and i32 %i20, 256
+ %.not203.8.i = icmp eq i32 %i62, 0
+ %spec.select212.8.i = select i1 %.not203.8.i, i32 %.1174.7.i, i32 %i60
+ %.1174.8.i = select i1 %i61, i32 %spec.select212.8.i, i32 %.1174.7.i
+ %i63 = load i32, ptr null, align 4
+ %i64 = icmp sgt i32 %i63, %.1174.8.i
+ %i65 = and i32 %i20, 512
+ %.not203.9.i = icmp eq i32 %i65, 0
+ %spec.select212.9.i = select i1 %.not203.9.i, i32 %.1174.8.i, i32 %i63
+ %.1174.9.i = select i1 %i64, i32 %spec.select212.9.i, i32 %.1174.8.i
+ %i67 = load i32, ptr null, align 4
+ %i68 = icmp sgt i32 %i67, %.1174.9.i
+ %i69 = and i32 %i20, 1024
+ %.not203.10.i = icmp eq i32 %i69, 0
+ %spec.select212.10.i = select i1 %.not203.10.i, i32 %.1174.9.i, i32 %i67
+ %.1174.10.i = select i1 %i68, i32 %spec.select212.10.i, i32 %.1174.9.i
+ %i70 = load i32, ptr null, align 4
+ %i71 = icmp sgt i32 %i70, %.1174.10.i
+ %i72 = and i32 %i20, 2048
+ %.not203.11.i = icmp eq i32 %i72, 0
+ %spec.select212.11.i = select i1 %.not203.11.i, i32 %.1174.10.i, i32 %i70
+ %.1174.11.i = select i1 %i71, i32 %spec.select212.11.i, i32 %.1174.10.i
+ %i75 = load i32, ptr null, align 4
+ %i76 = icmp sgt i32 %i75, %.1174.11.i
+ %i77 = and i32 %i20, 4096
+ %.not203.12.i = icmp eq i32 %i77, 0
+ %spec.select212.12.i = select i1 %.not203.12.i, i32 %.1174.11.i, i32 %i75
+ %.1174.12.i = select i1 %i76, i32 %spec.select212.12.i, i32 %.1174.11.i
+ %i80 = load i32, ptr null, align 4
+ %i81 = icmp sgt i32 %i80, %.1174.12.i
+ %spec.select212.13.i = select i1 false, i32 %.1174.12.i, i32 %i80
+ %.1174.13.i = select i1 %i81, i32 %spec.select212.13.i, i32 %.1174.12.i
+ %.1172.13.i = select i1 %i81, i32 13, i32 0
+ %i84 = icmp sgt i32 0, %.1174.13.i
+ %.1172.14.i = select i1 %i84, i32 14, i32 %.1172.13.i
+ %i88 = icmp slt i32 0, %i19
+ %i89 = select i1 %i88, i16 -32768, i16 0
+ %i90 = zext i16 %i89 to i32
+ %i91 = shl nuw nsw i32 1, %.1172.14.i
+ %i92 = and i32 %i91, %i90
+ %.not200.i = icmp eq i32 %i92, 0
+ %i93 = trunc nuw i32 %i91 to i16
+ %i94 = xor i16 %i93, -1
+ %i95 = select i1 %.not200.i, i16 -1, i16 %i94
+ %.2177.i = and i16 %i95, %i89
+ %i96 = xor i16 %.2177.i, -1
+ %i97 = and i16 %spec.select211.i, %i96
+ br label %get_tx_mask.exit
+
+get_tx_mask.exit: ; preds = %._crit_edge.i, %bb
+ %.1261.i = phi i16 [ %i97, %bb ], [ 0, %._crit_edge.i ]
+ %i99 = icmp eq i16 %.1261.i, 0
+ %.2262.i = select i1 %i99, i16 0, i16 %.1261.i
+ ret void
+}
+
+attributes #0 = { noimplicitfloat nounwind sspstrong uwtable vscale_range(2,1024) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic-rv64" "target-features"="+64bit,+a,+b,+c,+d,+f,+m,+relax,+unaligned-scalar-mem,+unaligned-vector-mem,+v,+zaamo,+zalrsc,+zba,+zbb,+zbs,+zca,+zcd,+zicsr,+zifencei,+zmmul,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b,-e,-experimental-p,-experimental-smctr,-experimental-ssctr,-experimental-svukte,-experimental-xqccmp,-experimental-xqcia,-experimental-xqciac,-experimental-xqcibi,-experimental-xqcibm,-experimental-xqcicli,-experimental-xqcicm,-experimental-xqcics,-experimental-xqcicsr,-experimental-xqciint,-experimental-xqciio,-experimental-xqcilb,-experimental-xqcili,-experimental-xqcilia,-experimental-xqcilo,-experimental-xqcilsm,-experimental-xqcisim,-experimental-xqcisls,-experimental-xqcisync,-experimental-xrivosvisni,-experimental-xrivosvizip,-experimental-xsfmclic,-experimental-xsfsclic,-experimental-zalasr,-experimental-zicfilp,-experimental-zicfiss,-experimental-zvbc32e,-experimental-zvkgs,-experimental-zvqdotq,-h,-q,-sdext,-sdtrig,-sha,-shcounterenw,-shgatpa,-shlcofideleg,-shtvala,-shvsatpa,-shvstvala,-shvstvecd,-smaia,-smcdeleg,-smcntrpmf,-smcsrind,-smdbltrp,-smepmp,-smmpm,-smnpm,-smrnmi,-smstateen,-ssaia,-ssccfg,-ssccptr,-sscofpmf,-sscounterenw,-sscsrind,-ssdbltrp,-ssnpm,-sspm,-ssqosid,-ssstateen,-ssstrict,-sstc,-sstvala,-sstvecd,-ssu64xl,-supm,-svade,-svadu,-svbare,-svinval,-svnapot,-svpbmt,-svvptc,-xandesperf,-xandesvbfhcvt,-xandesvdot,-xandesvpackfph,-xcvalu,-xcvbi,-xcvbitmanip,-xcvelw,-xcvmac,-xcvmem,-xcvsimd,-xmipscmov,-xmipslsp,-xsfcease,-xsfmm128t,-xsfmm16t,-xsfmm32a16f,-xsfmm32a32f,-xsfmm32a8f,-xsfmm32a8i,-xsfmm32t,-xsfmm64a64f,-xsfmm64t,-xsfmmbase,-xsfvcp,-xsfvfnrclipxfqf,-xsfvfwmaccqqq,-xsfvqmaccdod,-xsfvqmaccqoq,-xsifivecdiscarddlone,-xsifivecflushdlone,-xtheadba,-xtheadbb,-xtheadbs,-xtheadcmo,-xtheadcondmov,-xtheadfmemidx,-xtheadmac,-xtheadmemidx,-xtheadmempair,-xtheadsync,-xtheadvdot,-xventanacondops,-xwchc,-za128rs,-za64rs,-zabha,-zacas,-zama16b,-zawrs,-zbc,-zbkb,-zbkc,-zbkx,-zcb,-zce,-zcf,-zclsd,-zcmop,-zcmp,-zcmt,-zdinx,-zfa,-zfbfmin,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zic64b,-zicbom,-zicbop,-zicboz,-ziccamoa,-ziccamoc,-ziccif,-zicclsm,-ziccrse,-zicntr,-zicond,-zihintntl,-zihintpause,-zihpm,-zilsd,-zimop,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-ztso,-zvbb,-zvbc,-zvfbfmin,-zvfbfwma,-zvfh,-zvfhmin,-zvkb,-zvkg,-zvkn,-zvknc,-zvkned,-zvkng,-zvknha,-zvknhb,-zvks,-zvksc,-zvksed,-zvksg,-zvksh,-zvkt,-zvl1024b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl4096b,-zvl512b,-zvl65536b,-zvl8192b" }
diff --git a/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll b/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll
index 96c349d..d166a6e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll
@@ -92,6 +92,150 @@ entry:
ret <vscale x 1 x i32> %va
}
+define riscv_vector_cc <vscale x 1 x i32> @test_vector_callee2(<vscale x 1 x i32> %va) nounwind {
+; SPILL-O2-LABEL: test_vector_callee2:
+; SPILL-O2: # %bb.0: # %entry
+; SPILL-O2-NEXT: addi sp, sp, -16
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 12
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: sub sp, sp, a0
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 11
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 10
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 3
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 3
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill
+; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
+; SPILL-O2-NEXT: #APP
+; SPILL-O2-NEXT: #NO_APP
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 11
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 10
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 3
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 3
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
+; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 12
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add sp, sp, a0
+; SPILL-O2-NEXT: addi sp, sp, 16
+; SPILL-O2-NEXT: ret
+entry:
+ call void asm sideeffect "",
+ "~{v1},~{v3},~{v5},~{v7},~{v24m2},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+
+ ret <vscale x 1 x i32> %va
+}
+
+define riscv_vector_cc <vscale x 1 x i32> @test_vector_callee3(<vscale x 1 x i32> %va) nounwind {
+; SPILL-O2-LABEL: test_vector_callee3:
+; SPILL-O2: # %bb.0: # %entry
+; SPILL-O2-NEXT: addi sp, sp, -16
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 10
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: sub sp, sp, a0
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 3
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 3
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 6
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs2r.v v2, (a0) # vscale x 16-byte Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 2
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs2r.v v26, (a0) # vscale x 16-byte Folded Spill
+; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: vs4r.v v28, (a0) # vscale x 32-byte Folded Spill
+; SPILL-O2-NEXT: #APP
+; SPILL-O2-NEXT: #NO_APP
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 3
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 3
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v24, (a0) # vscale x 8-byte Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 6
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl2r.v v2, (a0) # vscale x 16-byte Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 2
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl2r.v v26, (a0) # vscale x 16-byte Folded Reload
+; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: vl4r.v v28, (a0) # vscale x 32-byte Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 10
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add sp, sp, a0
+; SPILL-O2-NEXT: addi sp, sp, 16
+; SPILL-O2-NEXT: ret
+entry:
+ call void asm sideeffect "",
+ "~{v1},~{v2},~{v3},~{v24},~{v26m2},~{v28m2},~{v29},~{v30},~{v31}"()
+
+ ret <vscale x 1 x i32> %va
+}
+
; Make sure the local stack allocation pass doesn't count vector registers. The
; sizes are chosen to be on the edge of what RISCVRegister::needsFrameBaseReg
; considers to need a virtual base register.
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
index bdf344d..7274e1b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -190,6 +190,20 @@ define {<4 x i32>, <4 x i32>} @vpload_factor2(ptr %ptr) {
ret {<4 x i32>, <4 x i32>} %res1
}
+define {<4 x i32>, <4 x i32>} @vpload_factor2_interleaved_mask_intrinsic(ptr %ptr, <4 x i1> %m) {
+; CHECK-LABEL: vpload_factor2_interleaved_mask_intrinsic:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+ %interleaved.mask = call <8 x i1> @llvm.vector.interleave2(<4 x i1> %m, <4 x i1> %m)
+ %interleaved.vec = tail call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> %interleaved.mask, i32 8)
+ %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %res0 = insertvalue {<4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
+ %res1 = insertvalue {<4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
+ ret {<4 x i32>, <4 x i32>} %res1
+}
define {<4 x i32>, <4 x i32>, <4 x i32>} @vpload_factor3(ptr %ptr) {
; CHECK-LABEL: vpload_factor3:
@@ -423,8 +437,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: li a2, 32
; RV32-NEXT: lui a3, 12
; RV32-NEXT: lui a6, 12291
-; RV32-NEXT: lui a7, %hi(.LCPI20_0)
-; RV32-NEXT: addi a7, a7, %lo(.LCPI20_0)
+; RV32-NEXT: lui a7, %hi(.LCPI21_0)
+; RV32-NEXT: addi a7, a7, %lo(.LCPI21_0)
; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT: vle32.v v24, (a5)
; RV32-NEXT: vmv.s.x v0, a3
@@ -509,12 +523,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
; RV32-NEXT: lui a7, 49164
-; RV32-NEXT: lui a1, %hi(.LCPI20_1)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI20_1)
+; RV32-NEXT: lui a1, %hi(.LCPI21_1)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI21_1)
; RV32-NEXT: lui t2, 3
; RV32-NEXT: lui t1, 196656
-; RV32-NEXT: lui a4, %hi(.LCPI20_3)
-; RV32-NEXT: addi a4, a4, %lo(.LCPI20_3)
+; RV32-NEXT: lui a4, %hi(.LCPI21_3)
+; RV32-NEXT: addi a4, a4, %lo(.LCPI21_3)
; RV32-NEXT: lui t0, 786624
; RV32-NEXT: li a5, 48
; RV32-NEXT: lui a6, 768
@@ -693,8 +707,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vrgatherei16.vv v24, v8, v2
-; RV32-NEXT: lui a1, %hi(.LCPI20_2)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI20_2)
+; RV32-NEXT: lui a1, %hi(.LCPI21_2)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI21_2)
; RV32-NEXT: lui a3, 3073
; RV32-NEXT: addi a3, a3, -1024
; RV32-NEXT: vmv.s.x v0, a3
@@ -758,16 +772,16 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vrgatherei16.vv v28, v8, v3
; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma
; RV32-NEXT: vmv.v.v v28, v24
-; RV32-NEXT: lui a1, %hi(.LCPI20_4)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI20_4)
-; RV32-NEXT: lui a2, %hi(.LCPI20_5)
-; RV32-NEXT: addi a2, a2, %lo(.LCPI20_5)
+; RV32-NEXT: lui a1, %hi(.LCPI21_4)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI21_4)
+; RV32-NEXT: lui a2, %hi(.LCPI21_5)
+; RV32-NEXT: addi a2, a2, %lo(.LCPI21_5)
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV32-NEXT: vle16.v v24, (a2)
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vle16.v v8, (a1)
-; RV32-NEXT: lui a1, %hi(.LCPI20_7)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI20_7)
+; RV32-NEXT: lui a1, %hi(.LCPI21_7)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI21_7)
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vle16.v v10, (a1)
; RV32-NEXT: csrr a1, vlenb
@@ -795,14 +809,14 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV32-NEXT: vl8r.v v0, (a1) # vscale x 64-byte Folded Reload
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vrgatherei16.vv v16, v0, v10
-; RV32-NEXT: lui a1, %hi(.LCPI20_6)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI20_6)
-; RV32-NEXT: lui a2, %hi(.LCPI20_8)
-; RV32-NEXT: addi a2, a2, %lo(.LCPI20_8)
+; RV32-NEXT: lui a1, %hi(.LCPI21_6)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI21_6)
+; RV32-NEXT: lui a2, %hi(.LCPI21_8)
+; RV32-NEXT: addi a2, a2, %lo(.LCPI21_8)
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vle16.v v4, (a1)
-; RV32-NEXT: lui a1, %hi(.LCPI20_9)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI20_9)
+; RV32-NEXT: lui a1, %hi(.LCPI21_9)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI21_9)
; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV32-NEXT: vle16.v v6, (a1)
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
@@ -889,8 +903,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: li a4, 128
; RV64-NEXT: lui a1, 1
; RV64-NEXT: vle64.v v8, (a3)
-; RV64-NEXT: lui a3, %hi(.LCPI20_0)
-; RV64-NEXT: addi a3, a3, %lo(.LCPI20_0)
+; RV64-NEXT: lui a3, %hi(.LCPI21_0)
+; RV64-NEXT: addi a3, a3, %lo(.LCPI21_0)
; RV64-NEXT: vmv.s.x v0, a4
; RV64-NEXT: csrr a4, vlenb
; RV64-NEXT: li a5, 61
@@ -1078,8 +1092,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vl8r.v v16, (a2) # vscale x 64-byte Folded Reload
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT: vslideup.vi v12, v16, 1, v0.t
-; RV64-NEXT: lui a2, %hi(.LCPI20_1)
-; RV64-NEXT: addi a2, a2, %lo(.LCPI20_1)
+; RV64-NEXT: lui a2, %hi(.LCPI21_1)
+; RV64-NEXT: addi a2, a2, %lo(.LCPI21_1)
; RV64-NEXT: li a3, 192
; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV64-NEXT: vle16.v v6, (a2)
@@ -1113,8 +1127,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vrgatherei16.vv v24, v16, v6
; RV64-NEXT: addi a2, sp, 16
; RV64-NEXT: vs8r.v v24, (a2) # vscale x 64-byte Folded Spill
-; RV64-NEXT: lui a2, %hi(.LCPI20_2)
-; RV64-NEXT: addi a2, a2, %lo(.LCPI20_2)
+; RV64-NEXT: lui a2, %hi(.LCPI21_2)
+; RV64-NEXT: addi a2, a2, %lo(.LCPI21_2)
; RV64-NEXT: li a3, 1040
; RV64-NEXT: vmv.s.x v0, a3
; RV64-NEXT: addi a1, a1, -2016
@@ -1198,12 +1212,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs4r.v v8, (a1) # vscale x 32-byte Folded Spill
-; RV64-NEXT: lui a1, %hi(.LCPI20_3)
-; RV64-NEXT: addi a1, a1, %lo(.LCPI20_3)
+; RV64-NEXT: lui a1, %hi(.LCPI21_3)
+; RV64-NEXT: addi a1, a1, %lo(.LCPI21_3)
; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV64-NEXT: vle16.v v20, (a1)
-; RV64-NEXT: lui a1, %hi(.LCPI20_4)
-; RV64-NEXT: addi a1, a1, %lo(.LCPI20_4)
+; RV64-NEXT: lui a1, %hi(.LCPI21_4)
+; RV64-NEXT: addi a1, a1, %lo(.LCPI21_4)
; RV64-NEXT: vle16.v v8, (a1)
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a2, 77
@@ -1254,8 +1268,8 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
; RV64-NEXT: vl2r.v v8, (a1) # vscale x 16-byte Folded Reload
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vrgatherei16.vv v0, v16, v8
-; RV64-NEXT: lui a1, %hi(.LCPI20_5)
-; RV64-NEXT: addi a1, a1, %lo(.LCPI20_5)
+; RV64-NEXT: lui a1, %hi(.LCPI21_5)
+; RV64-NEXT: addi a1, a1, %lo(.LCPI21_5)
; RV64-NEXT: vle16.v v20, (a1)
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: li a2, 61
@@ -1472,6 +1486,19 @@ define void @vpstore_factor2(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1) {
ret void
}
+define void @vpstore_factor2_interleaved_mask_intrinsic(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i1> %m) {
+; CHECK-LABEL: vpstore_factor2_interleaved_mask_intrinsic:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vsseg2e32.v v8, (a0), v0.t
+; CHECK-NEXT: ret
+ %interleaved.mask = call <8 x i1> @llvm.vector.interleave2(<4 x i1> %m, <4 x i1> %m)
+ %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+ tail call void @llvm.vp.store.v8i32.p0(<8 x i32> %interleaved.vec, ptr %ptr, <8 x i1> %interleaved.mask, i32 8)
+ ret void
+}
+
+
define void @vpstore_factor3(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
; CHECK-LABEL: vpstore_factor3:
; CHECK: # %bb.0:
@@ -1757,8 +1784,9 @@ define void @store_factor4_one_active(ptr %ptr, <4 x i32> %v) {
define void @vpstore_factor4_one_active(ptr %ptr, <4 x i32> %v) {
; CHECK-LABEL: vpstore_factor4_one_active:
; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vsseg4e32.v v8, (a0)
+; CHECK-NEXT: vsse32.v v8, (a0), a1
; CHECK-NEXT: ret
%v0 = shufflevector <4 x i32> %v, <4 x i32> poison, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef>
tail call void @llvm.vp.store.v16i32.p0(<16 x i32> %v0, ptr %ptr, <16 x i1> splat (i1 true), i32 16)
@@ -1782,7 +1810,7 @@ define void @store_factor4_one_active_fullwidth(ptr %ptr, <16 x i32> %v) {
; CHECK-LABEL: store_factor4_one_active_fullwidth:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 16
-; CHECK-NEXT: vsetivli zero, 4, e32, m4, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsse32.v v8, (a0), a1
; CHECK-NEXT: ret
%v0 = shufflevector <16 x i32> %v, <16 x i32> poison, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef>
@@ -1839,8 +1867,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_mask(ptr %ptr) {
; RV32-NEXT: vle32.v v12, (a0), v0.t
; RV32-NEXT: li a0, 36
; RV32-NEXT: vmv.s.x v20, a1
-; RV32-NEXT: lui a1, %hi(.LCPI54_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI54_0)
+; RV32-NEXT: lui a1, %hi(.LCPI56_0)
+; RV32-NEXT: addi a1, a1, %lo(.LCPI56_0)
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vle16.v v21, (a1)
; RV32-NEXT: vcompress.vm v8, v12, v11
@@ -1915,8 +1943,8 @@ define {<4 x i32>, <4 x i32>, <4 x i32>} @invalid_vp_evl(ptr %ptr) {
; RV32-NEXT: vmv.s.x v10, a0
; RV32-NEXT: li a0, 146
; RV32-NEXT: vmv.s.x v11, a0
-; RV32-NEXT: lui a0, %hi(.LCPI55_0)
-; RV32-NEXT: addi a0, a0, %lo(.LCPI55_0)
+; RV32-NEXT: lui a0, %hi(.LCPI57_0)
+; RV32-NEXT: addi a0, a0, %lo(.LCPI57_0)
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vle16.v v20, (a0)
; RV32-NEXT: li a0, 36
diff --git a/llvm/test/CodeGen/RISCV/rvv/interrupt-attr-nocall.ll b/llvm/test/CodeGen/RISCV/rvv/interrupt-attr-nocall.ll
index af2e8d3..42c2556 100644
--- a/llvm/test/CodeGen/RISCV/rvv/interrupt-attr-nocall.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/interrupt-attr-nocall.ll
@@ -14,12 +14,8 @@ define void @foo_lmul1() nounwind #0 {
; CHECK-RV32-NEXT: csrr a0, vlenb
; CHECK-RV32-NEXT: slli a0, a0, 1
; CHECK-RV32-NEXT: sub sp, sp, a0
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
; CHECK-RV32-NEXT: addi a0, sp, 16
-; CHECK-RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
; CHECK-RV32-NEXT: lui a0, %hi(a)
; CHECK-RV32-NEXT: addi a0, a0, %lo(a)
; CHECK-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
@@ -31,12 +27,8 @@ define void @foo_lmul1() nounwind #0 {
; CHECK-RV32-NEXT: lui a0, %hi(c)
; CHECK-RV32-NEXT: addi a0, a0, %lo(c)
; CHECK-RV32-NEXT: vse32.v v8, (a0)
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
; CHECK-RV32-NEXT: addi a0, sp, 16
-; CHECK-RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
; CHECK-RV32-NEXT: csrr a0, vlenb
; CHECK-RV32-NEXT: slli a0, a0, 1
; CHECK-RV32-NEXT: add sp, sp, a0
@@ -62,25 +54,8 @@ define void @foo_lmul2() nounwind #0 {
; CHECK-RV32-NEXT: csrr a0, vlenb
; CHECK-RV32-NEXT: slli a0, a0, 2
; CHECK-RV32-NEXT: sub sp, sp, a0
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: sw a1, 4(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: slli a1, a0, 1
-; CHECK-RV32-NEXT: add a0, a1, a0
-; CHECK-RV32-NEXT: lw a1, 4(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
; CHECK-RV32-NEXT: addi a0, sp, 16
-; CHECK-RV32-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
; CHECK-RV32-NEXT: lui a0, %hi(d)
; CHECK-RV32-NEXT: addi a0, a0, %lo(d)
; CHECK-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
@@ -92,25 +67,8 @@ define void @foo_lmul2() nounwind #0 {
; CHECK-RV32-NEXT: lui a0, %hi(f)
; CHECK-RV32-NEXT: addi a0, a0, %lo(f)
; CHECK-RV32-NEXT: vse32.v v8, (a0)
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: sw a1, 4(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: slli a1, a0, 1
-; CHECK-RV32-NEXT: add a0, a1, a0
-; CHECK-RV32-NEXT: lw a1, 4(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
; CHECK-RV32-NEXT: addi a0, sp, 16
-; CHECK-RV32-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
; CHECK-RV32-NEXT: csrr a0, vlenb
; CHECK-RV32-NEXT: slli a0, a0, 2
; CHECK-RV32-NEXT: add sp, sp, a0
@@ -136,56 +94,8 @@ define void @foo_lmul4() nounwind #0 {
; CHECK-RV32-NEXT: csrr a0, vlenb
; CHECK-RV32-NEXT: slli a0, a0, 3
; CHECK-RV32-NEXT: sub sp, sp, a0
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: sw a1, 4(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: slli a1, a0, 3
-; CHECK-RV32-NEXT: sub a0, a1, a0
-; CHECK-RV32-NEXT: lw a1, 4(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: sw a1, 4(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: mv a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a0, a0, a1
-; CHECK-RV32-NEXT: lw a1, 4(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: sw a1, 4(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: slli a1, a0, 2
-; CHECK-RV32-NEXT: add a0, a1, a0
-; CHECK-RV32-NEXT: lw a1, 4(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 2
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: sw a1, 4(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: slli a1, a0, 1
-; CHECK-RV32-NEXT: add a0, a1, a0
-; CHECK-RV32-NEXT: lw a1, 4(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill
; CHECK-RV32-NEXT: addi a0, sp, 16
-; CHECK-RV32-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-NEXT: lui a0, %hi(g)
; CHECK-RV32-NEXT: addi a0, a0, %lo(g)
; CHECK-RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
@@ -197,50 +107,8 @@ define void @foo_lmul4() nounwind #0 {
; CHECK-RV32-NEXT: lui a0, %hi(i)
; CHECK-RV32-NEXT: addi a0, a0, %lo(i)
; CHECK-RV32-NEXT: vse32.v v8, (a0)
-; CHECK-RV32-NEXT: sw a1, 4(sp) # 4-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a1, a0, 3
-; CHECK-RV32-NEXT: sub a0, a1, a0
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: mv a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a0, a0, a1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a1, a0, 2
-; CHECK-RV32-NEXT: add a0, a1, a0
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 2
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a1, a0, 1
-; CHECK-RV32-NEXT: add a0, a1, a0
-; CHECK-RV32-NEXT: lw a1, 4(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload
; CHECK-RV32-NEXT: addi a0, sp, 16
-; CHECK-RV32-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-NEXT: csrr a0, vlenb
; CHECK-RV32-NEXT: slli a0, a0, 3
; CHECK-RV32-NEXT: add sp, sp, a0
@@ -268,108 +136,12 @@ define void @foo_lmul8() nounwind #0 {
; CHECK-RV32-NEXT: slli a0, a0, 4
; CHECK-RV32-NEXT: sub sp, sp, a0
; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a1, a0, 4
-; CHECK-RV32-NEXT: sub a0, a1, a0
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: mv a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a1, a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a0, a0, a1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: mv a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 2
-; CHECK-RV32-NEXT: add a1, a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a0, a0, a1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 2
-; CHECK-RV32-NEXT: mv a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a0, a0, a1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: mv a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a1, a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 2
-; CHECK-RV32-NEXT: add a0, a0, a1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: mv a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 2
-; CHECK-RV32-NEXT: add a0, a0, a1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a1, a0, 3
-; CHECK-RV32-NEXT: add a0, a1, a0
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
; CHECK-RV32-NEXT: slli a0, a0, 3
; CHECK-RV32-NEXT: add a0, sp, a0
; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a1, a0, 3
-; CHECK-RV32-NEXT: sub a0, a1, a0
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: mv a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a0, a0, a1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a1, a0, 2
-; CHECK-RV32-NEXT: add a0, a1, a0
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 2
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a1, a0, 1
-; CHECK-RV32-NEXT: add a0, a1, a0
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-NEXT: addi a0, sp, 16
-; CHECK-RV32-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-NEXT: lui a0, %hi(j)
; CHECK-RV32-NEXT: addi a0, a0, %lo(j)
; CHECK-RV32-NEXT: li a1, 32
@@ -383,108 +155,12 @@ define void @foo_lmul8() nounwind #0 {
; CHECK-RV32-NEXT: addi a0, a0, %lo(l)
; CHECK-RV32-NEXT: vse32.v v8, (a0)
; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a1, a0, 4
-; CHECK-RV32-NEXT: sub a0, a1, a0
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: mv a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a1, a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a0, a0, a1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v9, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: mv a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 2
-; CHECK-RV32-NEXT: add a1, a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a0, a0, a1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v10, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 2
-; CHECK-RV32-NEXT: mv a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a0, a0, a1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v11, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: mv a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a1, a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 2
-; CHECK-RV32-NEXT: add a0, a0, a1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v12, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: mv a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 2
-; CHECK-RV32-NEXT: add a0, a0, a1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v13, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a1, a0, 3
-; CHECK-RV32-NEXT: add a0, a1, a0
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v14, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
; CHECK-RV32-NEXT: slli a0, a0, 3
; CHECK-RV32-NEXT: add a0, sp, a0
; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v15, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a1, a0, 3
-; CHECK-RV32-NEXT: sub a0, a1, a0
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v16, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: mv a1, a0
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a0, a0, a1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v17, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a1, a0, 2
-; CHECK-RV32-NEXT: add a0, a1, a0
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v18, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 2
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v19, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a1, a0, 1
-; CHECK-RV32-NEXT: add a0, a1, a0
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v20, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: slli a0, a0, 1
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v21, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-NEXT: csrr a0, vlenb
-; CHECK-RV32-NEXT: add a0, sp, a0
-; CHECK-RV32-NEXT: addi a0, a0, 16
-; CHECK-RV32-NEXT: vl1r.v v22, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-NEXT: addi a0, sp, 16
-; CHECK-RV32-NEXT: vl1r.v v23, (a0) # vscale x 8-byte Folded Reload
+; CHECK-RV32-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
; CHECK-RV32-NEXT: csrr a0, vlenb
; CHECK-RV32-NEXT: slli a0, a0, 4
; CHECK-RV32-NEXT: add sp, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir
index a050034..a7eaf39 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir
@@ -78,12 +78,12 @@ body: |
; CHECK-NEXT: %false:vrnov0 = COPY $v9
; CHECK-NEXT: %mask:vmv0 = COPY $v0
; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 4, 5 /* e32 */, 0 /* tu, mu */
- ; CHECK-NEXT: %x:vr = PseudoVMV_V_V_M1 %pt, %true, 8, 5 /* e32 */, 0 /* tu, mu */
+ ; CHECK-NEXT: %x:vr = PseudoVMV_V_V_M1 %pt, %true, 4, 5 /* e32 */, 0 /* tu, mu */
%pt:vrnov0 = COPY $v8
%false:vrnov0 = COPY $v9
%mask:vmv0 = COPY $v0
- %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 4, 5 /* e32 */, 0 /* tu, mu */
- %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, 8, 5 /* e32 */
+ %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 8, 5 /* e32 */, 0 /* tu, mu */
+ %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, 4, 5 /* e32 */
...
---
# Shouldn't be converted because false operands are different
@@ -163,3 +163,47 @@ body: |
%true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, 4, 5 /* e32 */, 0 /* tu, mu */
bb.1:
%5:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %false, %true, %mask, 4, 5 /* e32 */
+...
+---
+# Shouldn't be converted because vmerge adds back in elements from %false past %true's AVL that would be lost if we converted to vmv.v.v.
+name: preserve_false
+body: |
+ bb.0:
+ liveins: $v8, $v9, $v0, $x8, $x9
+ ; CHECK-LABEL: name: preserve_false
+ ; CHECK: liveins: $v8, $v9, $v0, $x8, $x9
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %pt:vrnov0 = COPY $v8
+ ; CHECK-NEXT: %false:vr = COPY $v9
+ ; CHECK-NEXT: %mask:vmv0 = COPY $v0
+ ; CHECK-NEXT: %avl1:gprnox0 = COPY $x8
+ ; CHECK-NEXT: %avl2:gprnox0 = COPY $x9
+ ; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, %avl1, 5 /* e32 */, 3 /* ta, ma */
+ ; CHECK-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, %avl2, 5 /* e32 */
+ %pt:vrnov0 = COPY $v8
+ %false:vr = COPY $v9
+ %mask:vmv0 = COPY $v0
+ %avl1:gprnox0 = COPY $x8
+ %avl2:gprnox0 = COPY $x9
+ %true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, %avl1, 5 /* e32 */, 3 /* ta, ma */
+ %5:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, %avl2, 5 /* e32 */
+...
+---
+# But we can convert this one because vmerge's AVL is <= %true's AVL, so we don't lose any %false elements past it.
+name: preserve_false_avl_known_le
+body: |
+ bb.0:
+ liveins: $v8, $v9, $v0
+ ; CHECK-LABEL: name: preserve_false_avl_known_le
+ ; CHECK: liveins: $v8, $v9, $v0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %pt:vr = COPY $v8
+ ; CHECK-NEXT: %false:vrnov0 = COPY $v9
+ ; CHECK-NEXT: %mask:vmv0 = COPY $v0
+ ; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $noreg, $noreg, %mask, 1, 5 /* e32 */, 3 /* ta, ma */
+ ; CHECK-NEXT: [[PseudoVMV_V_V_M1_:%[0-9]+]]:vr = PseudoVMV_V_V_M1 %pt, %true, 1, 5 /* e32 */, 0 /* tu, mu */
+ %pt:vrnov0 = COPY $v8
+ %false:vr = COPY $v9
+ %mask:vmv0 = COPY $v0
+ %true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $noreg, $noreg, %mask, 2, 5 /* e32 */, 3 /* ta, ma */
+ %5:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, %mask, 1, 5 /* e32 */
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-vmerge-to-vmv.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-vmerge-to-vmv.ll
index 3aeb4e8..9ffc84a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-vmerge-to-vmv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-vmerge-to-vmv.ll
@@ -71,10 +71,31 @@ define <vscale x 8 x i64> @vpmerge_m8(<vscale x 8 x i64> %x, <vscale x 8 x i64>
ret <vscale x 8 x i64> %1
}
-declare <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1>, <vscale x 1 x i8>, <vscale x 1 x i8>, i32)
-declare <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>, <vscale x 2 x i8>, i32)
-declare <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1>, <vscale x 4 x i8>, <vscale x 4 x i8>, i32)
-declare <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1>, <vscale x 8 x i8>, <vscale x 8 x i8>, i32)
-declare <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
-declare <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1>, <vscale x 8 x i32>, <vscale x 8 x i32>, i32)
-declare <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1>, <vscale x 8 x i64>, <vscale x 8 x i64>, i32)
+; Shouldn't be converted because vmerge adds back in elements from %false past %true's AVL that would be lost if we converted to vmv.v.v.
+define <vscale x 2 x i32> @preserve_false(ptr %p, <vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i1> %mask, i64 %avl1, i64 %avl2) {
+; CHECK-LABEL: preserve_false:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vle32.v v10, (a0), v0.t
+; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, ma
+; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0
+; CHECK-NEXT: ret
+ %true = call <vscale x 2 x i32> @llvm.riscv.vle.mask(<vscale x 2 x i32> %false, ptr %p, <vscale x 2 x i1> %mask, i64 %avl1, i64 3)
+ %res = call <vscale x 2 x i32> @llvm.riscv.vmerge(<vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i32> %true, <vscale x 2 x i1> %mask, i64 %avl2)
+ ret <vscale x 2 x i32> %res
+}
+
+; Can fold this because vmerge's AVL is known to be <= %true's AVL, so no elements from %false need to be introduced past it.
+define <vscale x 2 x i32> @preserve_false_avl_known_le(ptr %p, <vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: preserve_false_avl_known_le:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; CHECK-NEXT: vle32.v v9, (a0), v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %true = call <vscale x 2 x i32> @llvm.riscv.vle.mask(<vscale x 2 x i32> %false, ptr %p, <vscale x 2 x i1> %mask, i64 2, i64 3)
+ %res = call <vscale x 2 x i32> @llvm.riscv.vmerge(<vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i32> %true, <vscale x 2 x i1> %mask, i64 1)
+ ret <vscale x 2 x i32> %res
+}
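For readers following the AVL reasoning in the two tests above, here is a minimal sketch of the unsafe case. It is not part of the committed tests: the function name @avl_sketch and the constant AVLs (2 and 4) are hypothetical, chosen only to make the lane arithmetic concrete; the intrinsic shapes mirror the functions above.

; %true is only defined for lanes [0, 2). With vmerge's AVL of 4, lanes 2
; and 3 of %res must still come from %false wherever the mask is off,
; which a plain vmv.v.v copy of %true cannot reproduce. Shrinking
; vmerge's AVL to <= %true's AVL (as in preserve_false_avl_known_le) is
; what makes the fold legal.
define <vscale x 2 x i32> @avl_sketch(ptr %p, <vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i1> %mask) {
  %true = call <vscale x 2 x i32> @llvm.riscv.vle.mask(<vscale x 2 x i32> %false, ptr %p, <vscale x 2 x i1> %mask, i64 2, i64 3)
  %res = call <vscale x 2 x i32> @llvm.riscv.vmerge(<vscale x 2 x i32> %pt, <vscale x 2 x i32> %false, <vscale x 2 x i32> %true, <vscale x 2 x i1> %mask, i64 4)
  ret <vscale x 2 x i32> %res
}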
diff --git a/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll
index 32753ca..cd7f30d 100644
--- a/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll
+++ b/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll
@@ -716,92 +716,101 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a4, a4, 8
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: or a4, a6, a5
+; RV32I-NEXT: lbu a5, 8(a0)
+; RV32I-NEXT: lbu a6, 9(a0)
+; RV32I-NEXT: lbu t3, 10(a0)
+; RV32I-NEXT: lbu t4, 11(a0)
; RV32I-NEXT: slli t0, t0, 8
-; RV32I-NEXT: or a4, a4, a3
-; RV32I-NEXT: or a5, a6, a5
-; RV32I-NEXT: or a3, t0, a7
-; RV32I-NEXT: lbu a6, 8(a0)
-; RV32I-NEXT: lbu a7, 9(a0)
-; RV32I-NEXT: lbu t0, 10(a0)
-; RV32I-NEXT: lbu t3, 11(a0)
; RV32I-NEXT: slli t1, t1, 16
; RV32I-NEXT: slli t2, t2, 24
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: slli t0, t0, 16
-; RV32I-NEXT: slli t3, t3, 24
-; RV32I-NEXT: or t1, t2, t1
-; RV32I-NEXT: or a6, a7, a6
-; RV32I-NEXT: or a7, t3, t0
-; RV32I-NEXT: lbu t0, 12(a0)
-; RV32I-NEXT: lbu t2, 13(a0)
-; RV32I-NEXT: lbu t3, 14(a0)
-; RV32I-NEXT: lbu t4, 15(a0)
-; RV32I-NEXT: lbu a0, 0(a1)
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: or a7, t0, a7
+; RV32I-NEXT: or t0, t2, t1
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: lbu a6, 12(a0)
+; RV32I-NEXT: lbu t1, 13(a0)
+; RV32I-NEXT: lbu t2, 14(a0)
+; RV32I-NEXT: lbu a0, 15(a0)
+; RV32I-NEXT: slli t3, t3, 16
+; RV32I-NEXT: slli t4, t4, 24
+; RV32I-NEXT: slli t1, t1, 8
+; RV32I-NEXT: slli t2, t2, 16
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: or t3, t4, t3
+; RV32I-NEXT: or a6, t1, a6
+; RV32I-NEXT: or a0, a0, t2
+; RV32I-NEXT: lbu t1, 1(a1)
+; RV32I-NEXT: lbu t2, 0(a1)
+; RV32I-NEXT: lbu t4, 2(a1)
+; RV32I-NEXT: lbu a1, 3(a1)
+; RV32I-NEXT: slli t1, t1, 8
+; RV32I-NEXT: or t1, t1, t2
; RV32I-NEXT: sw zero, 16(sp)
; RV32I-NEXT: sw zero, 20(sp)
; RV32I-NEXT: sw zero, 24(sp)
; RV32I-NEXT: sw zero, 28(sp)
-; RV32I-NEXT: slli t2, t2, 8
-; RV32I-NEXT: or a1, t2, t0
-; RV32I-NEXT: mv t0, sp
-; RV32I-NEXT: slli t3, t3, 16
-; RV32I-NEXT: slli t4, t4, 24
-; RV32I-NEXT: or t2, t4, t3
-; RV32I-NEXT: srli t3, a0, 3
-; RV32I-NEXT: or a4, a5, a4
-; RV32I-NEXT: andi a5, a0, 31
-; RV32I-NEXT: andi t3, t3, 12
-; RV32I-NEXT: xori a5, a5, 31
-; RV32I-NEXT: or a3, t1, a3
-; RV32I-NEXT: or a6, a7, a6
-; RV32I-NEXT: or a1, t2, a1
-; RV32I-NEXT: add t0, t0, t3
-; RV32I-NEXT: sw a4, 0(sp)
-; RV32I-NEXT: sw a3, 4(sp)
-; RV32I-NEXT: sw a6, 8(sp)
-; RV32I-NEXT: sw a1, 12(sp)
-; RV32I-NEXT: lw a1, 4(t0)
-; RV32I-NEXT: lw a3, 8(t0)
-; RV32I-NEXT: lw a4, 0(t0)
-; RV32I-NEXT: lw a6, 12(t0)
-; RV32I-NEXT: srl a7, a1, a0
-; RV32I-NEXT: slli t0, a3, 1
-; RV32I-NEXT: srl a4, a4, a0
-; RV32I-NEXT: slli a1, a1, 1
-; RV32I-NEXT: srl a3, a3, a0
-; RV32I-NEXT: slli t1, a6, 1
-; RV32I-NEXT: srl a0, a6, a0
-; RV32I-NEXT: sll a6, t0, a5
-; RV32I-NEXT: sll a1, a1, a5
-; RV32I-NEXT: sll a5, t1, a5
+; RV32I-NEXT: slli t4, t4, 16
+; RV32I-NEXT: slli a1, a1, 24
+; RV32I-NEXT: or a1, a1, t4
+; RV32I-NEXT: mv t2, sp
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: or a4, t0, a7
+; RV32I-NEXT: or a5, t3, a5
+; RV32I-NEXT: or a0, a0, a6
+; RV32I-NEXT: or a1, a1, t1
+; RV32I-NEXT: sw a3, 0(sp)
+; RV32I-NEXT: sw a4, 4(sp)
+; RV32I-NEXT: sw a5, 8(sp)
+; RV32I-NEXT: sw a0, 12(sp)
+; RV32I-NEXT: srli a0, a1, 3
+; RV32I-NEXT: andi a3, a1, 31
+; RV32I-NEXT: andi a0, a0, 12
+; RV32I-NEXT: xori a3, a3, 31
+; RV32I-NEXT: add a0, t2, a0
+; RV32I-NEXT: lw a4, 4(a0)
+; RV32I-NEXT: lw a5, 8(a0)
+; RV32I-NEXT: lw a6, 0(a0)
+; RV32I-NEXT: lw a0, 12(a0)
+; RV32I-NEXT: srl a7, a4, a1
+; RV32I-NEXT: slli t0, a5, 1
+; RV32I-NEXT: srl a6, a6, a1
+; RV32I-NEXT: slli a4, a4, 1
+; RV32I-NEXT: srl a5, a5, a1
+; RV32I-NEXT: slli t1, a0, 1
+; RV32I-NEXT: srl a0, a0, a1
+; RV32I-NEXT: sll a1, t0, a3
+; RV32I-NEXT: sll a4, a4, a3
+; RV32I-NEXT: sll a3, t1, a3
; RV32I-NEXT: srli t0, a0, 16
; RV32I-NEXT: srli t1, a0, 24
; RV32I-NEXT: srli t2, a0, 8
-; RV32I-NEXT: or a6, a7, a6
-; RV32I-NEXT: or a1, a4, a1
-; RV32I-NEXT: or a3, a3, a5
+; RV32I-NEXT: or a1, a7, a1
+; RV32I-NEXT: or a4, a6, a4
+; RV32I-NEXT: or a3, a5, a3
; RV32I-NEXT: sb a0, 12(a2)
; RV32I-NEXT: sb t2, 13(a2)
; RV32I-NEXT: sb t0, 14(a2)
; RV32I-NEXT: sb t1, 15(a2)
; RV32I-NEXT: srli a0, a3, 16
-; RV32I-NEXT: srli a4, a3, 24
-; RV32I-NEXT: srli a5, a3, 8
-; RV32I-NEXT: srli a7, a1, 16
-; RV32I-NEXT: srli t0, a1, 24
-; RV32I-NEXT: srli t1, a1, 8
-; RV32I-NEXT: srli t2, a6, 16
-; RV32I-NEXT: srli t3, a6, 24
+; RV32I-NEXT: srli a5, a3, 24
+; RV32I-NEXT: srli a6, a3, 8
+; RV32I-NEXT: srli a7, a4, 16
+; RV32I-NEXT: srli t0, a4, 24
+; RV32I-NEXT: srli t1, a4, 8
+; RV32I-NEXT: srli t2, a1, 16
+; RV32I-NEXT: srli t3, a1, 24
; RV32I-NEXT: sb a3, 8(a2)
-; RV32I-NEXT: sb a5, 9(a2)
+; RV32I-NEXT: sb a6, 9(a2)
; RV32I-NEXT: sb a0, 10(a2)
-; RV32I-NEXT: sb a4, 11(a2)
-; RV32I-NEXT: srli a0, a6, 8
-; RV32I-NEXT: sb a1, 0(a2)
+; RV32I-NEXT: sb a5, 11(a2)
+; RV32I-NEXT: srli a0, a1, 8
+; RV32I-NEXT: sb a4, 0(a2)
; RV32I-NEXT: sb t1, 1(a2)
; RV32I-NEXT: sb a7, 2(a2)
; RV32I-NEXT: sb t0, 3(a2)
-; RV32I-NEXT: sb a6, 4(a2)
+; RV32I-NEXT: sb a1, 4(a2)
; RV32I-NEXT: sb a0, 5(a2)
; RV32I-NEXT: sb t2, 6(a2)
; RV32I-NEXT: sb t3, 7(a2)
@@ -943,93 +952,102 @@ define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli a4, a4, 8
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: or a4, a6, a5
+; RV32I-NEXT: lbu a5, 8(a0)
+; RV32I-NEXT: lbu a6, 9(a0)
+; RV32I-NEXT: lbu t3, 10(a0)
+; RV32I-NEXT: lbu t4, 11(a0)
; RV32I-NEXT: slli t0, t0, 8
-; RV32I-NEXT: or a4, a4, a3
-; RV32I-NEXT: or a5, a6, a5
-; RV32I-NEXT: or a3, t0, a7
-; RV32I-NEXT: lbu a6, 8(a0)
-; RV32I-NEXT: lbu a7, 9(a0)
-; RV32I-NEXT: lbu t0, 10(a0)
-; RV32I-NEXT: lbu t3, 11(a0)
; RV32I-NEXT: slli t1, t1, 16
; RV32I-NEXT: slli t2, t2, 24
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: slli t0, t0, 16
-; RV32I-NEXT: slli t3, t3, 24
-; RV32I-NEXT: or t1, t2, t1
-; RV32I-NEXT: or a6, a7, a6
-; RV32I-NEXT: or a7, t3, t0
-; RV32I-NEXT: lbu t0, 12(a0)
-; RV32I-NEXT: lbu t2, 13(a0)
-; RV32I-NEXT: lbu t3, 14(a0)
-; RV32I-NEXT: lbu t4, 15(a0)
-; RV32I-NEXT: lbu a0, 0(a1)
+; RV32I-NEXT: slli a6, a6, 8
+; RV32I-NEXT: or a7, t0, a7
+; RV32I-NEXT: or t0, t2, t1
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: lbu a6, 12(a0)
+; RV32I-NEXT: lbu t1, 13(a0)
+; RV32I-NEXT: lbu t2, 14(a0)
+; RV32I-NEXT: lbu a0, 15(a0)
+; RV32I-NEXT: slli t3, t3, 16
+; RV32I-NEXT: slli t4, t4, 24
+; RV32I-NEXT: slli t1, t1, 8
+; RV32I-NEXT: slli t2, t2, 16
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: or t3, t4, t3
+; RV32I-NEXT: or a6, t1, a6
+; RV32I-NEXT: or a0, a0, t2
+; RV32I-NEXT: lbu t1, 1(a1)
+; RV32I-NEXT: lbu t2, 0(a1)
+; RV32I-NEXT: lbu t4, 2(a1)
+; RV32I-NEXT: lbu a1, 3(a1)
+; RV32I-NEXT: slli t1, t1, 8
+; RV32I-NEXT: or t1, t1, t2
; RV32I-NEXT: sw zero, 0(sp)
; RV32I-NEXT: sw zero, 4(sp)
; RV32I-NEXT: sw zero, 8(sp)
; RV32I-NEXT: sw zero, 12(sp)
-; RV32I-NEXT: slli t2, t2, 8
-; RV32I-NEXT: or a1, t2, t0
-; RV32I-NEXT: addi t0, sp, 16
-; RV32I-NEXT: slli t3, t3, 16
-; RV32I-NEXT: slli t4, t4, 24
-; RV32I-NEXT: or t2, t4, t3
-; RV32I-NEXT: srli t3, a0, 3
-; RV32I-NEXT: or a4, a5, a4
-; RV32I-NEXT: andi a5, a0, 31
-; RV32I-NEXT: andi t3, t3, 12
-; RV32I-NEXT: or a3, t1, a3
-; RV32I-NEXT: or a6, a7, a6
-; RV32I-NEXT: or a1, t2, a1
-; RV32I-NEXT: sub a7, t0, t3
-; RV32I-NEXT: sw a4, 16(sp)
-; RV32I-NEXT: sw a3, 20(sp)
-; RV32I-NEXT: sw a6, 24(sp)
-; RV32I-NEXT: sw a1, 28(sp)
-; RV32I-NEXT: lw a1, 0(a7)
-; RV32I-NEXT: lw a3, 4(a7)
-; RV32I-NEXT: lw a4, 8(a7)
-; RV32I-NEXT: lw a6, 12(a7)
-; RV32I-NEXT: xori a5, a5, 31
-; RV32I-NEXT: sll a7, a3, a0
-; RV32I-NEXT: srli t0, a1, 1
-; RV32I-NEXT: sll a6, a6, a0
-; RV32I-NEXT: srli t1, a4, 1
-; RV32I-NEXT: sll a4, a4, a0
-; RV32I-NEXT: srli a3, a3, 1
-; RV32I-NEXT: sll a0, a1, a0
-; RV32I-NEXT: srl a1, t0, a5
-; RV32I-NEXT: srl t0, t1, a5
-; RV32I-NEXT: srl a3, a3, a5
-; RV32I-NEXT: srli a5, a0, 16
-; RV32I-NEXT: srli t1, a0, 24
-; RV32I-NEXT: srli t2, a0, 8
-; RV32I-NEXT: or a1, a7, a1
-; RV32I-NEXT: or a6, a6, t0
+; RV32I-NEXT: slli t4, t4, 16
+; RV32I-NEXT: slli a1, a1, 24
+; RV32I-NEXT: or a1, a1, t4
+; RV32I-NEXT: addi t2, sp, 16
; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: sb a0, 0(a2)
+; RV32I-NEXT: or a4, t0, a7
+; RV32I-NEXT: or a5, t3, a5
+; RV32I-NEXT: or a0, a0, a6
+; RV32I-NEXT: or a1, a1, t1
+; RV32I-NEXT: sw a3, 16(sp)
+; RV32I-NEXT: sw a4, 20(sp)
+; RV32I-NEXT: sw a5, 24(sp)
+; RV32I-NEXT: sw a0, 28(sp)
+; RV32I-NEXT: srli a0, a1, 3
+; RV32I-NEXT: andi a3, a1, 31
+; RV32I-NEXT: andi a0, a0, 12
+; RV32I-NEXT: sub a0, t2, a0
+; RV32I-NEXT: lw a4, 0(a0)
+; RV32I-NEXT: lw a5, 4(a0)
+; RV32I-NEXT: lw a6, 8(a0)
+; RV32I-NEXT: lw a0, 12(a0)
+; RV32I-NEXT: xori a3, a3, 31
+; RV32I-NEXT: sll a7, a5, a1
+; RV32I-NEXT: srli t0, a4, 1
+; RV32I-NEXT: sll a0, a0, a1
+; RV32I-NEXT: srli t1, a6, 1
+; RV32I-NEXT: sll a6, a6, a1
+; RV32I-NEXT: srli a5, a5, 1
+; RV32I-NEXT: sll a1, a4, a1
+; RV32I-NEXT: srl a4, t0, a3
+; RV32I-NEXT: srl t0, t1, a3
+; RV32I-NEXT: srl a3, a5, a3
+; RV32I-NEXT: srli a5, a1, 16
+; RV32I-NEXT: srli t1, a1, 24
+; RV32I-NEXT: srli t2, a1, 8
+; RV32I-NEXT: or a4, a7, a4
+; RV32I-NEXT: or a0, a0, t0
+; RV32I-NEXT: or a3, a6, a3
+; RV32I-NEXT: sb a1, 0(a2)
; RV32I-NEXT: sb t2, 1(a2)
; RV32I-NEXT: sb a5, 2(a2)
; RV32I-NEXT: sb t1, 3(a2)
-; RV32I-NEXT: srli a0, a3, 16
-; RV32I-NEXT: srli a4, a3, 24
-; RV32I-NEXT: srli a5, a3, 8
-; RV32I-NEXT: srli a7, a6, 16
-; RV32I-NEXT: srli t0, a6, 24
-; RV32I-NEXT: srli t1, a6, 8
-; RV32I-NEXT: srli t2, a1, 16
-; RV32I-NEXT: srli t3, a1, 24
+; RV32I-NEXT: srli a1, a3, 16
+; RV32I-NEXT: srli a5, a3, 24
+; RV32I-NEXT: srli a6, a3, 8
+; RV32I-NEXT: srli a7, a0, 16
+; RV32I-NEXT: srli t0, a0, 24
+; RV32I-NEXT: srli t1, a0, 8
+; RV32I-NEXT: srli t2, a4, 16
+; RV32I-NEXT: srli t3, a4, 24
; RV32I-NEXT: sb a3, 8(a2)
-; RV32I-NEXT: sb a5, 9(a2)
-; RV32I-NEXT: sb a0, 10(a2)
-; RV32I-NEXT: sb a4, 11(a2)
-; RV32I-NEXT: srli a0, a1, 8
-; RV32I-NEXT: sb a6, 12(a2)
+; RV32I-NEXT: sb a6, 9(a2)
+; RV32I-NEXT: sb a1, 10(a2)
+; RV32I-NEXT: sb a5, 11(a2)
+; RV32I-NEXT: srli a1, a4, 8
+; RV32I-NEXT: sb a0, 12(a2)
; RV32I-NEXT: sb t1, 13(a2)
; RV32I-NEXT: sb a7, 14(a2)
; RV32I-NEXT: sb t0, 15(a2)
-; RV32I-NEXT: sb a1, 4(a2)
-; RV32I-NEXT: sb a0, 5(a2)
+; RV32I-NEXT: sb a4, 4(a2)
+; RV32I-NEXT: sb a1, 5(a2)
; RV32I-NEXT: sb t2, 6(a2)
; RV32I-NEXT: sb t3, 7(a2)
; RV32I-NEXT: addi sp, sp, 32
@@ -1168,73 +1186,82 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: lbu t1, 6(a0)
; RV32I-NEXT: lbu t2, 7(a0)
; RV32I-NEXT: slli a4, a4, 8
+; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: lbu a4, 8(a0)
+; RV32I-NEXT: lbu t3, 9(a0)
+; RV32I-NEXT: lbu t4, 10(a0)
+; RV32I-NEXT: lbu t5, 11(a0)
; RV32I-NEXT: slli a5, a5, 16
; RV32I-NEXT: slli a6, a6, 24
; RV32I-NEXT: slli t0, t0, 8
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: or a4, a6, a5
-; RV32I-NEXT: or a5, t0, a7
-; RV32I-NEXT: lbu a6, 8(a0)
-; RV32I-NEXT: lbu a7, 9(a0)
-; RV32I-NEXT: lbu t0, 10(a0)
-; RV32I-NEXT: lbu t3, 11(a0)
; RV32I-NEXT: slli t1, t1, 16
; RV32I-NEXT: slli t2, t2, 24
-; RV32I-NEXT: slli a7, a7, 8
-; RV32I-NEXT: slli t0, t0, 16
-; RV32I-NEXT: slli t3, t3, 24
-; RV32I-NEXT: or t1, t2, t1
-; RV32I-NEXT: or a6, a7, a6
-; RV32I-NEXT: or a7, t3, t0
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: or a6, t0, a7
+; RV32I-NEXT: or a7, t2, t1
; RV32I-NEXT: lbu t0, 12(a0)
-; RV32I-NEXT: lbu t2, 13(a0)
-; RV32I-NEXT: lbu t3, 14(a0)
-; RV32I-NEXT: lbu t4, 15(a0)
-; RV32I-NEXT: lbu a0, 0(a1)
-; RV32I-NEXT: slli t2, t2, 8
-; RV32I-NEXT: or a1, t2, t0
-; RV32I-NEXT: mv t0, sp
-; RV32I-NEXT: slli t3, t3, 16
-; RV32I-NEXT: slli t4, t4, 24
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: srli a4, a0, 3
-; RV32I-NEXT: or a5, t1, a5
-; RV32I-NEXT: andi t1, a0, 31
-; RV32I-NEXT: or t2, t4, t3
-; RV32I-NEXT: srai t3, t4, 31
-; RV32I-NEXT: andi a4, a4, 12
-; RV32I-NEXT: xori t1, t1, 31
+; RV32I-NEXT: lbu t1, 13(a0)
+; RV32I-NEXT: lbu t2, 14(a0)
+; RV32I-NEXT: lbu a0, 15(a0)
+; RV32I-NEXT: slli t3, t3, 8
+; RV32I-NEXT: slli t4, t4, 16
+; RV32I-NEXT: slli t5, t5, 24
+; RV32I-NEXT: slli t1, t1, 8
+; RV32I-NEXT: or a4, t3, a4
+; RV32I-NEXT: or t3, t5, t4
+; RV32I-NEXT: or t0, t1, t0
+; RV32I-NEXT: lbu t1, 1(a1)
+; RV32I-NEXT: lbu t4, 0(a1)
+; RV32I-NEXT: lbu t5, 2(a1)
+; RV32I-NEXT: lbu a1, 3(a1)
+; RV32I-NEXT: slli t1, t1, 8
+; RV32I-NEXT: or t1, t1, t4
+; RV32I-NEXT: slli t5, t5, 16
+; RV32I-NEXT: slli a1, a1, 24
+; RV32I-NEXT: or a1, a1, t5
+; RV32I-NEXT: or a3, a5, a3
+; RV32I-NEXT: mv a5, sp
+; RV32I-NEXT: slli t2, t2, 16
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: or t2, a0, t2
+; RV32I-NEXT: srai a0, a0, 31
; RV32I-NEXT: or a6, a7, a6
-; RV32I-NEXT: or a1, t2, a1
-; RV32I-NEXT: sw t3, 16(sp)
-; RV32I-NEXT: sw t3, 20(sp)
-; RV32I-NEXT: sw t3, 24(sp)
-; RV32I-NEXT: sw t3, 28(sp)
-; RV32I-NEXT: add a4, t0, a4
+; RV32I-NEXT: or a4, t3, a4
+; RV32I-NEXT: or a7, t2, t0
+; RV32I-NEXT: or a1, a1, t1
+; RV32I-NEXT: sw a0, 16(sp)
+; RV32I-NEXT: sw a0, 20(sp)
+; RV32I-NEXT: sw a0, 24(sp)
+; RV32I-NEXT: sw a0, 28(sp)
; RV32I-NEXT: sw a3, 0(sp)
-; RV32I-NEXT: sw a5, 4(sp)
-; RV32I-NEXT: sw a6, 8(sp)
-; RV32I-NEXT: sw a1, 12(sp)
-; RV32I-NEXT: lw a1, 4(a4)
-; RV32I-NEXT: lw a3, 8(a4)
-; RV32I-NEXT: lw a5, 0(a4)
-; RV32I-NEXT: lw a4, 12(a4)
-; RV32I-NEXT: srl a6, a1, a0
-; RV32I-NEXT: slli a7, a3, 1
-; RV32I-NEXT: srl a5, a5, a0
-; RV32I-NEXT: slli a1, a1, 1
-; RV32I-NEXT: srl a3, a3, a0
-; RV32I-NEXT: slli t0, a4, 1
-; RV32I-NEXT: sra a0, a4, a0
-; RV32I-NEXT: sll a4, a7, t1
-; RV32I-NEXT: sll a1, a1, t1
-; RV32I-NEXT: sll a7, t0, t1
+; RV32I-NEXT: sw a6, 4(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: sw a7, 12(sp)
+; RV32I-NEXT: srli a0, a1, 3
+; RV32I-NEXT: andi a3, a1, 31
+; RV32I-NEXT: andi a0, a0, 12
+; RV32I-NEXT: xori a3, a3, 31
+; RV32I-NEXT: add a0, a5, a0
+; RV32I-NEXT: lw a4, 4(a0)
+; RV32I-NEXT: lw a5, 8(a0)
+; RV32I-NEXT: lw a6, 0(a0)
+; RV32I-NEXT: lw a0, 12(a0)
+; RV32I-NEXT: srl a7, a4, a1
+; RV32I-NEXT: slli t0, a5, 1
+; RV32I-NEXT: srl a6, a6, a1
+; RV32I-NEXT: slli a4, a4, 1
+; RV32I-NEXT: srl a5, a5, a1
+; RV32I-NEXT: slli t1, a0, 1
+; RV32I-NEXT: sra a0, a0, a1
+; RV32I-NEXT: sll a1, t0, a3
+; RV32I-NEXT: sll a4, a4, a3
+; RV32I-NEXT: sll a3, t1, a3
; RV32I-NEXT: srli t0, a0, 16
; RV32I-NEXT: srli t1, a0, 24
; RV32I-NEXT: srli t2, a0, 8
+; RV32I-NEXT: or a1, a7, a1
; RV32I-NEXT: or a4, a6, a4
-; RV32I-NEXT: or a1, a5, a1
-; RV32I-NEXT: or a3, a3, a7
+; RV32I-NEXT: or a3, a5, a3
; RV32I-NEXT: sb a0, 12(a2)
; RV32I-NEXT: sb t2, 13(a2)
; RV32I-NEXT: sb t0, 14(a2)
@@ -1242,21 +1269,21 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: srli a0, a3, 16
; RV32I-NEXT: srli a5, a3, 24
; RV32I-NEXT: srli a6, a3, 8
-; RV32I-NEXT: srli a7, a1, 16
-; RV32I-NEXT: srli t0, a1, 24
-; RV32I-NEXT: srli t1, a1, 8
-; RV32I-NEXT: srli t2, a4, 16
-; RV32I-NEXT: srli t3, a4, 24
+; RV32I-NEXT: srli a7, a4, 16
+; RV32I-NEXT: srli t0, a4, 24
+; RV32I-NEXT: srli t1, a4, 8
+; RV32I-NEXT: srli t2, a1, 16
+; RV32I-NEXT: srli t3, a1, 24
; RV32I-NEXT: sb a3, 8(a2)
; RV32I-NEXT: sb a6, 9(a2)
; RV32I-NEXT: sb a0, 10(a2)
; RV32I-NEXT: sb a5, 11(a2)
-; RV32I-NEXT: srli a0, a4, 8
-; RV32I-NEXT: sb a1, 0(a2)
+; RV32I-NEXT: srli a0, a1, 8
+; RV32I-NEXT: sb a4, 0(a2)
; RV32I-NEXT: sb t1, 1(a2)
; RV32I-NEXT: sb a7, 2(a2)
; RV32I-NEXT: sb t0, 3(a2)
-; RV32I-NEXT: sb a4, 4(a2)
+; RV32I-NEXT: sb a1, 4(a2)
; RV32I-NEXT: sb a0, 5(a2)
; RV32I-NEXT: sb t2, 6(a2)
; RV32I-NEXT: sb t3, 7(a2)
@@ -1272,17 +1299,19 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-LABEL: lshr_32bytes:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -144
-; RV64I-NEXT: sd s0, 136(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 128(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 120(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 112(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s4, 104(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s5, 96(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s6, 88(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s7, 80(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s8, 72(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s9, 64(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -160
+; RV64I-NEXT: sd s0, 152(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 144(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 136(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 128(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s4, 120(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s5, 112(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s6, 104(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s7, 96(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s8, 88(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s9, 80(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s10, 72(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s11, 64(sp) # 8-byte Folded Spill
; RV64I-NEXT: lbu a3, 0(a0)
; RV64I-NEXT: lbu a4, 1(a0)
; RV64I-NEXT: lbu a5, 2(a0)
@@ -1299,122 +1328,143 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: lbu s1, 13(a0)
; RV64I-NEXT: lbu s2, 14(a0)
; RV64I-NEXT: lbu s3, 15(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: slli a5, a5, 16
-; RV64I-NEXT: slli a6, a6, 24
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: or a4, a6, a5
; RV64I-NEXT: lbu s4, 16(a0)
; RV64I-NEXT: lbu s5, 17(a0)
; RV64I-NEXT: lbu s6, 18(a0)
; RV64I-NEXT: lbu s7, 19(a0)
+; RV64I-NEXT: slli a4, a4, 8
+; RV64I-NEXT: slli s8, a5, 16
+; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: slli t0, t0, 8
; RV64I-NEXT: slli t1, t1, 16
; RV64I-NEXT: slli t2, t2, 24
+; RV64I-NEXT: or a5, a4, a3
+; RV64I-NEXT: or a6, a6, s8
+; RV64I-NEXT: or a3, t0, a7
+; RV64I-NEXT: or a4, t2, t1
+; RV64I-NEXT: lbu s8, 20(a0)
+; RV64I-NEXT: lbu s9, 21(a0)
+; RV64I-NEXT: lbu s10, 22(a0)
+; RV64I-NEXT: lbu s11, 23(a0)
; RV64I-NEXT: slli t4, t4, 8
; RV64I-NEXT: slli t5, t5, 16
; RV64I-NEXT: slli t6, t6, 24
-; RV64I-NEXT: or a5, t0, a7
-; RV64I-NEXT: or a6, t2, t1
-; RV64I-NEXT: or a7, t4, t3
-; RV64I-NEXT: or t0, t6, t5
-; RV64I-NEXT: lbu t5, 20(a0)
-; RV64I-NEXT: lbu t6, 21(a0)
-; RV64I-NEXT: lbu s8, 22(a0)
-; RV64I-NEXT: lbu s9, 23(a0)
; RV64I-NEXT: slli s1, s1, 8
; RV64I-NEXT: slli s2, s2, 16
; RV64I-NEXT: slli s3, s3, 24
+; RV64I-NEXT: or a7, t4, t3
+; RV64I-NEXT: or t0, t6, t5
+; RV64I-NEXT: or t1, s1, s0
+; RV64I-NEXT: or t2, s3, s2
+; RV64I-NEXT: lbu t6, 24(a0)
+; RV64I-NEXT: lbu s0, 25(a0)
+; RV64I-NEXT: lbu s1, 26(a0)
+; RV64I-NEXT: lbu s2, 27(a0)
; RV64I-NEXT: slli s5, s5, 8
; RV64I-NEXT: slli s6, s6, 16
; RV64I-NEXT: slli s7, s7, 24
-; RV64I-NEXT: or t1, s1, s0
-; RV64I-NEXT: or t2, s3, s2
+; RV64I-NEXT: slli s9, s9, 8
; RV64I-NEXT: or t3, s5, s4
; RV64I-NEXT: or t4, s7, s6
-; RV64I-NEXT: lbu s0, 24(a0)
-; RV64I-NEXT: lbu s1, 25(a0)
-; RV64I-NEXT: lbu s2, 26(a0)
-; RV64I-NEXT: lbu s3, 27(a0)
-; RV64I-NEXT: slli t6, t6, 8
-; RV64I-NEXT: slli s8, s8, 16
-; RV64I-NEXT: slli s9, s9, 24
-; RV64I-NEXT: slli s1, s1, 8
-; RV64I-NEXT: or t5, t6, t5
-; RV64I-NEXT: or t6, s9, s8
-; RV64I-NEXT: or s0, s1, s0
-; RV64I-NEXT: lbu s1, 28(a0)
+; RV64I-NEXT: or t5, s9, s8
+; RV64I-NEXT: lbu s3, 28(a0)
; RV64I-NEXT: lbu s4, 29(a0)
; RV64I-NEXT: lbu s5, 30(a0)
; RV64I-NEXT: lbu s6, 31(a0)
-; RV64I-NEXT: lbu a0, 0(a1)
+; RV64I-NEXT: slli s10, s10, 16
+; RV64I-NEXT: slli s11, s11, 24
+; RV64I-NEXT: slli s0, s0, 8
+; RV64I-NEXT: slli s1, s1, 16
+; RV64I-NEXT: slli s2, s2, 24
+; RV64I-NEXT: slli s4, s4, 8
+; RV64I-NEXT: or a0, s11, s10
+; RV64I-NEXT: or t6, s0, t6
+; RV64I-NEXT: or s0, s2, s1
+; RV64I-NEXT: or s1, s4, s3
+; RV64I-NEXT: lbu s2, 0(a1)
+; RV64I-NEXT: lbu s3, 1(a1)
+; RV64I-NEXT: lbu s4, 2(a1)
+; RV64I-NEXT: lbu s7, 3(a1)
+; RV64I-NEXT: slli s5, s5, 16
+; RV64I-NEXT: slli s6, s6, 24
+; RV64I-NEXT: slli s3, s3, 8
+; RV64I-NEXT: slli s4, s4, 16
+; RV64I-NEXT: slli s7, s7, 24
+; RV64I-NEXT: or s5, s6, s5
+; RV64I-NEXT: or s2, s3, s2
+; RV64I-NEXT: or s3, s7, s4
+; RV64I-NEXT: lbu s4, 5(a1)
+; RV64I-NEXT: lbu s6, 4(a1)
+; RV64I-NEXT: lbu s7, 6(a1)
+; RV64I-NEXT: lbu a1, 7(a1)
+; RV64I-NEXT: slli s4, s4, 8
+; RV64I-NEXT: or s4, s4, s6
+; RV64I-NEXT: slli s7, s7, 16
+; RV64I-NEXT: slli a1, a1, 24
+; RV64I-NEXT: or a1, a1, s7
; RV64I-NEXT: sd zero, 32(sp)
; RV64I-NEXT: sd zero, 40(sp)
; RV64I-NEXT: sd zero, 48(sp)
; RV64I-NEXT: sd zero, 56(sp)
-; RV64I-NEXT: slli s2, s2, 16
-; RV64I-NEXT: slli s3, s3, 24
-; RV64I-NEXT: or a1, s3, s2
-; RV64I-NEXT: mv s2, sp
-; RV64I-NEXT: slli s4, s4, 8
-; RV64I-NEXT: slli s5, s5, 16
-; RV64I-NEXT: slli s6, s6, 24
-; RV64I-NEXT: or s1, s4, s1
-; RV64I-NEXT: srli s3, a0, 3
-; RV64I-NEXT: or s4, s6, s5
-; RV64I-NEXT: andi s5, a0, 63
-; RV64I-NEXT: andi s3, s3, 24
-; RV64I-NEXT: xori s5, s5, 63
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a5, t0, a7
-; RV64I-NEXT: or a6, t2, t1
-; RV64I-NEXT: or a7, t4, t3
-; RV64I-NEXT: or t0, t6, t5
-; RV64I-NEXT: or a1, a1, s0
-; RV64I-NEXT: or t1, s4, s1
-; RV64I-NEXT: add s2, s2, s3
-; RV64I-NEXT: slli a4, a4, 32
-; RV64I-NEXT: slli a6, a6, 32
-; RV64I-NEXT: slli t0, t0, 32
-; RV64I-NEXT: slli t1, t1, 32
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: mv a6, sp
; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a5, t0, a7
-; RV64I-NEXT: or a1, t1, a1
+; RV64I-NEXT: or a4, t0, a7
+; RV64I-NEXT: or a7, t2, t1
+; RV64I-NEXT: or t0, t4, t3
+; RV64I-NEXT: or a0, a0, t5
+; RV64I-NEXT: or t1, s0, t6
+; RV64I-NEXT: or t2, s5, s1
+; RV64I-NEXT: or t3, s3, s2
+; RV64I-NEXT: or a1, a1, s4
+; RV64I-NEXT: slli a3, a3, 32
+; RV64I-NEXT: slli a7, a7, 32
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: slli t2, t2, 32
+; RV64I-NEXT: slli a1, a1, 32
+; RV64I-NEXT: or a3, a3, a5
+; RV64I-NEXT: or a4, a7, a4
+; RV64I-NEXT: or a0, a0, t0
+; RV64I-NEXT: or a5, t2, t1
+; RV64I-NEXT: or a1, a1, t3
; RV64I-NEXT: sd a3, 0(sp)
; RV64I-NEXT: sd a4, 8(sp)
-; RV64I-NEXT: sd a5, 16(sp)
-; RV64I-NEXT: sd a1, 24(sp)
-; RV64I-NEXT: ld a1, 8(s2)
-; RV64I-NEXT: ld a3, 16(s2)
-; RV64I-NEXT: ld a4, 0(s2)
-; RV64I-NEXT: ld a5, 24(s2)
-; RV64I-NEXT: srl a6, a1, a0
-; RV64I-NEXT: slli a7, a3, 1
-; RV64I-NEXT: srl a4, a4, a0
-; RV64I-NEXT: slli a1, a1, 1
-; RV64I-NEXT: srl a3, a3, a0
+; RV64I-NEXT: sd a0, 16(sp)
+; RV64I-NEXT: sd a5, 24(sp)
+; RV64I-NEXT: srli a0, a1, 3
+; RV64I-NEXT: andi a3, a1, 63
+; RV64I-NEXT: andi a0, a0, 24
+; RV64I-NEXT: xori a3, a3, 63
+; RV64I-NEXT: add a0, a6, a0
+; RV64I-NEXT: ld a4, 8(a0)
+; RV64I-NEXT: ld a5, 16(a0)
+; RV64I-NEXT: ld a6, 0(a0)
+; RV64I-NEXT: ld a0, 24(a0)
+; RV64I-NEXT: srl a7, a4, a1
; RV64I-NEXT: slli t0, a5, 1
-; RV64I-NEXT: srl a5, a5, a0
-; RV64I-NEXT: sll a0, a7, s5
-; RV64I-NEXT: sll a1, a1, s5
-; RV64I-NEXT: sll a7, t0, s5
-; RV64I-NEXT: srli t0, a5, 56
-; RV64I-NEXT: srli t1, a5, 48
-; RV64I-NEXT: srli t2, a5, 40
-; RV64I-NEXT: srli t3, a5, 32
-; RV64I-NEXT: srli t4, a5, 24
-; RV64I-NEXT: srli t5, a5, 16
-; RV64I-NEXT: srli t6, a5, 8
-; RV64I-NEXT: or a0, a6, a0
-; RV64I-NEXT: or a1, a4, a1
-; RV64I-NEXT: or a3, a3, a7
+; RV64I-NEXT: srl a6, a6, a1
+; RV64I-NEXT: slli a4, a4, 1
+; RV64I-NEXT: srl a5, a5, a1
+; RV64I-NEXT: slli t1, a0, 1
+; RV64I-NEXT: srl t2, a0, a1
+; RV64I-NEXT: sll a0, t0, a3
+; RV64I-NEXT: sll a1, a4, a3
+; RV64I-NEXT: sll a3, t1, a3
+; RV64I-NEXT: srli a4, t2, 56
+; RV64I-NEXT: srli t0, t2, 48
+; RV64I-NEXT: srli t1, t2, 40
+; RV64I-NEXT: srli t3, t2, 32
+; RV64I-NEXT: srli t4, t2, 24
+; RV64I-NEXT: srli t5, t2, 16
+; RV64I-NEXT: srli t6, t2, 8
+; RV64I-NEXT: or a0, a7, a0
+; RV64I-NEXT: or a1, a6, a1
+; RV64I-NEXT: or a3, a5, a3
; RV64I-NEXT: sb t3, 28(a2)
-; RV64I-NEXT: sb t2, 29(a2)
-; RV64I-NEXT: sb t1, 30(a2)
-; RV64I-NEXT: sb t0, 31(a2)
-; RV64I-NEXT: sb a5, 24(a2)
+; RV64I-NEXT: sb t1, 29(a2)
+; RV64I-NEXT: sb t0, 30(a2)
+; RV64I-NEXT: sb a4, 31(a2)
+; RV64I-NEXT: sb t2, 24(a2)
; RV64I-NEXT: sb t6, 25(a2)
; RV64I-NEXT: sb t5, 26(a2)
; RV64I-NEXT: sb t4, 27(a2)
@@ -1463,17 +1513,19 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: sb a1, 9(a2)
; RV64I-NEXT: sb a5, 10(a2)
; RV64I-NEXT: sb a3, 11(a2)
-; RV64I-NEXT: ld s0, 136(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 128(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 120(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 112(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s4, 104(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s5, 96(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s6, 88(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s7, 80(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s8, 72(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s9, 64(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 144
+; RV64I-NEXT: ld s0, 152(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 144(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 136(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 128(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s4, 120(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s5, 112(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s6, 104(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s7, 96(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s8, 88(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s9, 80(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s10, 72(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s11, 64(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 160
; RV64I-NEXT: ret
;
; RV32I-LABEL: lshr_32bytes:
@@ -1498,55 +1550,67 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: lbu a7, 3(a0)
; RV32I-NEXT: lbu a5, 4(a0)
; RV32I-NEXT: lbu t0, 5(a0)
-; RV32I-NEXT: lbu t3, 6(a0)
-; RV32I-NEXT: lbu t6, 7(a0)
-; RV32I-NEXT: lbu s2, 8(a0)
-; RV32I-NEXT: lbu s3, 9(a0)
-; RV32I-NEXT: lbu s4, 10(a0)
-; RV32I-NEXT: lbu s5, 11(a0)
-; RV32I-NEXT: lbu s7, 12(a0)
-; RV32I-NEXT: lbu s8, 13(a0)
-; RV32I-NEXT: lbu s9, 14(a0)
-; RV32I-NEXT: lbu s10, 15(a0)
-; RV32I-NEXT: lbu s11, 16(a0)
-; RV32I-NEXT: lbu ra, 17(a0)
-; RV32I-NEXT: lbu t4, 18(a0)
-; RV32I-NEXT: lbu s0, 19(a0)
+; RV32I-NEXT: lbu t1, 6(a0)
+; RV32I-NEXT: lbu t2, 7(a0)
+; RV32I-NEXT: lbu t3, 8(a0)
+; RV32I-NEXT: lbu t4, 9(a0)
+; RV32I-NEXT: lbu t5, 10(a0)
+; RV32I-NEXT: lbu t6, 11(a0)
+; RV32I-NEXT: lbu s0, 12(a0)
+; RV32I-NEXT: lbu s2, 13(a0)
+; RV32I-NEXT: lbu s4, 14(a0)
+; RV32I-NEXT: lbu s5, 15(a0)
+; RV32I-NEXT: lbu s6, 16(a0)
+; RV32I-NEXT: lbu s7, 17(a0)
+; RV32I-NEXT: lbu s8, 18(a0)
+; RV32I-NEXT: lbu s9, 19(a0)
; RV32I-NEXT: slli a4, a4, 8
; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli a7, a7, 24
; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: sw a3, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: or a4, a7, a6
-; RV32I-NEXT: lbu t1, 20(a0)
-; RV32I-NEXT: lbu t2, 21(a0)
-; RV32I-NEXT: lbu t5, 22(a0)
-; RV32I-NEXT: lbu s1, 23(a0)
+; RV32I-NEXT: lbu s10, 20(a0)
+; RV32I-NEXT: lbu s11, 21(a0)
+; RV32I-NEXT: lbu ra, 22(a0)
+; RV32I-NEXT: lbu a3, 23(a0)
; RV32I-NEXT: slli t0, t0, 8
-; RV32I-NEXT: slli t3, t3, 16
+; RV32I-NEXT: slli t1, t1, 16
+; RV32I-NEXT: slli t2, t2, 24
+; RV32I-NEXT: slli t4, t4, 8
+; RV32I-NEXT: slli t5, t5, 16
; RV32I-NEXT: slli t6, t6, 24
-; RV32I-NEXT: slli s3, s3, 8
+; RV32I-NEXT: or a5, t0, a5
+; RV32I-NEXT: or a6, t2, t1
+; RV32I-NEXT: or a7, t4, t3
+; RV32I-NEXT: or t0, t6, t5
+; RV32I-NEXT: lbu s1, 24(a0)
+; RV32I-NEXT: lbu s3, 25(a0)
+; RV32I-NEXT: lbu t4, 26(a0)
+; RV32I-NEXT: lbu t5, 27(a0)
+; RV32I-NEXT: slli s2, s2, 8
; RV32I-NEXT: slli s4, s4, 16
; RV32I-NEXT: slli s5, s5, 24
-; RV32I-NEXT: or a5, t0, a5
-; RV32I-NEXT: or a6, t6, t3
-; RV32I-NEXT: or a7, s3, s2
-; RV32I-NEXT: or t0, s5, s4
-; RV32I-NEXT: lbu t3, 24(a0)
-; RV32I-NEXT: lbu s5, 25(a0)
-; RV32I-NEXT: lbu s6, 26(a0)
-; RV32I-NEXT: lbu t6, 27(a0)
-; RV32I-NEXT: slli s8, s8, 8
-; RV32I-NEXT: slli s9, s9, 16
-; RV32I-NEXT: slli s10, s10, 24
-; RV32I-NEXT: slli ra, ra, 8
-; RV32I-NEXT: or s7, s8, s7
-; RV32I-NEXT: or s2, s10, s9
-; RV32I-NEXT: or s3, ra, s11
-; RV32I-NEXT: lbu s4, 28(a0)
-; RV32I-NEXT: lbu s8, 29(a0)
-; RV32I-NEXT: lbu s9, 30(a0)
-; RV32I-NEXT: lbu s10, 31(a0)
-; RV32I-NEXT: lbu a0, 0(a1)
+; RV32I-NEXT: slli s7, s7, 8
+; RV32I-NEXT: or t1, s2, s0
+; RV32I-NEXT: or t2, s5, s4
+; RV32I-NEXT: or t3, s7, s6
+; RV32I-NEXT: lbu t6, 28(a0)
+; RV32I-NEXT: lbu s4, 29(a0)
+; RV32I-NEXT: lbu s5, 30(a0)
+; RV32I-NEXT: lbu s6, 31(a0)
+; RV32I-NEXT: slli s8, s8, 16
+; RV32I-NEXT: slli s9, s9, 24
+; RV32I-NEXT: slli s11, s11, 8
+; RV32I-NEXT: slli ra, ra, 16
+; RV32I-NEXT: slli a3, a3, 24
+; RV32I-NEXT: or a0, s9, s8
+; RV32I-NEXT: or s0, s11, s10
+; RV32I-NEXT: or s2, a3, ra
+; RV32I-NEXT: lbu a3, 0(a1)
+; RV32I-NEXT: lbu s7, 1(a1)
+; RV32I-NEXT: lbu s8, 2(a1)
+; RV32I-NEXT: lbu a1, 3(a1)
; RV32I-NEXT: sw zero, 56(sp)
; RV32I-NEXT: sw zero, 60(sp)
; RV32I-NEXT: sw zero, 64(sp)
@@ -1555,90 +1619,89 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: sw zero, 44(sp)
; RV32I-NEXT: sw zero, 48(sp)
; RV32I-NEXT: sw zero, 52(sp)
+; RV32I-NEXT: slli s3, s3, 8
+; RV32I-NEXT: or s1, s3, s1
+; RV32I-NEXT: addi s3, sp, 8
; RV32I-NEXT: slli t4, t4, 16
-; RV32I-NEXT: slli s0, s0, 24
-; RV32I-NEXT: or t4, s0, t4
-; RV32I-NEXT: addi s0, sp, 8
-; RV32I-NEXT: slli t2, t2, 8
-; RV32I-NEXT: slli t5, t5, 16
-; RV32I-NEXT: slli s1, s1, 24
-; RV32I-NEXT: slli s5, s5, 8
-; RV32I-NEXT: slli s6, s6, 16
-; RV32I-NEXT: slli t6, t6, 24
-; RV32I-NEXT: slli s8, s8, 8
-; RV32I-NEXT: slli s9, s9, 16
-; RV32I-NEXT: slli s10, s10, 24
-; RV32I-NEXT: or t1, t2, t1
+; RV32I-NEXT: slli t5, t5, 24
+; RV32I-NEXT: slli s4, s4, 8
+; RV32I-NEXT: slli s5, s5, 16
+; RV32I-NEXT: slli s6, s6, 24
+; RV32I-NEXT: slli s7, s7, 8
+; RV32I-NEXT: slli s8, s8, 16
+; RV32I-NEXT: slli a1, a1, 24
+; RV32I-NEXT: or t4, t5, t4
+; RV32I-NEXT: or t5, s4, t6
+; RV32I-NEXT: or t6, s6, s5
+; RV32I-NEXT: or a3, s7, a3
+; RV32I-NEXT: or a1, a1, s8
+; RV32I-NEXT: lw s4, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: or a4, a4, s4
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: or a6, t0, a7
+; RV32I-NEXT: or a7, t2, t1
+; RV32I-NEXT: or t0, a0, t3
+; RV32I-NEXT: or t1, s2, s0
+; RV32I-NEXT: or t2, t4, s1
+; RV32I-NEXT: or t3, t6, t5
+; RV32I-NEXT: or a0, a1, a3
+; RV32I-NEXT: sw t0, 24(sp)
+; RV32I-NEXT: sw t1, 28(sp)
+; RV32I-NEXT: sw t2, 32(sp)
+; RV32I-NEXT: sw t3, 36(sp)
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a6, 16(sp)
+; RV32I-NEXT: sw a7, 20(sp)
; RV32I-NEXT: srli a1, a0, 3
-; RV32I-NEXT: or t2, s1, t5
-; RV32I-NEXT: andi t5, a0, 31
-; RV32I-NEXT: or t3, s5, t3
-; RV32I-NEXT: or t6, t6, s6
-; RV32I-NEXT: or s1, s8, s4
-; RV32I-NEXT: or s4, s10, s9
-; RV32I-NEXT: andi s5, a1, 28
-; RV32I-NEXT: xori a1, t5, 31
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: or a4, a6, a5
-; RV32I-NEXT: or a5, t0, a7
-; RV32I-NEXT: or a6, s2, s7
-; RV32I-NEXT: or a7, t4, s3
-; RV32I-NEXT: or t0, t2, t1
-; RV32I-NEXT: or t1, t6, t3
-; RV32I-NEXT: or t2, s4, s1
-; RV32I-NEXT: add s0, s0, s5
-; RV32I-NEXT: sw a7, 24(sp)
-; RV32I-NEXT: sw t0, 28(sp)
-; RV32I-NEXT: sw t1, 32(sp)
-; RV32I-NEXT: sw t2, 36(sp)
-; RV32I-NEXT: sw a3, 8(sp)
-; RV32I-NEXT: sw a4, 12(sp)
-; RV32I-NEXT: sw a5, 16(sp)
-; RV32I-NEXT: sw a6, 20(sp)
-; RV32I-NEXT: lw a3, 0(s0)
-; RV32I-NEXT: lw a4, 4(s0)
-; RV32I-NEXT: lw a5, 8(s0)
-; RV32I-NEXT: lw a6, 12(s0)
-; RV32I-NEXT: lw a7, 16(s0)
-; RV32I-NEXT: lw t0, 20(s0)
-; RV32I-NEXT: lw t1, 24(s0)
-; RV32I-NEXT: lw t2, 28(s0)
-; RV32I-NEXT: srl t3, a4, a0
-; RV32I-NEXT: slli t4, a5, 1
+; RV32I-NEXT: andi a3, a0, 31
+; RV32I-NEXT: andi a4, a1, 28
+; RV32I-NEXT: xori a1, a3, 31
+; RV32I-NEXT: add a4, s3, a4
+; RV32I-NEXT: lw a3, 0(a4)
+; RV32I-NEXT: lw a5, 4(a4)
+; RV32I-NEXT: lw a6, 8(a4)
+; RV32I-NEXT: lw a7, 12(a4)
+; RV32I-NEXT: lw t0, 16(a4)
+; RV32I-NEXT: lw t1, 20(a4)
+; RV32I-NEXT: lw t2, 24(a4)
+; RV32I-NEXT: lw a4, 28(a4)
+; RV32I-NEXT: srl t3, a5, a0
+; RV32I-NEXT: slli t4, a6, 1
; RV32I-NEXT: srl a3, a3, a0
-; RV32I-NEXT: slli a4, a4, 1
-; RV32I-NEXT: srl t5, a6, a0
-; RV32I-NEXT: slli t6, a7, 1
-; RV32I-NEXT: srl a5, a5, a0
-; RV32I-NEXT: slli a6, a6, 1
-; RV32I-NEXT: srl s0, t0, a0
-; RV32I-NEXT: slli s1, t1, 1
-; RV32I-NEXT: srl a7, a7, a0
-; RV32I-NEXT: slli t0, t0, 1
-; RV32I-NEXT: srl t1, t1, a0
-; RV32I-NEXT: slli s2, t2, 1
+; RV32I-NEXT: slli a5, a5, 1
+; RV32I-NEXT: srl t5, a7, a0
+; RV32I-NEXT: slli t6, t0, 1
+; RV32I-NEXT: srl a6, a6, a0
+; RV32I-NEXT: slli a7, a7, 1
+; RV32I-NEXT: srl s0, t1, a0
+; RV32I-NEXT: slli s1, t2, 1
+; RV32I-NEXT: srl t0, t0, a0
+; RV32I-NEXT: slli t1, t1, 1
; RV32I-NEXT: srl t2, t2, a0
+; RV32I-NEXT: slli s2, a4, 1
+; RV32I-NEXT: srl s3, a4, a0
; RV32I-NEXT: sll a0, t4, a1
-; RV32I-NEXT: sll a4, a4, a1
-; RV32I-NEXT: sll t4, t6, a1
-; RV32I-NEXT: sll a6, a6, a1
-; RV32I-NEXT: sll t6, s1, a1
-; RV32I-NEXT: sll t0, t0, a1
-; RV32I-NEXT: sll s1, s2, a1
-; RV32I-NEXT: srli s2, t2, 24
-; RV32I-NEXT: srli s3, t2, 16
-; RV32I-NEXT: srli s4, t2, 8
+; RV32I-NEXT: sll a4, a5, a1
+; RV32I-NEXT: sll a5, t6, a1
+; RV32I-NEXT: sll a7, a7, a1
+; RV32I-NEXT: sll t4, s1, a1
+; RV32I-NEXT: sll t1, t1, a1
+; RV32I-NEXT: sll t6, s2, a1
+; RV32I-NEXT: srli s1, s3, 24
+; RV32I-NEXT: srli s2, s3, 16
+; RV32I-NEXT: srli s4, s3, 8
; RV32I-NEXT: or a0, t3, a0
; RV32I-NEXT: or a1, a3, a4
-; RV32I-NEXT: or a3, t5, t4
-; RV32I-NEXT: or a4, a5, a6
-; RV32I-NEXT: or a5, s0, t6
-; RV32I-NEXT: or a6, a7, t0
-; RV32I-NEXT: or a7, t1, s1
-; RV32I-NEXT: sb t2, 28(a2)
+; RV32I-NEXT: or a3, t5, a5
+; RV32I-NEXT: or a4, a6, a7
+; RV32I-NEXT: or a5, s0, t4
+; RV32I-NEXT: or a6, t0, t1
+; RV32I-NEXT: or a7, t2, t6
+; RV32I-NEXT: sb s3, 28(a2)
; RV32I-NEXT: sb s4, 29(a2)
-; RV32I-NEXT: sb s3, 30(a2)
-; RV32I-NEXT: sb s2, 31(a2)
+; RV32I-NEXT: sb s2, 30(a2)
+; RV32I-NEXT: sb s1, 31(a2)
; RV32I-NEXT: srli t0, a7, 24
; RV32I-NEXT: srli t1, a7, 16
; RV32I-NEXT: srli t2, a7, 8
@@ -1712,17 +1775,19 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-LABEL: shl_32bytes:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -144
-; RV64I-NEXT: sd s0, 136(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 128(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 120(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 112(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s4, 104(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s5, 96(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s6, 88(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s7, 80(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s8, 72(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s9, 64(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -160
+; RV64I-NEXT: sd s0, 152(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 144(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 136(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 128(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s4, 120(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s5, 112(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s6, 104(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s7, 96(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s8, 88(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s9, 80(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s10, 72(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s11, 64(sp) # 8-byte Folded Spill
; RV64I-NEXT: lbu a3, 0(a0)
; RV64I-NEXT: lbu a4, 1(a0)
; RV64I-NEXT: lbu a5, 2(a0)
@@ -1739,125 +1804,146 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: lbu s1, 13(a0)
; RV64I-NEXT: lbu s2, 14(a0)
; RV64I-NEXT: lbu s3, 15(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: slli a5, a5, 16
-; RV64I-NEXT: slli a6, a6, 24
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: or a4, a6, a5
; RV64I-NEXT: lbu s4, 16(a0)
; RV64I-NEXT: lbu s5, 17(a0)
; RV64I-NEXT: lbu s6, 18(a0)
; RV64I-NEXT: lbu s7, 19(a0)
+; RV64I-NEXT: slli a4, a4, 8
+; RV64I-NEXT: slli s8, a5, 16
+; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: slli t0, t0, 8
; RV64I-NEXT: slli t1, t1, 16
; RV64I-NEXT: slli t2, t2, 24
+; RV64I-NEXT: or a5, a4, a3
+; RV64I-NEXT: or a6, a6, s8
+; RV64I-NEXT: or a3, t0, a7
+; RV64I-NEXT: or a4, t2, t1
+; RV64I-NEXT: lbu s8, 20(a0)
+; RV64I-NEXT: lbu s9, 21(a0)
+; RV64I-NEXT: lbu s10, 22(a0)
+; RV64I-NEXT: lbu s11, 23(a0)
; RV64I-NEXT: slli t4, t4, 8
; RV64I-NEXT: slli t5, t5, 16
; RV64I-NEXT: slli t6, t6, 24
-; RV64I-NEXT: or a5, t0, a7
-; RV64I-NEXT: or a6, t2, t1
-; RV64I-NEXT: or a7, t4, t3
-; RV64I-NEXT: or t0, t6, t5
-; RV64I-NEXT: lbu t5, 20(a0)
-; RV64I-NEXT: lbu t6, 21(a0)
-; RV64I-NEXT: lbu s8, 22(a0)
-; RV64I-NEXT: lbu s9, 23(a0)
; RV64I-NEXT: slli s1, s1, 8
; RV64I-NEXT: slli s2, s2, 16
; RV64I-NEXT: slli s3, s3, 24
+; RV64I-NEXT: or a7, t4, t3
+; RV64I-NEXT: or t0, t6, t5
+; RV64I-NEXT: or t1, s1, s0
+; RV64I-NEXT: or t2, s3, s2
+; RV64I-NEXT: lbu t6, 24(a0)
+; RV64I-NEXT: lbu s0, 25(a0)
+; RV64I-NEXT: lbu s1, 26(a0)
+; RV64I-NEXT: lbu s2, 27(a0)
; RV64I-NEXT: slli s5, s5, 8
; RV64I-NEXT: slli s6, s6, 16
; RV64I-NEXT: slli s7, s7, 24
-; RV64I-NEXT: or t1, s1, s0
-; RV64I-NEXT: or t2, s3, s2
+; RV64I-NEXT: slli s9, s9, 8
; RV64I-NEXT: or t3, s5, s4
; RV64I-NEXT: or t4, s7, s6
-; RV64I-NEXT: lbu s0, 24(a0)
-; RV64I-NEXT: lbu s1, 25(a0)
-; RV64I-NEXT: lbu s2, 26(a0)
-; RV64I-NEXT: lbu s3, 27(a0)
-; RV64I-NEXT: slli t6, t6, 8
-; RV64I-NEXT: slli s8, s8, 16
-; RV64I-NEXT: slli s9, s9, 24
-; RV64I-NEXT: slli s1, s1, 8
-; RV64I-NEXT: or t5, t6, t5
-; RV64I-NEXT: or t6, s9, s8
-; RV64I-NEXT: or s0, s1, s0
-; RV64I-NEXT: lbu s1, 28(a0)
+; RV64I-NEXT: or t5, s9, s8
+; RV64I-NEXT: lbu s3, 28(a0)
; RV64I-NEXT: lbu s4, 29(a0)
; RV64I-NEXT: lbu s5, 30(a0)
; RV64I-NEXT: lbu s6, 31(a0)
-; RV64I-NEXT: lbu a0, 0(a1)
+; RV64I-NEXT: slli s10, s10, 16
+; RV64I-NEXT: slli s11, s11, 24
+; RV64I-NEXT: slli s0, s0, 8
+; RV64I-NEXT: slli s1, s1, 16
+; RV64I-NEXT: slli s2, s2, 24
+; RV64I-NEXT: slli s4, s4, 8
+; RV64I-NEXT: or a0, s11, s10
+; RV64I-NEXT: or t6, s0, t6
+; RV64I-NEXT: or s0, s2, s1
+; RV64I-NEXT: or s1, s4, s3
+; RV64I-NEXT: lbu s2, 0(a1)
+; RV64I-NEXT: lbu s3, 1(a1)
+; RV64I-NEXT: lbu s4, 2(a1)
+; RV64I-NEXT: lbu s7, 3(a1)
+; RV64I-NEXT: slli s5, s5, 16
+; RV64I-NEXT: slli s6, s6, 24
+; RV64I-NEXT: slli s3, s3, 8
+; RV64I-NEXT: slli s4, s4, 16
+; RV64I-NEXT: slli s7, s7, 24
+; RV64I-NEXT: or s5, s6, s5
+; RV64I-NEXT: or s2, s3, s2
+; RV64I-NEXT: or s3, s7, s4
+; RV64I-NEXT: lbu s4, 5(a1)
+; RV64I-NEXT: lbu s6, 4(a1)
+; RV64I-NEXT: lbu s7, 6(a1)
+; RV64I-NEXT: lbu a1, 7(a1)
+; RV64I-NEXT: slli s4, s4, 8
+; RV64I-NEXT: or s4, s4, s6
+; RV64I-NEXT: slli s7, s7, 16
+; RV64I-NEXT: slli a1, a1, 24
+; RV64I-NEXT: or a1, a1, s7
; RV64I-NEXT: sd zero, 0(sp)
; RV64I-NEXT: sd zero, 8(sp)
; RV64I-NEXT: sd zero, 16(sp)
; RV64I-NEXT: sd zero, 24(sp)
-; RV64I-NEXT: slli s2, s2, 16
-; RV64I-NEXT: slli s3, s3, 24
-; RV64I-NEXT: or a1, s3, s2
-; RV64I-NEXT: addi s2, sp, 32
-; RV64I-NEXT: slli s4, s4, 8
-; RV64I-NEXT: slli s5, s5, 16
-; RV64I-NEXT: slli s6, s6, 24
-; RV64I-NEXT: or s1, s4, s1
-; RV64I-NEXT: srli s3, a0, 3
-; RV64I-NEXT: or s4, s6, s5
-; RV64I-NEXT: andi s5, a0, 63
-; RV64I-NEXT: andi s3, s3, 24
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a5, t0, a7
-; RV64I-NEXT: or a6, t2, t1
-; RV64I-NEXT: or a7, t4, t3
-; RV64I-NEXT: or t0, t6, t5
-; RV64I-NEXT: or a1, a1, s0
-; RV64I-NEXT: or t1, s4, s1
-; RV64I-NEXT: sub t2, s2, s3
-; RV64I-NEXT: slli a4, a4, 32
-; RV64I-NEXT: slli a6, a6, 32
-; RV64I-NEXT: slli t0, t0, 32
-; RV64I-NEXT: slli t1, t1, 32
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: addi a6, sp, 32
; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a5, t0, a7
-; RV64I-NEXT: or a1, t1, a1
+; RV64I-NEXT: or a4, t0, a7
+; RV64I-NEXT: or a7, t2, t1
+; RV64I-NEXT: or t0, t4, t3
+; RV64I-NEXT: or a0, a0, t5
+; RV64I-NEXT: or t1, s0, t6
+; RV64I-NEXT: or t2, s5, s1
+; RV64I-NEXT: or t3, s3, s2
+; RV64I-NEXT: or a1, a1, s4
+; RV64I-NEXT: slli a3, a3, 32
+; RV64I-NEXT: slli a7, a7, 32
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: slli t2, t2, 32
+; RV64I-NEXT: slli a1, a1, 32
+; RV64I-NEXT: or a3, a3, a5
+; RV64I-NEXT: or a4, a7, a4
+; RV64I-NEXT: or a0, a0, t0
+; RV64I-NEXT: or a5, t2, t1
+; RV64I-NEXT: or a1, a1, t3
; RV64I-NEXT: sd a3, 32(sp)
; RV64I-NEXT: sd a4, 40(sp)
-; RV64I-NEXT: sd a5, 48(sp)
-; RV64I-NEXT: sd a1, 56(sp)
-; RV64I-NEXT: ld a1, 0(t2)
-; RV64I-NEXT: ld a3, 8(t2)
-; RV64I-NEXT: ld a4, 16(t2)
-; RV64I-NEXT: ld a5, 24(t2)
-; RV64I-NEXT: xori a6, s5, 63
-; RV64I-NEXT: sll a7, a3, a0
-; RV64I-NEXT: srli t0, a1, 1
-; RV64I-NEXT: sll a5, a5, a0
-; RV64I-NEXT: srli t1, a4, 1
-; RV64I-NEXT: sll a4, a4, a0
-; RV64I-NEXT: srli a3, a3, 1
-; RV64I-NEXT: sll t2, a1, a0
-; RV64I-NEXT: srl a0, t0, a6
-; RV64I-NEXT: srl a1, t1, a6
-; RV64I-NEXT: srl a3, a3, a6
-; RV64I-NEXT: srli a6, t2, 56
-; RV64I-NEXT: srli t0, t2, 48
-; RV64I-NEXT: srli t1, t2, 40
-; RV64I-NEXT: srli t3, t2, 32
-; RV64I-NEXT: srli t4, t2, 24
-; RV64I-NEXT: srli t5, t2, 16
-; RV64I-NEXT: srli t6, t2, 8
-; RV64I-NEXT: or a0, a7, a0
-; RV64I-NEXT: or a1, a5, a1
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: sb t3, 4(a2)
-; RV64I-NEXT: sb t1, 5(a2)
-; RV64I-NEXT: sb t0, 6(a2)
-; RV64I-NEXT: sb a6, 7(a2)
-; RV64I-NEXT: sb t2, 0(a2)
-; RV64I-NEXT: sb t6, 1(a2)
-; RV64I-NEXT: sb t5, 2(a2)
-; RV64I-NEXT: sb t4, 3(a2)
+; RV64I-NEXT: sd a0, 48(sp)
+; RV64I-NEXT: sd a5, 56(sp)
+; RV64I-NEXT: srli a0, a1, 3
+; RV64I-NEXT: andi a3, a1, 63
+; RV64I-NEXT: andi a0, a0, 24
+; RV64I-NEXT: sub a0, a6, a0
+; RV64I-NEXT: ld a4, 0(a0)
+; RV64I-NEXT: ld a5, 8(a0)
+; RV64I-NEXT: ld a6, 16(a0)
+; RV64I-NEXT: ld a0, 24(a0)
+; RV64I-NEXT: xori a3, a3, 63
+; RV64I-NEXT: sll a7, a5, a1
+; RV64I-NEXT: srli t0, a4, 1
+; RV64I-NEXT: sll t1, a0, a1
+; RV64I-NEXT: srli a0, a6, 1
+; RV64I-NEXT: sll a6, a6, a1
+; RV64I-NEXT: srli a5, a5, 1
+; RV64I-NEXT: sll a4, a4, a1
+; RV64I-NEXT: srl a1, t0, a3
+; RV64I-NEXT: srl t0, a0, a3
+; RV64I-NEXT: srl a3, a5, a3
+; RV64I-NEXT: srli a5, a4, 56
+; RV64I-NEXT: srli t2, a4, 48
+; RV64I-NEXT: srli t3, a4, 40
+; RV64I-NEXT: srli t4, a4, 32
+; RV64I-NEXT: srli t5, a4, 24
+; RV64I-NEXT: srli t6, a4, 16
+; RV64I-NEXT: srli s0, a4, 8
+; RV64I-NEXT: or a0, a7, a1
+; RV64I-NEXT: or a1, t1, t0
+; RV64I-NEXT: or a3, a6, a3
+; RV64I-NEXT: sb t4, 4(a2)
+; RV64I-NEXT: sb t3, 5(a2)
+; RV64I-NEXT: sb t2, 6(a2)
+; RV64I-NEXT: sb a5, 7(a2)
+; RV64I-NEXT: sb a4, 0(a2)
+; RV64I-NEXT: sb s0, 1(a2)
+; RV64I-NEXT: sb t6, 2(a2)
+; RV64I-NEXT: sb t5, 3(a2)
; RV64I-NEXT: srli a4, a3, 56
; RV64I-NEXT: srli a5, a3, 48
; RV64I-NEXT: srli a6, a3, 40
@@ -1903,17 +1989,19 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: sb a1, 9(a2)
; RV64I-NEXT: sb a5, 10(a2)
; RV64I-NEXT: sb a3, 11(a2)
-; RV64I-NEXT: ld s0, 136(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 128(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 120(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 112(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s4, 104(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s5, 96(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s6, 88(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s7, 80(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s8, 72(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s9, 64(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 144
+; RV64I-NEXT: ld s0, 152(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 144(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 136(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 128(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s4, 120(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s5, 112(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s6, 104(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s7, 96(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s8, 88(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s9, 80(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s10, 72(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s11, 64(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 160
; RV64I-NEXT: ret
;
; RV32I-LABEL: shl_32bytes:
@@ -1938,55 +2026,67 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: lbu a7, 3(a0)
; RV32I-NEXT: lbu a5, 4(a0)
; RV32I-NEXT: lbu t0, 5(a0)
-; RV32I-NEXT: lbu t3, 6(a0)
-; RV32I-NEXT: lbu t6, 7(a0)
-; RV32I-NEXT: lbu s2, 8(a0)
-; RV32I-NEXT: lbu s3, 9(a0)
-; RV32I-NEXT: lbu s4, 10(a0)
-; RV32I-NEXT: lbu s5, 11(a0)
-; RV32I-NEXT: lbu s7, 12(a0)
-; RV32I-NEXT: lbu s8, 13(a0)
-; RV32I-NEXT: lbu s9, 14(a0)
-; RV32I-NEXT: lbu s10, 15(a0)
-; RV32I-NEXT: lbu s11, 16(a0)
-; RV32I-NEXT: lbu ra, 17(a0)
-; RV32I-NEXT: lbu t4, 18(a0)
-; RV32I-NEXT: lbu s0, 19(a0)
+; RV32I-NEXT: lbu t1, 6(a0)
+; RV32I-NEXT: lbu t2, 7(a0)
+; RV32I-NEXT: lbu t3, 8(a0)
+; RV32I-NEXT: lbu t4, 9(a0)
+; RV32I-NEXT: lbu t5, 10(a0)
+; RV32I-NEXT: lbu t6, 11(a0)
+; RV32I-NEXT: lbu s0, 12(a0)
+; RV32I-NEXT: lbu s2, 13(a0)
+; RV32I-NEXT: lbu s4, 14(a0)
+; RV32I-NEXT: lbu s5, 15(a0)
+; RV32I-NEXT: lbu s6, 16(a0)
+; RV32I-NEXT: lbu s7, 17(a0)
+; RV32I-NEXT: lbu s8, 18(a0)
+; RV32I-NEXT: lbu s9, 19(a0)
; RV32I-NEXT: slli a4, a4, 8
; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli a7, a7, 24
; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: sw a3, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: or a4, a7, a6
-; RV32I-NEXT: lbu t1, 20(a0)
-; RV32I-NEXT: lbu t2, 21(a0)
-; RV32I-NEXT: lbu t5, 22(a0)
-; RV32I-NEXT: lbu s1, 23(a0)
+; RV32I-NEXT: lbu s10, 20(a0)
+; RV32I-NEXT: lbu s11, 21(a0)
+; RV32I-NEXT: lbu ra, 22(a0)
+; RV32I-NEXT: lbu a3, 23(a0)
; RV32I-NEXT: slli t0, t0, 8
-; RV32I-NEXT: slli t3, t3, 16
+; RV32I-NEXT: slli t1, t1, 16
+; RV32I-NEXT: slli t2, t2, 24
+; RV32I-NEXT: slli t4, t4, 8
+; RV32I-NEXT: slli t5, t5, 16
; RV32I-NEXT: slli t6, t6, 24
-; RV32I-NEXT: slli s3, s3, 8
+; RV32I-NEXT: or a5, t0, a5
+; RV32I-NEXT: or a6, t2, t1
+; RV32I-NEXT: or a7, t4, t3
+; RV32I-NEXT: or t0, t6, t5
+; RV32I-NEXT: lbu s1, 24(a0)
+; RV32I-NEXT: lbu s3, 25(a0)
+; RV32I-NEXT: lbu t4, 26(a0)
+; RV32I-NEXT: lbu t5, 27(a0)
+; RV32I-NEXT: slli s2, s2, 8
; RV32I-NEXT: slli s4, s4, 16
; RV32I-NEXT: slli s5, s5, 24
-; RV32I-NEXT: or a5, t0, a5
-; RV32I-NEXT: or a6, t6, t3
-; RV32I-NEXT: or a7, s3, s2
-; RV32I-NEXT: or t0, s5, s4
-; RV32I-NEXT: lbu t3, 24(a0)
-; RV32I-NEXT: lbu s5, 25(a0)
-; RV32I-NEXT: lbu s6, 26(a0)
-; RV32I-NEXT: lbu t6, 27(a0)
-; RV32I-NEXT: slli s8, s8, 8
-; RV32I-NEXT: slli s9, s9, 16
-; RV32I-NEXT: slli s10, s10, 24
-; RV32I-NEXT: slli ra, ra, 8
-; RV32I-NEXT: or s7, s8, s7
-; RV32I-NEXT: or s2, s10, s9
-; RV32I-NEXT: or s3, ra, s11
-; RV32I-NEXT: lbu s4, 28(a0)
-; RV32I-NEXT: lbu s8, 29(a0)
-; RV32I-NEXT: lbu s9, 30(a0)
-; RV32I-NEXT: lbu s10, 31(a0)
-; RV32I-NEXT: lbu a0, 0(a1)
+; RV32I-NEXT: slli s7, s7, 8
+; RV32I-NEXT: or t1, s2, s0
+; RV32I-NEXT: or t2, s5, s4
+; RV32I-NEXT: or t3, s7, s6
+; RV32I-NEXT: lbu t6, 28(a0)
+; RV32I-NEXT: lbu s4, 29(a0)
+; RV32I-NEXT: lbu s5, 30(a0)
+; RV32I-NEXT: lbu s6, 31(a0)
+; RV32I-NEXT: slli s8, s8, 16
+; RV32I-NEXT: slli s9, s9, 24
+; RV32I-NEXT: slli s11, s11, 8
+; RV32I-NEXT: slli ra, ra, 16
+; RV32I-NEXT: slli a3, a3, 24
+; RV32I-NEXT: or a0, s9, s8
+; RV32I-NEXT: or s0, s11, s10
+; RV32I-NEXT: or s2, a3, ra
+; RV32I-NEXT: lbu a3, 0(a1)
+; RV32I-NEXT: lbu s7, 1(a1)
+; RV32I-NEXT: lbu s8, 2(a1)
+; RV32I-NEXT: lbu a1, 3(a1)
; RV32I-NEXT: sw zero, 24(sp)
; RV32I-NEXT: sw zero, 28(sp)
; RV32I-NEXT: sw zero, 32(sp)
@@ -1995,89 +2095,88 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: sw zero, 12(sp)
; RV32I-NEXT: sw zero, 16(sp)
; RV32I-NEXT: sw zero, 20(sp)
+; RV32I-NEXT: slli s3, s3, 8
+; RV32I-NEXT: or s1, s3, s1
+; RV32I-NEXT: addi s3, sp, 40
; RV32I-NEXT: slli t4, t4, 16
-; RV32I-NEXT: slli s0, s0, 24
-; RV32I-NEXT: or t4, s0, t4
-; RV32I-NEXT: addi s0, sp, 40
-; RV32I-NEXT: slli t2, t2, 8
-; RV32I-NEXT: slli t5, t5, 16
-; RV32I-NEXT: slli s1, s1, 24
-; RV32I-NEXT: slli s5, s5, 8
-; RV32I-NEXT: slli s6, s6, 16
-; RV32I-NEXT: slli t6, t6, 24
-; RV32I-NEXT: slli s8, s8, 8
-; RV32I-NEXT: slli s9, s9, 16
-; RV32I-NEXT: slli s10, s10, 24
-; RV32I-NEXT: or t1, t2, t1
+; RV32I-NEXT: slli t5, t5, 24
+; RV32I-NEXT: slli s4, s4, 8
+; RV32I-NEXT: slli s5, s5, 16
+; RV32I-NEXT: slli s6, s6, 24
+; RV32I-NEXT: slli s7, s7, 8
+; RV32I-NEXT: slli s8, s8, 16
+; RV32I-NEXT: slli a1, a1, 24
+; RV32I-NEXT: or t4, t5, t4
+; RV32I-NEXT: or t5, s4, t6
+; RV32I-NEXT: or t6, s6, s5
+; RV32I-NEXT: or a3, s7, a3
+; RV32I-NEXT: or a1, a1, s8
+; RV32I-NEXT: lw s4, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: or a4, a4, s4
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: or a6, t0, a7
+; RV32I-NEXT: or a7, t2, t1
+; RV32I-NEXT: or t0, a0, t3
+; RV32I-NEXT: or t1, s2, s0
+; RV32I-NEXT: or t2, t4, s1
+; RV32I-NEXT: or t3, t6, t5
+; RV32I-NEXT: or a0, a1, a3
+; RV32I-NEXT: sw t0, 56(sp)
+; RV32I-NEXT: sw t1, 60(sp)
+; RV32I-NEXT: sw t2, 64(sp)
+; RV32I-NEXT: sw t3, 68(sp)
+; RV32I-NEXT: sw a4, 40(sp)
+; RV32I-NEXT: sw a5, 44(sp)
+; RV32I-NEXT: sw a6, 48(sp)
+; RV32I-NEXT: sw a7, 52(sp)
; RV32I-NEXT: srli a1, a0, 3
-; RV32I-NEXT: or t2, s1, t5
-; RV32I-NEXT: andi t5, a0, 31
-; RV32I-NEXT: or t3, s5, t3
-; RV32I-NEXT: or t6, t6, s6
-; RV32I-NEXT: or s1, s8, s4
-; RV32I-NEXT: or s4, s10, s9
-; RV32I-NEXT: andi s5, a1, 28
-; RV32I-NEXT: xori a1, t5, 31
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: or a4, a6, a5
-; RV32I-NEXT: or a5, t0, a7
-; RV32I-NEXT: or a6, s2, s7
-; RV32I-NEXT: or a7, t4, s3
-; RV32I-NEXT: or t0, t2, t1
-; RV32I-NEXT: or t1, t6, t3
-; RV32I-NEXT: or t2, s4, s1
-; RV32I-NEXT: sub t3, s0, s5
-; RV32I-NEXT: sw a7, 56(sp)
-; RV32I-NEXT: sw t0, 60(sp)
-; RV32I-NEXT: sw t1, 64(sp)
-; RV32I-NEXT: sw t2, 68(sp)
-; RV32I-NEXT: sw a3, 40(sp)
-; RV32I-NEXT: sw a4, 44(sp)
-; RV32I-NEXT: sw a5, 48(sp)
-; RV32I-NEXT: sw a6, 52(sp)
-; RV32I-NEXT: lw a3, 0(t3)
-; RV32I-NEXT: lw a4, 4(t3)
-; RV32I-NEXT: lw a5, 8(t3)
-; RV32I-NEXT: lw a6, 12(t3)
-; RV32I-NEXT: lw a7, 16(t3)
-; RV32I-NEXT: lw t0, 20(t3)
-; RV32I-NEXT: lw t1, 24(t3)
-; RV32I-NEXT: lw t2, 28(t3)
-; RV32I-NEXT: sll t3, a4, a0
-; RV32I-NEXT: srli t4, a3, 1
-; RV32I-NEXT: sll t5, a6, a0
-; RV32I-NEXT: srli t6, a5, 1
-; RV32I-NEXT: sll a5, a5, a0
-; RV32I-NEXT: srli a4, a4, 1
-; RV32I-NEXT: sll s0, t0, a0
-; RV32I-NEXT: srli s1, a7, 1
-; RV32I-NEXT: sll a7, a7, a0
-; RV32I-NEXT: srli a6, a6, 1
+; RV32I-NEXT: andi a3, a0, 31
+; RV32I-NEXT: andi a4, a1, 28
+; RV32I-NEXT: xori a1, a3, 31
+; RV32I-NEXT: sub a3, s3, a4
+; RV32I-NEXT: lw a4, 0(a3)
+; RV32I-NEXT: lw a5, 4(a3)
+; RV32I-NEXT: lw a6, 8(a3)
+; RV32I-NEXT: lw a7, 12(a3)
+; RV32I-NEXT: lw t0, 16(a3)
+; RV32I-NEXT: lw t1, 20(a3)
+; RV32I-NEXT: lw t2, 24(a3)
+; RV32I-NEXT: lw a3, 28(a3)
+; RV32I-NEXT: sll t3, a5, a0
+; RV32I-NEXT: srli t4, a4, 1
+; RV32I-NEXT: sll t5, a7, a0
+; RV32I-NEXT: srli t6, a6, 1
+; RV32I-NEXT: sll a6, a6, a0
+; RV32I-NEXT: srli a5, a5, 1
+; RV32I-NEXT: sll s0, t1, a0
+; RV32I-NEXT: srli s1, t0, 1
+; RV32I-NEXT: sll t0, t0, a0
+; RV32I-NEXT: srli a7, a7, 1
+; RV32I-NEXT: sll s2, a3, a0
+; RV32I-NEXT: srli a3, t2, 1
; RV32I-NEXT: sll t2, t2, a0
-; RV32I-NEXT: srli s2, t1, 1
-; RV32I-NEXT: sll t1, t1, a0
-; RV32I-NEXT: srli t0, t0, 1
-; RV32I-NEXT: sll s3, a3, a0
+; RV32I-NEXT: srli t1, t1, 1
+; RV32I-NEXT: sll s3, a4, a0
; RV32I-NEXT: srl a0, t4, a1
-; RV32I-NEXT: srl a3, t6, a1
-; RV32I-NEXT: srl a4, a4, a1
+; RV32I-NEXT: srl a4, t6, a1
+; RV32I-NEXT: srl a5, a5, a1
; RV32I-NEXT: srl t4, s1, a1
-; RV32I-NEXT: srl a6, a6, a1
-; RV32I-NEXT: srl t6, s2, a1
-; RV32I-NEXT: srl t0, t0, a1
+; RV32I-NEXT: srl a7, a7, a1
+; RV32I-NEXT: srl t6, a3, a1
+; RV32I-NEXT: srl t1, t1, a1
; RV32I-NEXT: srli s1, s3, 24
-; RV32I-NEXT: srli s2, s3, 16
-; RV32I-NEXT: srli s4, s3, 8
+; RV32I-NEXT: srli s4, s3, 16
+; RV32I-NEXT: srli s5, s3, 8
; RV32I-NEXT: or a0, t3, a0
-; RV32I-NEXT: or a1, t5, a3
-; RV32I-NEXT: or a3, a5, a4
+; RV32I-NEXT: or a1, t5, a4
+; RV32I-NEXT: or a3, a6, a5
; RV32I-NEXT: or a4, s0, t4
-; RV32I-NEXT: or a5, a7, a6
-; RV32I-NEXT: or a6, t2, t6
-; RV32I-NEXT: or a7, t1, t0
+; RV32I-NEXT: or a5, t0, a7
+; RV32I-NEXT: or a6, s2, t6
+; RV32I-NEXT: or a7, t2, t1
; RV32I-NEXT: sb s3, 0(a2)
-; RV32I-NEXT: sb s4, 1(a2)
-; RV32I-NEXT: sb s2, 2(a2)
+; RV32I-NEXT: sb s5, 1(a2)
+; RV32I-NEXT: sb s4, 2(a2)
; RV32I-NEXT: sb s1, 3(a2)
; RV32I-NEXT: srli t0, a7, 24
; RV32I-NEXT: srli t1, a7, 16
@@ -2152,17 +2251,19 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-LABEL: ashr_32bytes:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -144
-; RV64I-NEXT: sd s0, 136(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s1, 128(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s2, 120(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s3, 112(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s4, 104(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s5, 96(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s6, 88(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s7, 80(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s8, 72(sp) # 8-byte Folded Spill
-; RV64I-NEXT: sd s9, 64(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -160
+; RV64I-NEXT: sd s0, 152(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 144(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 136(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 128(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s4, 120(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s5, 112(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s6, 104(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s7, 96(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s8, 88(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s9, 80(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s10, 72(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s11, 64(sp) # 8-byte Folded Spill
; RV64I-NEXT: lbu a3, 0(a0)
; RV64I-NEXT: lbu a4, 1(a0)
; RV64I-NEXT: lbu a5, 2(a0)
@@ -2179,123 +2280,144 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: lbu s1, 13(a0)
; RV64I-NEXT: lbu s2, 14(a0)
; RV64I-NEXT: lbu s3, 15(a0)
-; RV64I-NEXT: slli a4, a4, 8
-; RV64I-NEXT: slli a5, a5, 16
-; RV64I-NEXT: slli a6, a6, 24
-; RV64I-NEXT: or a3, a4, a3
-; RV64I-NEXT: or a4, a6, a5
; RV64I-NEXT: lbu s4, 16(a0)
; RV64I-NEXT: lbu s5, 17(a0)
; RV64I-NEXT: lbu s6, 18(a0)
; RV64I-NEXT: lbu s7, 19(a0)
+; RV64I-NEXT: slli a4, a4, 8
+; RV64I-NEXT: slli a5, a5, 16
+; RV64I-NEXT: slli a6, a6, 24
; RV64I-NEXT: slli t0, t0, 8
; RV64I-NEXT: slli t1, t1, 16
; RV64I-NEXT: slli t2, t2, 24
+; RV64I-NEXT: or a3, a4, a3
+; RV64I-NEXT: or a4, a6, a5
+; RV64I-NEXT: or a5, t0, a7
+; RV64I-NEXT: or a6, t2, t1
+; RV64I-NEXT: lbu s8, 20(a0)
+; RV64I-NEXT: lbu s9, 21(a0)
+; RV64I-NEXT: lbu s10, 22(a0)
+; RV64I-NEXT: lbu s11, 23(a0)
; RV64I-NEXT: slli t4, t4, 8
; RV64I-NEXT: slli t5, t5, 16
; RV64I-NEXT: slli t6, t6, 24
-; RV64I-NEXT: or a5, t0, a7
-; RV64I-NEXT: or a6, t2, t1
-; RV64I-NEXT: or a7, t4, t3
-; RV64I-NEXT: or t0, t6, t5
-; RV64I-NEXT: lbu t5, 20(a0)
-; RV64I-NEXT: lbu t6, 21(a0)
-; RV64I-NEXT: lbu s8, 22(a0)
-; RV64I-NEXT: lbu s9, 23(a0)
; RV64I-NEXT: slli s1, s1, 8
; RV64I-NEXT: slli s2, s2, 16
; RV64I-NEXT: slli s3, s3, 24
+; RV64I-NEXT: or a7, t4, t3
+; RV64I-NEXT: or t0, t6, t5
+; RV64I-NEXT: or t1, s1, s0
+; RV64I-NEXT: or t2, s3, s2
+; RV64I-NEXT: lbu t6, 24(a0)
+; RV64I-NEXT: lbu s0, 25(a0)
+; RV64I-NEXT: lbu s1, 26(a0)
+; RV64I-NEXT: lbu s2, 27(a0)
; RV64I-NEXT: slli s5, s5, 8
; RV64I-NEXT: slli s6, s6, 16
; RV64I-NEXT: slli s7, s7, 24
-; RV64I-NEXT: or t1, s1, s0
-; RV64I-NEXT: or t2, s3, s2
+; RV64I-NEXT: slli s9, s9, 8
; RV64I-NEXT: or t3, s5, s4
; RV64I-NEXT: or t4, s7, s6
-; RV64I-NEXT: lbu s0, 24(a0)
-; RV64I-NEXT: lbu s1, 25(a0)
-; RV64I-NEXT: lbu s2, 26(a0)
-; RV64I-NEXT: lbu s3, 27(a0)
-; RV64I-NEXT: slli t6, t6, 8
-; RV64I-NEXT: slli s8, s8, 16
-; RV64I-NEXT: slli s9, s9, 24
-; RV64I-NEXT: slli s1, s1, 8
-; RV64I-NEXT: or t5, t6, t5
-; RV64I-NEXT: or t6, s9, s8
-; RV64I-NEXT: or s0, s1, s0
-; RV64I-NEXT: lbu s1, 28(a0)
+; RV64I-NEXT: or t5, s9, s8
+; RV64I-NEXT: lbu s3, 28(a0)
; RV64I-NEXT: lbu s4, 29(a0)
; RV64I-NEXT: lbu s5, 30(a0)
; RV64I-NEXT: lbu s6, 31(a0)
-; RV64I-NEXT: lbu a0, 0(a1)
-; RV64I-NEXT: slli s2, s2, 16
-; RV64I-NEXT: slli s3, s3, 24
-; RV64I-NEXT: or a1, s3, s2
-; RV64I-NEXT: mv s2, sp
+; RV64I-NEXT: slli s10, s10, 16
+; RV64I-NEXT: slli s11, s11, 24
+; RV64I-NEXT: slli s0, s0, 8
+; RV64I-NEXT: slli s1, s1, 16
+; RV64I-NEXT: slli s2, s2, 24
; RV64I-NEXT: slli s4, s4, 8
+; RV64I-NEXT: or a0, s11, s10
+; RV64I-NEXT: or t6, s0, t6
+; RV64I-NEXT: or s0, s2, s1
+; RV64I-NEXT: or s1, s4, s3
+; RV64I-NEXT: lbu s2, 0(a1)
+; RV64I-NEXT: lbu s3, 1(a1)
+; RV64I-NEXT: lbu s4, 2(a1)
+; RV64I-NEXT: lbu s7, 3(a1)
; RV64I-NEXT: slli s5, s5, 16
; RV64I-NEXT: slli s6, s6, 24
-; RV64I-NEXT: or s1, s4, s1
-; RV64I-NEXT: srli s3, a0, 3
-; RV64I-NEXT: or s4, s6, s5
-; RV64I-NEXT: andi s5, a0, 63
-; RV64I-NEXT: andi s3, s3, 24
-; RV64I-NEXT: xori s5, s5, 63
+; RV64I-NEXT: slli s3, s3, 8
+; RV64I-NEXT: slli s4, s4, 16
+; RV64I-NEXT: slli s7, s7, 24
+; RV64I-NEXT: or s5, s6, s5
+; RV64I-NEXT: or s2, s3, s2
+; RV64I-NEXT: or s3, s7, s4
+; RV64I-NEXT: lbu s4, 5(a1)
+; RV64I-NEXT: lbu s6, 4(a1)
+; RV64I-NEXT: lbu s7, 6(a1)
+; RV64I-NEXT: lbu a1, 7(a1)
+; RV64I-NEXT: slli s4, s4, 8
+; RV64I-NEXT: or s4, s4, s6
+; RV64I-NEXT: slli s7, s7, 16
+; RV64I-NEXT: slli a1, a1, 24
+; RV64I-NEXT: or a1, a1, s7
+; RV64I-NEXT: mv s6, sp
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a4, a6, a5
; RV64I-NEXT: or a5, t0, a7
; RV64I-NEXT: or a6, t2, t1
; RV64I-NEXT: or a7, t4, t3
-; RV64I-NEXT: or t0, t6, t5
-; RV64I-NEXT: or a1, a1, s0
-; RV64I-NEXT: or t1, s4, s1
-; RV64I-NEXT: add s2, s2, s3
+; RV64I-NEXT: or a0, a0, t5
+; RV64I-NEXT: or t0, s0, t6
+; RV64I-NEXT: or t1, s5, s1
+; RV64I-NEXT: or t2, s3, s2
+; RV64I-NEXT: or a1, a1, s4
; RV64I-NEXT: slli a4, a4, 32
; RV64I-NEXT: slli a6, a6, 32
-; RV64I-NEXT: slli t0, t0, 32
-; RV64I-NEXT: slli t2, t1, 32
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: slli t3, t1, 32
+; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: sraiw t1, t1, 31
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a4, a6, a5
-; RV64I-NEXT: or a5, t0, a7
-; RV64I-NEXT: or a1, t2, a1
+; RV64I-NEXT: or a0, a0, a7
+; RV64I-NEXT: or a5, t3, t0
+; RV64I-NEXT: or a1, a1, t2
; RV64I-NEXT: sd t1, 32(sp)
; RV64I-NEXT: sd t1, 40(sp)
; RV64I-NEXT: sd t1, 48(sp)
; RV64I-NEXT: sd t1, 56(sp)
; RV64I-NEXT: sd a3, 0(sp)
; RV64I-NEXT: sd a4, 8(sp)
-; RV64I-NEXT: sd a5, 16(sp)
-; RV64I-NEXT: sd a1, 24(sp)
-; RV64I-NEXT: ld a1, 8(s2)
-; RV64I-NEXT: ld a3, 16(s2)
-; RV64I-NEXT: ld a4, 0(s2)
-; RV64I-NEXT: ld a5, 24(s2)
-; RV64I-NEXT: srl a6, a1, a0
-; RV64I-NEXT: slli a7, a3, 1
-; RV64I-NEXT: srl a4, a4, a0
-; RV64I-NEXT: slli a1, a1, 1
-; RV64I-NEXT: srl a3, a3, a0
+; RV64I-NEXT: sd a0, 16(sp)
+; RV64I-NEXT: sd a5, 24(sp)
+; RV64I-NEXT: srli a0, a1, 3
+; RV64I-NEXT: andi a3, a1, 63
+; RV64I-NEXT: andi a0, a0, 24
+; RV64I-NEXT: xori a3, a3, 63
+; RV64I-NEXT: add a0, s6, a0
+; RV64I-NEXT: ld a4, 8(a0)
+; RV64I-NEXT: ld a5, 16(a0)
+; RV64I-NEXT: ld a6, 0(a0)
+; RV64I-NEXT: ld a0, 24(a0)
+; RV64I-NEXT: srl a7, a4, a1
; RV64I-NEXT: slli t0, a5, 1
-; RV64I-NEXT: sra a5, a5, a0
-; RV64I-NEXT: sll a0, a7, s5
-; RV64I-NEXT: sll a1, a1, s5
-; RV64I-NEXT: sll a7, t0, s5
-; RV64I-NEXT: srli t0, a5, 56
-; RV64I-NEXT: srli t1, a5, 48
-; RV64I-NEXT: srli t2, a5, 40
-; RV64I-NEXT: srli t3, a5, 32
-; RV64I-NEXT: srli t4, a5, 24
-; RV64I-NEXT: srli t5, a5, 16
-; RV64I-NEXT: srli t6, a5, 8
-; RV64I-NEXT: or a0, a6, a0
-; RV64I-NEXT: or a1, a4, a1
-; RV64I-NEXT: or a3, a3, a7
+; RV64I-NEXT: srl a6, a6, a1
+; RV64I-NEXT: slli a4, a4, 1
+; RV64I-NEXT: srl a5, a5, a1
+; RV64I-NEXT: slli t1, a0, 1
+; RV64I-NEXT: sra t2, a0, a1
+; RV64I-NEXT: sll a0, t0, a3
+; RV64I-NEXT: sll a1, a4, a3
+; RV64I-NEXT: sll a3, t1, a3
+; RV64I-NEXT: srli a4, t2, 56
+; RV64I-NEXT: srli t0, t2, 48
+; RV64I-NEXT: srli t1, t2, 40
+; RV64I-NEXT: srli t3, t2, 32
+; RV64I-NEXT: srli t4, t2, 24
+; RV64I-NEXT: srli t5, t2, 16
+; RV64I-NEXT: srli t6, t2, 8
+; RV64I-NEXT: or a0, a7, a0
+; RV64I-NEXT: or a1, a6, a1
+; RV64I-NEXT: or a3, a5, a3
; RV64I-NEXT: sb t3, 28(a2)
-; RV64I-NEXT: sb t2, 29(a2)
-; RV64I-NEXT: sb t1, 30(a2)
-; RV64I-NEXT: sb t0, 31(a2)
-; RV64I-NEXT: sb a5, 24(a2)
+; RV64I-NEXT: sb t1, 29(a2)
+; RV64I-NEXT: sb t0, 30(a2)
+; RV64I-NEXT: sb a4, 31(a2)
+; RV64I-NEXT: sb t2, 24(a2)
; RV64I-NEXT: sb t6, 25(a2)
; RV64I-NEXT: sb t5, 26(a2)
; RV64I-NEXT: sb t4, 27(a2)
@@ -2316,45 +2438,47 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: srli s3, a0, 56
; RV64I-NEXT: srli s4, a0, 48
; RV64I-NEXT: srli s5, a0, 40
+; RV64I-NEXT: srli s6, a0, 32
; RV64I-NEXT: sb a7, 20(a2)
; RV64I-NEXT: sb a6, 21(a2)
; RV64I-NEXT: sb a5, 22(a2)
; RV64I-NEXT: sb a4, 23(a2)
-; RV64I-NEXT: srli a4, a0, 32
+; RV64I-NEXT: srli a4, a0, 24
; RV64I-NEXT: sb a3, 16(a2)
; RV64I-NEXT: sb t2, 17(a2)
; RV64I-NEXT: sb t1, 18(a2)
; RV64I-NEXT: sb t0, 19(a2)
-; RV64I-NEXT: srli a3, a0, 24
+; RV64I-NEXT: srli a3, a0, 16
; RV64I-NEXT: sb t6, 4(a2)
; RV64I-NEXT: sb t5, 5(a2)
; RV64I-NEXT: sb t4, 6(a2)
; RV64I-NEXT: sb t3, 7(a2)
-; RV64I-NEXT: srli a5, a0, 16
+; RV64I-NEXT: srli a5, a0, 8
; RV64I-NEXT: sb a1, 0(a2)
; RV64I-NEXT: sb s2, 1(a2)
; RV64I-NEXT: sb s1, 2(a2)
; RV64I-NEXT: sb s0, 3(a2)
-; RV64I-NEXT: srli a1, a0, 8
-; RV64I-NEXT: sb a4, 12(a2)
+; RV64I-NEXT: sb s6, 12(a2)
; RV64I-NEXT: sb s5, 13(a2)
; RV64I-NEXT: sb s4, 14(a2)
; RV64I-NEXT: sb s3, 15(a2)
; RV64I-NEXT: sb a0, 8(a2)
-; RV64I-NEXT: sb a1, 9(a2)
-; RV64I-NEXT: sb a5, 10(a2)
-; RV64I-NEXT: sb a3, 11(a2)
-; RV64I-NEXT: ld s0, 136(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s1, 128(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s2, 120(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s3, 112(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s4, 104(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s5, 96(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s6, 88(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s7, 80(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s8, 72(sp) # 8-byte Folded Reload
-; RV64I-NEXT: ld s9, 64(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 144
+; RV64I-NEXT: sb a5, 9(a2)
+; RV64I-NEXT: sb a3, 10(a2)
+; RV64I-NEXT: sb a4, 11(a2)
+; RV64I-NEXT: ld s0, 152(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 144(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 136(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 128(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s4, 120(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s5, 112(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s6, 104(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s7, 96(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s8, 88(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s9, 80(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s10, 72(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s11, 64(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 160
; RV64I-NEXT: ret
;
; RV32I-LABEL: ashr_32bytes:
@@ -2379,148 +2503,159 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: lbu a7, 3(a0)
; RV32I-NEXT: lbu a5, 4(a0)
; RV32I-NEXT: lbu t0, 5(a0)
-; RV32I-NEXT: lbu t3, 6(a0)
-; RV32I-NEXT: lbu t4, 7(a0)
-; RV32I-NEXT: lbu t6, 8(a0)
-; RV32I-NEXT: lbu s0, 9(a0)
-; RV32I-NEXT: lbu s4, 10(a0)
-; RV32I-NEXT: lbu s5, 11(a0)
-; RV32I-NEXT: lbu s6, 12(a0)
-; RV32I-NEXT: lbu s7, 13(a0)
-; RV32I-NEXT: lbu s8, 14(a0)
-; RV32I-NEXT: lbu s9, 15(a0)
-; RV32I-NEXT: lbu s10, 16(a0)
-; RV32I-NEXT: lbu s11, 17(a0)
-; RV32I-NEXT: lbu s2, 18(a0)
-; RV32I-NEXT: lbu s3, 19(a0)
+; RV32I-NEXT: lbu t1, 6(a0)
+; RV32I-NEXT: lbu t2, 7(a0)
+; RV32I-NEXT: lbu t3, 8(a0)
+; RV32I-NEXT: lbu t4, 9(a0)
+; RV32I-NEXT: lbu t5, 10(a0)
+; RV32I-NEXT: lbu t6, 11(a0)
+; RV32I-NEXT: lbu s0, 12(a0)
+; RV32I-NEXT: lbu s1, 13(a0)
+; RV32I-NEXT: lbu s2, 14(a0)
+; RV32I-NEXT: lbu s3, 15(a0)
+; RV32I-NEXT: lbu s4, 16(a0)
+; RV32I-NEXT: lbu s5, 17(a0)
+; RV32I-NEXT: lbu s6, 18(a0)
+; RV32I-NEXT: lbu s7, 19(a0)
; RV32I-NEXT: slli a4, a4, 8
; RV32I-NEXT: slli a6, a6, 16
; RV32I-NEXT: slli a7, a7, 24
; RV32I-NEXT: or a3, a4, a3
+; RV32I-NEXT: sw a3, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: or a4, a7, a6
-; RV32I-NEXT: lbu t1, 20(a0)
-; RV32I-NEXT: lbu t2, 21(a0)
-; RV32I-NEXT: lbu t5, 22(a0)
-; RV32I-NEXT: lbu s1, 23(a0)
+; RV32I-NEXT: lbu s8, 20(a0)
+; RV32I-NEXT: lbu s9, 21(a0)
+; RV32I-NEXT: lbu s10, 22(a0)
+; RV32I-NEXT: lbu s11, 23(a0)
; RV32I-NEXT: slli t0, t0, 8
-; RV32I-NEXT: slli t3, t3, 16
-; RV32I-NEXT: slli t4, t4, 24
-; RV32I-NEXT: slli s0, s0, 8
-; RV32I-NEXT: slli s4, s4, 16
-; RV32I-NEXT: slli s5, s5, 24
+; RV32I-NEXT: slli t1, t1, 16
+; RV32I-NEXT: slli t2, t2, 24
+; RV32I-NEXT: slli t4, t4, 8
+; RV32I-NEXT: slli t5, t5, 16
+; RV32I-NEXT: slli t6, t6, 24
; RV32I-NEXT: or a5, t0, a5
-; RV32I-NEXT: or a6, t4, t3
-; RV32I-NEXT: or a7, s0, t6
-; RV32I-NEXT: or t0, s5, s4
-; RV32I-NEXT: lbu t3, 24(a0)
-; RV32I-NEXT: lbu s4, 25(a0)
-; RV32I-NEXT: lbu s5, 26(a0)
-; RV32I-NEXT: lbu ra, 27(a0)
-; RV32I-NEXT: slli s7, s7, 8
-; RV32I-NEXT: slli s8, s8, 16
-; RV32I-NEXT: slli s9, s9, 24
-; RV32I-NEXT: slli s11, s11, 8
-; RV32I-NEXT: or t4, s7, s6
-; RV32I-NEXT: or t6, s9, s8
-; RV32I-NEXT: or s0, s11, s10
-; RV32I-NEXT: lbu s6, 28(a0)
-; RV32I-NEXT: lbu s7, 29(a0)
-; RV32I-NEXT: lbu s8, 30(a0)
-; RV32I-NEXT: lbu s9, 31(a0)
-; RV32I-NEXT: lbu a0, 0(a1)
+; RV32I-NEXT: or a6, t2, t1
+; RV32I-NEXT: or a7, t4, t3
+; RV32I-NEXT: or t0, t6, t5
+; RV32I-NEXT: lbu ra, 24(a0)
+; RV32I-NEXT: lbu a3, 25(a0)
+; RV32I-NEXT: lbu t4, 26(a0)
+; RV32I-NEXT: lbu t5, 27(a0)
+; RV32I-NEXT: slli s1, s1, 8
; RV32I-NEXT: slli s2, s2, 16
; RV32I-NEXT: slli s3, s3, 24
-; RV32I-NEXT: or s2, s3, s2
-; RV32I-NEXT: addi s3, sp, 8
-; RV32I-NEXT: slli t2, t2, 8
-; RV32I-NEXT: slli t5, t5, 16
-; RV32I-NEXT: slli s1, s1, 24
-; RV32I-NEXT: slli s4, s4, 8
-; RV32I-NEXT: slli s5, s5, 16
-; RV32I-NEXT: slli ra, ra, 24
-; RV32I-NEXT: slli s7, s7, 8
-; RV32I-NEXT: slli s8, s8, 16
-; RV32I-NEXT: slli s9, s9, 24
-; RV32I-NEXT: or t1, t2, t1
-; RV32I-NEXT: srli a1, a0, 3
+; RV32I-NEXT: slli s5, s5, 8
+; RV32I-NEXT: or t1, s1, s0
+; RV32I-NEXT: or t2, s3, s2
+; RV32I-NEXT: or t3, s5, s4
+; RV32I-NEXT: lbu t6, 28(a0)
+; RV32I-NEXT: lbu s0, 29(a0)
+; RV32I-NEXT: lbu s1, 30(a0)
+; RV32I-NEXT: lbu a0, 31(a0)
+; RV32I-NEXT: slli s6, s6, 16
+; RV32I-NEXT: slli s7, s7, 24
+; RV32I-NEXT: slli s9, s9, 8
+; RV32I-NEXT: slli s10, s10, 16
+; RV32I-NEXT: slli s11, s11, 24
+; RV32I-NEXT: or s2, s7, s6
+; RV32I-NEXT: or s3, s9, s8
+; RV32I-NEXT: or s4, s11, s10
+; RV32I-NEXT: lbu s5, 0(a1)
+; RV32I-NEXT: lbu s6, 1(a1)
+; RV32I-NEXT: lbu s7, 2(a1)
+; RV32I-NEXT: lbu a1, 3(a1)
+; RV32I-NEXT: slli a3, a3, 8
+; RV32I-NEXT: or a3, a3, ra
+; RV32I-NEXT: addi s8, sp, 8
+; RV32I-NEXT: slli t4, t4, 16
+; RV32I-NEXT: slli t5, t5, 24
+; RV32I-NEXT: slli s0, s0, 8
+; RV32I-NEXT: slli s1, s1, 16
+; RV32I-NEXT: slli a0, a0, 24
+; RV32I-NEXT: slli s6, s6, 8
+; RV32I-NEXT: slli s7, s7, 16
+; RV32I-NEXT: slli a1, a1, 24
+; RV32I-NEXT: or t4, t5, t4
+; RV32I-NEXT: or t5, s0, t6
+; RV32I-NEXT: or s1, a0, s1
+; RV32I-NEXT: or t6, s6, s5
+; RV32I-NEXT: or a1, a1, s7
+; RV32I-NEXT: srai s0, a0, 31
+; RV32I-NEXT: lw a0, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT: or a4, a4, a0
+; RV32I-NEXT: or a5, a6, a5
+; RV32I-NEXT: or a6, t0, a7
+; RV32I-NEXT: or a7, t2, t1
+; RV32I-NEXT: or t0, s2, t3
+; RV32I-NEXT: or t1, s4, s3
+; RV32I-NEXT: or a3, t4, a3
; RV32I-NEXT: or t2, s1, t5
-; RV32I-NEXT: andi t5, a0, 31
-; RV32I-NEXT: or t3, s4, t3
-; RV32I-NEXT: or s1, ra, s5
-; RV32I-NEXT: or s4, s7, s6
-; RV32I-NEXT: or s5, s9, s8
-; RV32I-NEXT: srai s6, s9, 31
-; RV32I-NEXT: andi s7, a1, 28
-; RV32I-NEXT: xori a1, t5, 31
-; RV32I-NEXT: or a3, a4, a3
-; RV32I-NEXT: or a4, a6, a5
-; RV32I-NEXT: or a5, t0, a7
-; RV32I-NEXT: or a6, t6, t4
-; RV32I-NEXT: or a7, s2, s0
-; RV32I-NEXT: or t0, t2, t1
-; RV32I-NEXT: or t1, s1, t3
-; RV32I-NEXT: or t2, s5, s4
-; RV32I-NEXT: sw s6, 56(sp)
-; RV32I-NEXT: sw s6, 60(sp)
-; RV32I-NEXT: sw s6, 64(sp)
-; RV32I-NEXT: sw s6, 68(sp)
-; RV32I-NEXT: sw s6, 40(sp)
-; RV32I-NEXT: sw s6, 44(sp)
-; RV32I-NEXT: sw s6, 48(sp)
-; RV32I-NEXT: sw s6, 52(sp)
-; RV32I-NEXT: add s3, s3, s7
-; RV32I-NEXT: sw a7, 24(sp)
-; RV32I-NEXT: sw t0, 28(sp)
-; RV32I-NEXT: sw t1, 32(sp)
+; RV32I-NEXT: or a0, a1, t6
+; RV32I-NEXT: sw s0, 56(sp)
+; RV32I-NEXT: sw s0, 60(sp)
+; RV32I-NEXT: sw s0, 64(sp)
+; RV32I-NEXT: sw s0, 68(sp)
+; RV32I-NEXT: sw s0, 40(sp)
+; RV32I-NEXT: sw s0, 44(sp)
+; RV32I-NEXT: sw s0, 48(sp)
+; RV32I-NEXT: sw s0, 52(sp)
+; RV32I-NEXT: sw t0, 24(sp)
+; RV32I-NEXT: sw t1, 28(sp)
+; RV32I-NEXT: sw a3, 32(sp)
; RV32I-NEXT: sw t2, 36(sp)
-; RV32I-NEXT: sw a3, 8(sp)
-; RV32I-NEXT: sw a4, 12(sp)
-; RV32I-NEXT: sw a5, 16(sp)
-; RV32I-NEXT: sw a6, 20(sp)
-; RV32I-NEXT: lw a3, 0(s3)
-; RV32I-NEXT: lw a4, 4(s3)
-; RV32I-NEXT: lw a5, 8(s3)
-; RV32I-NEXT: lw a6, 12(s3)
-; RV32I-NEXT: lw a7, 16(s3)
-; RV32I-NEXT: lw t0, 20(s3)
-; RV32I-NEXT: lw t1, 24(s3)
-; RV32I-NEXT: lw t2, 28(s3)
-; RV32I-NEXT: srl t3, a4, a0
-; RV32I-NEXT: slli t4, a5, 1
+; RV32I-NEXT: sw a4, 8(sp)
+; RV32I-NEXT: sw a5, 12(sp)
+; RV32I-NEXT: sw a6, 16(sp)
+; RV32I-NEXT: sw a7, 20(sp)
+; RV32I-NEXT: srli a1, a0, 3
+; RV32I-NEXT: andi a3, a0, 31
+; RV32I-NEXT: andi a4, a1, 28
+; RV32I-NEXT: xori a1, a3, 31
+; RV32I-NEXT: add a4, s8, a4
+; RV32I-NEXT: lw a3, 0(a4)
+; RV32I-NEXT: lw a5, 4(a4)
+; RV32I-NEXT: lw a6, 8(a4)
+; RV32I-NEXT: lw a7, 12(a4)
+; RV32I-NEXT: lw t0, 16(a4)
+; RV32I-NEXT: lw t1, 20(a4)
+; RV32I-NEXT: lw t2, 24(a4)
+; RV32I-NEXT: lw a4, 28(a4)
+; RV32I-NEXT: srl t3, a5, a0
+; RV32I-NEXT: slli t4, a6, 1
; RV32I-NEXT: srl a3, a3, a0
-; RV32I-NEXT: slli a4, a4, 1
-; RV32I-NEXT: srl t5, a6, a0
-; RV32I-NEXT: slli t6, a7, 1
-; RV32I-NEXT: srl a5, a5, a0
-; RV32I-NEXT: slli a6, a6, 1
-; RV32I-NEXT: srl s0, t0, a0
-; RV32I-NEXT: slli s1, t1, 1
-; RV32I-NEXT: srl a7, a7, a0
-; RV32I-NEXT: slli t0, t0, 1
-; RV32I-NEXT: srl t1, t1, a0
-; RV32I-NEXT: slli s2, t2, 1
-; RV32I-NEXT: sra t2, t2, a0
+; RV32I-NEXT: slli a5, a5, 1
+; RV32I-NEXT: srl t5, a7, a0
+; RV32I-NEXT: slli t6, t0, 1
+; RV32I-NEXT: srl a6, a6, a0
+; RV32I-NEXT: slli a7, a7, 1
+; RV32I-NEXT: srl s0, t1, a0
+; RV32I-NEXT: slli s1, t2, 1
+; RV32I-NEXT: srl t0, t0, a0
+; RV32I-NEXT: slli t1, t1, 1
+; RV32I-NEXT: srl t2, t2, a0
+; RV32I-NEXT: slli s2, a4, 1
+; RV32I-NEXT: sra s3, a4, a0
; RV32I-NEXT: sll a0, t4, a1
-; RV32I-NEXT: sll a4, a4, a1
-; RV32I-NEXT: sll t4, t6, a1
-; RV32I-NEXT: sll a6, a6, a1
-; RV32I-NEXT: sll t6, s1, a1
-; RV32I-NEXT: sll t0, t0, a1
-; RV32I-NEXT: sll s1, s2, a1
-; RV32I-NEXT: srli s2, t2, 24
-; RV32I-NEXT: srli s3, t2, 16
-; RV32I-NEXT: srli s4, t2, 8
+; RV32I-NEXT: sll a4, a5, a1
+; RV32I-NEXT: sll a5, t6, a1
+; RV32I-NEXT: sll a7, a7, a1
+; RV32I-NEXT: sll t4, s1, a1
+; RV32I-NEXT: sll t1, t1, a1
+; RV32I-NEXT: sll t6, s2, a1
+; RV32I-NEXT: srli s1, s3, 24
+; RV32I-NEXT: srli s2, s3, 16
+; RV32I-NEXT: srli s4, s3, 8
; RV32I-NEXT: or a0, t3, a0
; RV32I-NEXT: or a1, a3, a4
-; RV32I-NEXT: or a3, t5, t4
-; RV32I-NEXT: or a4, a5, a6
-; RV32I-NEXT: or a5, s0, t6
-; RV32I-NEXT: or a6, a7, t0
-; RV32I-NEXT: or a7, t1, s1
-; RV32I-NEXT: sb t2, 28(a2)
+; RV32I-NEXT: or a3, t5, a5
+; RV32I-NEXT: or a4, a6, a7
+; RV32I-NEXT: or a5, s0, t4
+; RV32I-NEXT: or a6, t0, t1
+; RV32I-NEXT: or a7, t2, t6
+; RV32I-NEXT: sb s3, 28(a2)
; RV32I-NEXT: sb s4, 29(a2)
-; RV32I-NEXT: sb s3, 30(a2)
-; RV32I-NEXT: sb s2, 31(a2)
+; RV32I-NEXT: sb s2, 30(a2)
+; RV32I-NEXT: sb s1, 31(a2)
; RV32I-NEXT: srli t0, a7, 24
; RV32I-NEXT: srli t1, a7, 16
; RV32I-NEXT: srli t2, a7, 8
diff --git a/llvm/test/CodeGen/RISCV/xqcisls.ll b/llvm/test/CodeGen/RISCV/xqcisls.ll
index 709dc4c..3dea540 100644
--- a/llvm/test/CodeGen/RISCV/xqcisls.ll
+++ b/llvm/test/CodeGen/RISCV/xqcisls.ll
@@ -308,13 +308,13 @@ define i64 @lrd(ptr %a, i32 %b) {
;
; RV32IZBAXQCISLS-LABEL: lrd:
; RV32IZBAXQCISLS: # %bb.0:
-; RV32IZBAXQCISLS-NEXT: qc.lrw a2, a0, a1, 3
-; RV32IZBAXQCISLS-NEXT: addi a0, a0, 4
-; RV32IZBAXQCISLS-NEXT: qc.lrw a1, a0, a1, 3
-; RV32IZBAXQCISLS-NEXT: add a0, a2, a2
-; RV32IZBAXQCISLS-NEXT: sltu a2, a0, a2
-; RV32IZBAXQCISLS-NEXT: add a1, a1, a1
-; RV32IZBAXQCISLS-NEXT: add a1, a1, a2
+; RV32IZBAXQCISLS-NEXT: sh3add a0, a1, a0
+; RV32IZBAXQCISLS-NEXT: lw a1, 0(a0)
+; RV32IZBAXQCISLS-NEXT: lw a2, 4(a0)
+; RV32IZBAXQCISLS-NEXT: add a0, a1, a1
+; RV32IZBAXQCISLS-NEXT: sltu a1, a0, a1
+; RV32IZBAXQCISLS-NEXT: add a2, a2, a2
+; RV32IZBAXQCISLS-NEXT: add a1, a2, a1
; RV32IZBAXQCISLS-NEXT: ret
%1 = getelementptr i64, ptr %a, i32 %b
%2 = load i64, ptr %1, align 8
@@ -348,14 +348,13 @@ define i64 @lrd_2(ptr %a, i32 %b) {
;
; RV32IZBAXQCISLS-LABEL: lrd_2:
; RV32IZBAXQCISLS: # %bb.0:
-; RV32IZBAXQCISLS-NEXT: addi a2, a0, 96
-; RV32IZBAXQCISLS-NEXT: qc.lrw a2, a2, a1, 3
-; RV32IZBAXQCISLS-NEXT: addi a0, a0, 100
-; RV32IZBAXQCISLS-NEXT: qc.lrw a1, a0, a1, 3
-; RV32IZBAXQCISLS-NEXT: add a0, a2, a2
-; RV32IZBAXQCISLS-NEXT: sltu a2, a0, a2
-; RV32IZBAXQCISLS-NEXT: add a1, a1, a1
-; RV32IZBAXQCISLS-NEXT: add a1, a1, a2
+; RV32IZBAXQCISLS-NEXT: sh3add a0, a1, a0
+; RV32IZBAXQCISLS-NEXT: lw a1, 96(a0)
+; RV32IZBAXQCISLS-NEXT: lw a2, 100(a0)
+; RV32IZBAXQCISLS-NEXT: add a0, a1, a1
+; RV32IZBAXQCISLS-NEXT: sltu a1, a0, a1
+; RV32IZBAXQCISLS-NEXT: add a2, a2, a2
+; RV32IZBAXQCISLS-NEXT: add a1, a2, a1
; RV32IZBAXQCISLS-NEXT: ret
%1 = add i32 %b, 12
%2 = getelementptr i64, ptr %a, i32 %1
@@ -472,11 +471,11 @@ define void @srd(ptr %a, i32 %b, i64 %c) {
; RV32IZBAXQCISLS: # %bb.0:
; RV32IZBAXQCISLS-NEXT: add a4, a2, a2
; RV32IZBAXQCISLS-NEXT: add a3, a3, a3
-; RV32IZBAXQCISLS-NEXT: sltu a2, a4, a2
-; RV32IZBAXQCISLS-NEXT: qc.srw a4, a0, a1, 3
-; RV32IZBAXQCISLS-NEXT: add a2, a3, a2
-; RV32IZBAXQCISLS-NEXT: addi a0, a0, 4
-; RV32IZBAXQCISLS-NEXT: qc.srw a2, a0, a1, 3
+; RV32IZBAXQCISLS-NEXT: sh3add a0, a1, a0
+; RV32IZBAXQCISLS-NEXT: sltu a1, a4, a2
+; RV32IZBAXQCISLS-NEXT: add a1, a3, a1
+; RV32IZBAXQCISLS-NEXT: sw a4, 0(a0)
+; RV32IZBAXQCISLS-NEXT: sw a1, 4(a0)
; RV32IZBAXQCISLS-NEXT: ret
%1 = add i64 %c, %c
%2 = getelementptr i64, ptr %a, i32 %b
@@ -503,10 +502,10 @@ define i64 @lrd_large_shift(ptr %a, i32 %b) {
;
; RV32IZBAXQCISLS-LABEL: lrd_large_shift:
; RV32IZBAXQCISLS: # %bb.0:
-; RV32IZBAXQCISLS-NEXT: addi a2, a0, 384
-; RV32IZBAXQCISLS-NEXT: addi a3, a0, 388
-; RV32IZBAXQCISLS-NEXT: qc.lrw a0, a2, a1, 5
-; RV32IZBAXQCISLS-NEXT: qc.lrw a1, a3, a1, 5
+; RV32IZBAXQCISLS-NEXT: slli a1, a1, 5
+; RV32IZBAXQCISLS-NEXT: add a1, a1, a0
+; RV32IZBAXQCISLS-NEXT: lw a0, 384(a1)
+; RV32IZBAXQCISLS-NEXT: lw a1, 388(a1)
; RV32IZBAXQCISLS-NEXT: ret
%1 = add i32 %b, 12
%2 = shl i32 %1, 2
diff --git a/llvm/test/CodeGen/RISCV/xtheadfmemidx.ll b/llvm/test/CodeGen/RISCV/xtheadfmemidx.ll
index cdaae23..5724c4f 100644
--- a/llvm/test/CodeGen/RISCV/xtheadfmemidx.ll
+++ b/llvm/test/CodeGen/RISCV/xtheadfmemidx.ll
@@ -1,33 +1,27 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d -mattr=+xtheadfmemidx -mattr=+m -verify-machineinstrs < %s \
-; RUN: | FileCheck %s -check-prefix=RV32XTHEADMEMIDX
-; RUN: llc -mtriple=riscv64 -mattr=+d -mattr=+xtheadfmemidx -verify-machineinstrs < %s \
-; RUN: | FileCheck %s -check-prefix=RV64XTHEADFMEMIDX
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+d,+xtheadfmemidx \
+; RUN: -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,RV32XTHEADFMEMIDX
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+d,+xtheadfmemidx \
+; RUN: -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,RV64XTHEADFMEMIDX
-define float @flrw(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: flrw:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.flrw fa5, a0, a1, 2
-; RV32XTHEADMEMIDX-NEXT: fadd.s fa0, fa5, fa5
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADFMEMIDX-LABEL: flrw:
-; RV64XTHEADFMEMIDX: # %bb.0:
-; RV64XTHEADFMEMIDX-NEXT: th.flrw fa5, a0, a1, 2
-; RV64XTHEADFMEMIDX-NEXT: fadd.s fa0, fa5, fa5
-; RV64XTHEADFMEMIDX-NEXT: ret
- %1 = getelementptr float, ptr %a, i64 %b
+define float @flrw(ptr %a, iXLen %b) {
+; CHECK-LABEL: flrw:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.flrw fa5, a0, a1, 2
+; CHECK-NEXT: fadd.s fa0, fa5, fa5
+; CHECK-NEXT: ret
+ %1 = getelementptr float, ptr %a, iXLen %b
%2 = load float, ptr %1, align 4
%3 = fadd float %2, %2
ret float %3
}
define float @flurw(ptr %a, i32 %b) {
-; RV32XTHEADMEMIDX-LABEL: flurw:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.flrw fa5, a0, a1, 2
-; RV32XTHEADMEMIDX-NEXT: fadd.s fa0, fa5, fa5
-; RV32XTHEADMEMIDX-NEXT: ret
+; RV32XTHEADFMEMIDX-LABEL: flurw:
+; RV32XTHEADFMEMIDX: # %bb.0:
+; RV32XTHEADFMEMIDX-NEXT: th.flrw fa5, a0, a1, 2
+; RV32XTHEADFMEMIDX-NEXT: fadd.s fa0, fa5, fa5
+; RV32XTHEADFMEMIDX-NEXT: ret
;
; RV64XTHEADFMEMIDX-LABEL: flurw:
; RV64XTHEADFMEMIDX: # %bb.0:
@@ -41,30 +35,24 @@ define float @flurw(ptr %a, i32 %b) {
ret float %4
}
-define void @fsrw(ptr %a, i64 %b, float %c) {
-; RV32XTHEADMEMIDX-LABEL: fsrw:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: fadd.s fa5, fa0, fa0
-; RV32XTHEADMEMIDX-NEXT: th.fsrw fa5, a0, a1, 2
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADFMEMIDX-LABEL: fsrw:
-; RV64XTHEADFMEMIDX: # %bb.0:
-; RV64XTHEADFMEMIDX-NEXT: fadd.s fa5, fa0, fa0
-; RV64XTHEADFMEMIDX-NEXT: th.fsrw fa5, a0, a1, 2
-; RV64XTHEADFMEMIDX-NEXT: ret
+define void @fsrw(ptr %a, iXLen %b, float %c) {
+; CHECK-LABEL: fsrw:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fadd.s fa5, fa0, fa0
+; CHECK-NEXT: th.fsrw fa5, a0, a1, 2
+; CHECK-NEXT: ret
%1 = fadd float %c, %c
- %2 = getelementptr float, ptr %a, i64 %b
+ %2 = getelementptr float, ptr %a, iXLen %b
store float %1, ptr %2, align 4
ret void
}
define void @fsurw(ptr %a, i32 %b, float %c) {
-; RV32XTHEADMEMIDX-LABEL: fsurw:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: fadd.s fa5, fa0, fa0
-; RV32XTHEADMEMIDX-NEXT: th.fsrw fa5, a0, a1, 2
-; RV32XTHEADMEMIDX-NEXT: ret
+; RV32XTHEADFMEMIDX-LABEL: fsurw:
+; RV32XTHEADFMEMIDX: # %bb.0:
+; RV32XTHEADFMEMIDX-NEXT: fadd.s fa5, fa0, fa0
+; RV32XTHEADFMEMIDX-NEXT: th.fsrw fa5, a0, a1, 2
+; RV32XTHEADFMEMIDX-NEXT: ret
;
; RV64XTHEADFMEMIDX-LABEL: fsurw:
; RV64XTHEADFMEMIDX: # %bb.0:
@@ -78,30 +66,24 @@ define void @fsurw(ptr %a, i32 %b, float %c) {
ret void
}
-define double @flrd(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: flrd:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.flrd fa5, a0, a1, 3
-; RV32XTHEADMEMIDX-NEXT: fadd.d fa0, fa5, fa5
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADFMEMIDX-LABEL: flrd:
-; RV64XTHEADFMEMIDX: # %bb.0:
-; RV64XTHEADFMEMIDX-NEXT: th.flrd fa5, a0, a1, 3
-; RV64XTHEADFMEMIDX-NEXT: fadd.d fa0, fa5, fa5
-; RV64XTHEADFMEMIDX-NEXT: ret
- %1 = getelementptr double, ptr %a, i64 %b
+define double @flrd(ptr %a, iXLen %b) {
+; CHECK-LABEL: flrd:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.flrd fa5, a0, a1, 3
+; CHECK-NEXT: fadd.d fa0, fa5, fa5
+; CHECK-NEXT: ret
+ %1 = getelementptr double, ptr %a, iXLen %b
%2 = load double, ptr %1, align 8
%3 = fadd double %2, %2
ret double %3
}
define double @flurd(ptr %a, i32 %b) {
-; RV32XTHEADMEMIDX-LABEL: flurd:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.flrd fa5, a0, a1, 3
-; RV32XTHEADMEMIDX-NEXT: fadd.d fa0, fa5, fa5
-; RV32XTHEADMEMIDX-NEXT: ret
+; RV32XTHEADFMEMIDX-LABEL: flurd:
+; RV32XTHEADFMEMIDX: # %bb.0:
+; RV32XTHEADFMEMIDX-NEXT: th.flrd fa5, a0, a1, 3
+; RV32XTHEADFMEMIDX-NEXT: fadd.d fa0, fa5, fa5
+; RV32XTHEADFMEMIDX-NEXT: ret
;
; RV64XTHEADFMEMIDX-LABEL: flurd:
; RV64XTHEADFMEMIDX: # %bb.0:
@@ -115,30 +97,24 @@ define double @flurd(ptr %a, i32 %b) {
ret double %4
}
-define void @fsrd(ptr %a, i64 %b, double %c) {
-; RV32XTHEADMEMIDX-LABEL: fsrd:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: fadd.d fa5, fa0, fa0
-; RV32XTHEADMEMIDX-NEXT: th.fsrd fa5, a0, a1, 3
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADFMEMIDX-LABEL: fsrd:
-; RV64XTHEADFMEMIDX: # %bb.0:
-; RV64XTHEADFMEMIDX-NEXT: fadd.d fa5, fa0, fa0
-; RV64XTHEADFMEMIDX-NEXT: th.fsrd fa5, a0, a1, 3
-; RV64XTHEADFMEMIDX-NEXT: ret
+define void @fsrd(ptr %a, iXLen %b, double %c) {
+; CHECK-LABEL: fsrd:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fadd.d fa5, fa0, fa0
+; CHECK-NEXT: th.fsrd fa5, a0, a1, 3
+; CHECK-NEXT: ret
%1 = fadd double %c, %c
- %2 = getelementptr double, ptr %a, i64 %b
+ %2 = getelementptr double, ptr %a, iXLen %b
store double %1, ptr %2, align 8
ret void
}
define void @fsurd(ptr %a, i32 %b, double %c) {
-; RV32XTHEADMEMIDX-LABEL: fsurd:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: fadd.d fa5, fa0, fa0
-; RV32XTHEADMEMIDX-NEXT: th.fsrd fa5, a0, a1, 3
-; RV32XTHEADMEMIDX-NEXT: ret
+; RV32XTHEADFMEMIDX-LABEL: fsurd:
+; RV32XTHEADFMEMIDX: # %bb.0:
+; RV32XTHEADFMEMIDX-NEXT: fadd.d fa5, fa0, fa0
+; RV32XTHEADFMEMIDX-NEXT: th.fsrd fa5, a0, a1, 3
+; RV32XTHEADFMEMIDX-NEXT: ret
;
; RV64XTHEADFMEMIDX-LABEL: fsurd:
; RV64XTHEADFMEMIDX: # %bb.0:
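The rewritten RUN lines above (and the matching ones in xtheadmemidx.ll just below) adopt the sed-driven iXLen idiom: the placeholder token iXLen is textually rewritten to i32 before the riscv32 run and to i64 before the riscv64 run, so one function body indexes with the native word width on each target, and wherever the two targets now emit identical code the checks collapse under the shared CHECK prefix. A minimal self-contained sketch of the idiom (the function body is illustrative, not taken from this patch):

  ; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 | FileCheck %s -check-prefixes=CHECK,RV32
  ; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 | FileCheck %s -check-prefixes=CHECK,RV64
  define i8 @ex(ptr %p, iXLen %i) {
    %addr = getelementptr i8, ptr %p, iXLen %i
    %v = load i8, ptr %addr
    ret i8 %v
  }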
diff --git a/llvm/test/CodeGen/RISCV/xtheadmemidx.ll b/llvm/test/CodeGen/RISCV/xtheadmemidx.ll
index fc20fcb..9f0f8d9 100644
--- a/llvm/test/CodeGen/RISCV/xtheadmemidx.ll
+++ b/llvm/test/CodeGen/RISCV/xtheadmemidx.ll
@@ -1,238 +1,156 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d -mattr=+xtheadmemidx -mattr=+m -verify-machineinstrs < %s \
-; RUN: | FileCheck %s -check-prefix=RV32XTHEADMEMIDX
-; RUN: llc -mtriple=riscv64 -mattr=+d -mattr=+xtheadmemidx -mattr=+m -verify-machineinstrs < %s \
-; RUN: | FileCheck %s -check-prefix=RV64XTHEADMEMIDX
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+d,+xtheadmemidx \
+; RUN: -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,RV32XTHEADMEMIDX
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+d,+xtheadmemidx \
+; RUN: -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,RV64XTHEADMEMIDX
define ptr @lbia(ptr %base, ptr %addr.2, i8 %a) {
-; RV32XTHEADMEMIDX-LABEL: lbia:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lbia a3, (a0), -1, 0
-; RV32XTHEADMEMIDX-NEXT: add a2, a3, a2
-; RV32XTHEADMEMIDX-NEXT: sb a2, 0(a1)
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lbia:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lbia a3, (a0), -1, 0
-; RV64XTHEADMEMIDX-NEXT: add a2, a3, a2
-; RV64XTHEADMEMIDX-NEXT: sb a2, 0(a1)
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i8, ptr %base, i8 0
+; CHECK-LABEL: lbia:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lbia a3, (a0), -1, 0
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: sb a2, 0(a1)
+; CHECK-NEXT: ret
+ %addr = getelementptr i8, ptr %base, iXLen 0
%ld = load i8, ptr %addr
- %addr.1 = getelementptr i8, ptr %base, i8 -1
+ %addr.1 = getelementptr i8, ptr %base, iXLen -1
%res = add i8 %ld, %a
store i8 %res, ptr %addr.2
ret ptr %addr.1
}
define ptr @lbib(ptr %base, i8 %a) {
-; RV32XTHEADMEMIDX-LABEL: lbib:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lbib a2, (a0), 1, 0
-; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1
-; RV32XTHEADMEMIDX-NEXT: sb a1, 1(a0)
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lbib:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lbib a2, (a0), 1, 0
-; RV64XTHEADMEMIDX-NEXT: add a1, a2, a1
-; RV64XTHEADMEMIDX-NEXT: sb a1, 1(a0)
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i8, ptr %base, i8 1
+; CHECK-LABEL: lbib:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lbib a2, (a0), 1, 0
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: sb a1, 1(a0)
+; CHECK-NEXT: ret
+ %addr = getelementptr i8, ptr %base, iXLen 1
%ld = load i8, ptr %addr
- %addr.1 = getelementptr i8, ptr %base, i8 2
+ %addr.1 = getelementptr i8, ptr %base, iXLen 2
%res = add i8 %ld, %a
store i8 %res, ptr %addr.1
ret ptr %addr
}
-define ptr @lbuia(ptr %base, ptr %addr.2, i64 %a) {
-; RV32XTHEADMEMIDX-LABEL: lbuia:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lbuia a4, (a0), -1, 0
-; RV32XTHEADMEMIDX-NEXT: add a2, a4, a2
-; RV32XTHEADMEMIDX-NEXT: sltu a4, a2, a4
-; RV32XTHEADMEMIDX-NEXT: add a3, a3, a4
-; RV32XTHEADMEMIDX-NEXT: sw a2, 0(a1)
-; RV32XTHEADMEMIDX-NEXT: sw a3, 4(a1)
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lbuia:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lbuia a3, (a0), -1, 0
-; RV64XTHEADMEMIDX-NEXT: add a2, a3, a2
-; RV64XTHEADMEMIDX-NEXT: sd a2, 0(a1)
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i8, ptr %base, i8 0
+define ptr @lbuia(ptr %base, ptr %addr.2, i32 %a) {
+; CHECK-LABEL: lbuia:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lbuia a3, (a0), -1, 0
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: sw a2, 0(a1)
+; CHECK-NEXT: ret
+ %addr = getelementptr i8, ptr %base, iXLen 0
%ld = load i8, ptr %addr
- %zext = zext i8 %ld to i64
- %addr.1 = getelementptr i8, ptr %base, i8 -1
- %res = add i64 %zext, %a
- store i64 %res, ptr %addr.2
+ %zext = zext i8 %ld to i32
+ %addr.1 = getelementptr i8, ptr %base, iXLen -1
+ %res = add i32 %zext, %a
+ store i32 %res, ptr %addr.2
ret ptr %addr.1
}
-define ptr @lbuib(ptr %base, i64 %a, ptr %addr.1) {
-; RV32XTHEADMEMIDX-LABEL: lbuib:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lbuib a4, (a0), 1, 0
-; RV32XTHEADMEMIDX-NEXT: add a1, a4, a1
-; RV32XTHEADMEMIDX-NEXT: sltu a4, a1, a4
-; RV32XTHEADMEMIDX-NEXT: add a2, a2, a4
-; RV32XTHEADMEMIDX-NEXT: sw a1, 0(a3)
-; RV32XTHEADMEMIDX-NEXT: sw a2, 4(a3)
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lbuib:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lbuib a3, (a0), 1, 0
-; RV64XTHEADMEMIDX-NEXT: add a1, a3, a1
-; RV64XTHEADMEMIDX-NEXT: sd a1, 0(a2)
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i8, ptr %base, i8 1
+define ptr @lbuib(ptr %base, i32 %a, ptr %addr.1) {
+; CHECK-LABEL: lbuib:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lbuib a3, (a0), 1, 0
+; CHECK-NEXT: add a1, a3, a1
+; CHECK-NEXT: sw a1, 0(a2)
+; CHECK-NEXT: ret
+ %addr = getelementptr i8, ptr %base, iXLen 1
%ld = load i8, ptr %addr
- %zext = zext i8 %ld to i64
- %res = add i64 %zext, %a
- store i64 %res, ptr %addr.1
+ %zext = zext i8 %ld to i32
+ %res = add i32 %zext, %a
+ store i32 %res, ptr %addr.1
ret ptr %addr
}
define ptr @lhia(ptr %base, ptr %addr.2, i16 %a) {
-; RV32XTHEADMEMIDX-LABEL: lhia:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lhia a3, (a0), -16, 1
-; RV32XTHEADMEMIDX-NEXT: add a2, a3, a2
-; RV32XTHEADMEMIDX-NEXT: sh a2, 0(a1)
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lhia:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lhia a3, (a0), -16, 1
-; RV64XTHEADMEMIDX-NEXT: add a2, a3, a2
-; RV64XTHEADMEMIDX-NEXT: sh a2, 0(a1)
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i16, ptr %base, i16 0
+; CHECK-LABEL: lhia:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lhia a3, (a0), -16, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: sh a2, 0(a1)
+; CHECK-NEXT: ret
+ %addr = getelementptr i16, ptr %base, iXLen 0
%ld = load i16, ptr %addr
- %addr.1 = getelementptr i16, ptr %base, i16 -16
+ %addr.1 = getelementptr i16, ptr %base, iXLen -16
%res = add i16 %ld, %a
store i16 %res, ptr %addr.2
ret ptr %addr.1
}
define ptr @lhib(ptr %base, i16 %a) {
-; RV32XTHEADMEMIDX-LABEL: lhib:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lhib a2, (a0), 2, 0
-; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1
-; RV32XTHEADMEMIDX-NEXT: sh a1, 2(a0)
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lhib:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lhib a2, (a0), 2, 0
-; RV64XTHEADMEMIDX-NEXT: add a1, a2, a1
-; RV64XTHEADMEMIDX-NEXT: sh a1, 2(a0)
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i16, ptr %base, i16 1
+; CHECK-LABEL: lhib:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lhib a2, (a0), 2, 0
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: sh a1, 2(a0)
+; CHECK-NEXT: ret
+ %addr = getelementptr i16, ptr %base, iXLen 1
%ld = load i16, ptr %addr
- %addr.1 = getelementptr i16, ptr %base, i16 2
+ %addr.1 = getelementptr i16, ptr %base, iXLen 2
%res = add i16 %ld, %a
store i16 %res, ptr %addr.1
ret ptr %addr
}
-define ptr @lhuia(ptr %base, ptr %addr.2, i64 %a) {
-; RV32XTHEADMEMIDX-LABEL: lhuia:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lhuia a4, (a0), -16, 1
-; RV32XTHEADMEMIDX-NEXT: add a2, a4, a2
-; RV32XTHEADMEMIDX-NEXT: sltu a4, a2, a4
-; RV32XTHEADMEMIDX-NEXT: add a3, a3, a4
-; RV32XTHEADMEMIDX-NEXT: sw a2, 0(a1)
-; RV32XTHEADMEMIDX-NEXT: sw a3, 4(a1)
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lhuia:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lhuia a3, (a0), -16, 1
-; RV64XTHEADMEMIDX-NEXT: add a2, a3, a2
-; RV64XTHEADMEMIDX-NEXT: sd a2, 0(a1)
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i16, ptr %base, i16 0
+define ptr @lhuia(ptr %base, ptr %addr.2, i32 %a) {
+; CHECK-LABEL: lhuia:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lhuia a3, (a0), -16, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: sw a2, 0(a1)
+; CHECK-NEXT: ret
+ %addr = getelementptr i16, ptr %base, iXLen 0
%ld = load i16, ptr %addr
- %zext = zext i16 %ld to i64
- %addr.1 = getelementptr i16, ptr %base, i16 -16
- %res = add i64 %zext, %a
- store i64 %res, ptr %addr.2
+ %zext = zext i16 %ld to i32
+ %addr.1 = getelementptr i16, ptr %base, iXLen -16
+ %res = add i32 %zext, %a
+ store i32 %res, ptr %addr.2
ret ptr %addr.1
}
-define ptr @lhuib(ptr %base, i64 %a, ptr %addr.1) {
-; RV32XTHEADMEMIDX-LABEL: lhuib:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lhuib a4, (a0), 2, 0
-; RV32XTHEADMEMIDX-NEXT: add a1, a4, a1
-; RV32XTHEADMEMIDX-NEXT: sltu a4, a1, a4
-; RV32XTHEADMEMIDX-NEXT: add a2, a2, a4
-; RV32XTHEADMEMIDX-NEXT: sw a1, 0(a3)
-; RV32XTHEADMEMIDX-NEXT: sw a2, 4(a3)
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lhuib:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lhuib a3, (a0), 2, 0
-; RV64XTHEADMEMIDX-NEXT: add a1, a3, a1
-; RV64XTHEADMEMIDX-NEXT: sd a1, 0(a2)
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i16, ptr %base, i16 1
+define ptr @lhuib(ptr %base, i32 %a, ptr %addr.1) {
+; CHECK-LABEL: lhuib:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lhuib a3, (a0), 2, 0
+; CHECK-NEXT: add a1, a3, a1
+; CHECK-NEXT: sw a1, 0(a2)
+; CHECK-NEXT: ret
+ %addr = getelementptr i16, ptr %base, iXLen 1
%ld = load i16, ptr %addr
- %zext = zext i16 %ld to i64
- %res = add i64 %zext, %a
- store i64 %res, ptr %addr.1
+ %zext = zext i16 %ld to i32
+ %res = add i32 %zext, %a
+ store i32 %res, ptr %addr.1
ret ptr %addr
}
define ptr @lwia(ptr %base, ptr %addr.2, i32 %a) {
-; RV32XTHEADMEMIDX-LABEL: lwia:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lwia a3, (a0), -16, 2
-; RV32XTHEADMEMIDX-NEXT: add a2, a3, a2
-; RV32XTHEADMEMIDX-NEXT: sw a2, 0(a1)
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lwia:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lwia a3, (a0), -16, 2
-; RV64XTHEADMEMIDX-NEXT: add a2, a3, a2
-; RV64XTHEADMEMIDX-NEXT: sw a2, 0(a1)
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i32, ptr %base, i32 0
+; CHECK-LABEL: lwia:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lwia a3, (a0), -16, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: sw a2, 0(a1)
+; CHECK-NEXT: ret
+ %addr = getelementptr i32, ptr %base, iXLen 0
%ld = load i32, ptr %addr
- %addr.1 = getelementptr i32, ptr %base, i32 -16
+ %addr.1 = getelementptr i32, ptr %base, iXLen -16
%res = add i32 %ld, %a
store i32 %res, ptr %addr.2
ret ptr %addr.1
}
define ptr @lwib(ptr %base, i32 %a) {
-; RV32XTHEADMEMIDX-LABEL: lwib:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lwib a2, (a0), 4, 0
-; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1
-; RV32XTHEADMEMIDX-NEXT: sw a1, 4(a0)
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lwib:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lwib a2, (a0), 4, 0
-; RV64XTHEADMEMIDX-NEXT: add a1, a2, a1
-; RV64XTHEADMEMIDX-NEXT: sw a1, 4(a0)
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i32, ptr %base, i32 1
+; CHECK-LABEL: lwib:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lwib a2, (a0), 4, 0
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: sw a1, 4(a0)
+; CHECK-NEXT: ret
+ %addr = getelementptr i32, ptr %base, iXLen 1
%ld = load i32, ptr %addr
- %addr.1 = getelementptr i32, ptr %base, i32 2
+ %addr.1 = getelementptr i32, ptr %base, iXLen 2
%res = add i32 %ld, %a
store i32 %res, ptr %addr.1
ret ptr %addr
@@ -255,10 +173,10 @@ define ptr @lwuia(ptr %base, ptr %addr.2, i64 %a) {
; RV64XTHEADMEMIDX-NEXT: add a2, a3, a2
; RV64XTHEADMEMIDX-NEXT: sd a2, 0(a1)
; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i32, ptr %base, i32 0
+ %addr = getelementptr i32, ptr %base, iXLen 0
%ld = load i32, ptr %addr
%zext = zext i32 %ld to i64
- %addr.1 = getelementptr i32, ptr %base, i32 -16
+ %addr.1 = getelementptr i32, ptr %base, iXLen -16
%res = add i64 %zext, %a
store i64 %res, ptr %addr.2
ret ptr %addr.1
@@ -281,7 +199,7 @@ define ptr @lwuib(ptr %base, i64 %a, ptr %addr.1) {
; RV64XTHEADMEMIDX-NEXT: add a1, a3, a1
; RV64XTHEADMEMIDX-NEXT: sd a1, 0(a2)
; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i32, ptr %base, i32 1
+ %addr = getelementptr i32, ptr %base, iXLen 1
%ld = load i32, ptr %addr
%zext = zext i32 %ld to i64
%res = add i64 %zext, %a
@@ -309,9 +227,9 @@ define ptr @ldia(ptr %base, ptr %addr.2, i64 %a) {
; RV64XTHEADMEMIDX-NEXT: add a2, a3, a2
; RV64XTHEADMEMIDX-NEXT: sd a2, 0(a1)
; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i64, ptr %base, i64 0
+ %addr = getelementptr i64, ptr %base, iXLen 0
%ld = load i64, ptr %addr
- %addr.1 = getelementptr i64, ptr %base, i64 -16
+ %addr.1 = getelementptr i64, ptr %base, iXLen -16
%res = add i64 %ld, %a
store i64 %res, ptr %addr.2
ret ptr %addr.1
@@ -336,117 +254,81 @@ define ptr @ldib(ptr %base, i64 %a) {
; RV64XTHEADMEMIDX-NEXT: add a1, a2, a1
; RV64XTHEADMEMIDX-NEXT: sd a1, 8(a0)
; RV64XTHEADMEMIDX-NEXT: ret
- %addr = getelementptr i64, ptr %base, i64 1
+ %addr = getelementptr i64, ptr %base, iXLen 1
%ld = load i64, ptr %addr
- %addr.1 = getelementptr i64, ptr %base, i64 2
+ %addr.1 = getelementptr i64, ptr %base, iXLen 2
%res = add i64 %ld, %a
store i64 %res, ptr %addr.1
ret ptr %addr
}
define ptr @sbia(ptr %base, i8 %a, i8 %b) {
-; RV32XTHEADMEMIDX-LABEL: sbia:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV32XTHEADMEMIDX-NEXT: th.sbia a1, (a0), 1, 0
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: sbia:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV64XTHEADMEMIDX-NEXT: th.sbia a1, (a0), 1, 0
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr.1 = getelementptr i8, ptr %base, i8 1
+; CHECK-LABEL: sbia:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: th.sbia a1, (a0), 1, 0
+; CHECK-NEXT: ret
+ %addr.1 = getelementptr i8, ptr %base, iXLen 1
%res = add i8 %a, %b
store i8 %res, ptr %base
ret ptr %addr.1
}
define ptr @sbib(ptr %base, i8 %a, i8 %b) {
-; RV32XTHEADMEMIDX-LABEL: sbib:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV32XTHEADMEMIDX-NEXT: th.sbib a1, (a0), 1, 0
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: sbib:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV64XTHEADMEMIDX-NEXT: th.sbib a1, (a0), 1, 0
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr.1 = getelementptr i8, ptr %base, i8 1
+; CHECK-LABEL: sbib:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: th.sbib a1, (a0), 1, 0
+; CHECK-NEXT: ret
+ %addr.1 = getelementptr i8, ptr %base, iXLen 1
%res = add i8 %a, %b
store i8 %res, ptr %addr.1
ret ptr %addr.1
}
define ptr @shia(ptr %base, i16 %a, i16 %b) {
-; RV32XTHEADMEMIDX-LABEL: shia:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV32XTHEADMEMIDX-NEXT: th.shia a1, (a0), -9, 1
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: shia:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV64XTHEADMEMIDX-NEXT: th.shia a1, (a0), -9, 1
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr.1 = getelementptr i16, ptr %base, i16 -9
+; CHECK-LABEL: shia:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: th.shia a1, (a0), -9, 1
+; CHECK-NEXT: ret
+ %addr.1 = getelementptr i16, ptr %base, iXLen -9
%res = add i16 %a, %b
store i16 %res, ptr %base
ret ptr %addr.1
}
define ptr @shib(ptr %base, i16 %a, i16 %b) {
-; RV32XTHEADMEMIDX-LABEL: shib:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV32XTHEADMEMIDX-NEXT: th.shib a1, (a0), 2, 0
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: shib:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV64XTHEADMEMIDX-NEXT: th.shib a1, (a0), 2, 0
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr.1 = getelementptr i16, ptr %base, i16 1
+; CHECK-LABEL: shib:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: th.shib a1, (a0), 2, 0
+; CHECK-NEXT: ret
+ %addr.1 = getelementptr i16, ptr %base, iXLen 1
%res = add i16 %a, %b
store i16 %res, ptr %addr.1
ret ptr %addr.1
}
define ptr @swia(ptr %base, i32 %a, i32 %b) {
-; RV32XTHEADMEMIDX-LABEL: swia:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV32XTHEADMEMIDX-NEXT: th.swia a1, (a0), 8, 2
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: swia:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV64XTHEADMEMIDX-NEXT: th.swia a1, (a0), 8, 2
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr.1 = getelementptr i32, ptr %base, i32 8
+; CHECK-LABEL: swia:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: th.swia a1, (a0), 8, 2
+; CHECK-NEXT: ret
+ %addr.1 = getelementptr i32, ptr %base, iXLen 8
%res = add i32 %a, %b
store i32 %res, ptr %base
ret ptr %addr.1
}
define ptr @swib(ptr %base, i32 %a, i32 %b) {
-; RV32XTHEADMEMIDX-LABEL: swib:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV32XTHEADMEMIDX-NEXT: th.swib a1, (a0), -13, 3
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: swib:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV64XTHEADMEMIDX-NEXT: th.swib a1, (a0), -13, 3
-; RV64XTHEADMEMIDX-NEXT: ret
- %addr.1 = getelementptr i32, ptr %base, i32 -26
+; CHECK-LABEL: swib:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: th.swib a1, (a0), -13, 3
+; CHECK-NEXT: ret
+ %addr.1 = getelementptr i32, ptr %base, iXLen -26
%res = add i32 %a, %b
store i32 %res, ptr %addr.1
ret ptr %addr.1
@@ -470,7 +352,7 @@ define ptr @sdia(ptr %base, i64 %a, i64 %b) {
; RV64XTHEADMEMIDX-NEXT: add a1, a1, a2
; RV64XTHEADMEMIDX-NEXT: th.sdia a1, (a0), 8, 3
; RV64XTHEADMEMIDX-NEXT: ret
- %addr.1 = getelementptr i64, ptr %base, i64 8
+ %addr.1 = getelementptr i64, ptr %base, iXLen 8
%res = add i64 %a, %b
store i64 %res, ptr %base
ret ptr %addr.1
@@ -492,48 +374,33 @@ define ptr @sdib(ptr %base, i64 %a, i64 %b) {
; RV64XTHEADMEMIDX-NEXT: add a1, a1, a2
; RV64XTHEADMEMIDX-NEXT: th.sdib a1, (a0), 8, 0
; RV64XTHEADMEMIDX-NEXT: ret
- %addr.1 = getelementptr i64, ptr %base, i64 1
+ %addr.1 = getelementptr i64, ptr %base, iXLen 1
%res = add i64 %a, %b
store i64 %res, ptr %addr.1
ret ptr %addr.1
}
-define i8 @lrb_anyext(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: lrb_anyext:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lrb a0, a0, a1, 0
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lrb_anyext:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lrb a0, a0, a1, 0
-; RV64XTHEADMEMIDX-NEXT: ret
- %1 = getelementptr i8, ptr %a, i64 %b
+define i8 @lrb_anyext(ptr %a, iXLen %b) {
+; CHECK-LABEL: lrb_anyext:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lrb a0, a0, a1, 0
+; CHECK-NEXT: ret
+ %1 = getelementptr i8, ptr %a, iXLen %b
%2 = load i8, ptr %1, align 1
ret i8 %2
}
-define i64 @lrb(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: lrb:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lrb a1, a0, a1, 0
-; RV32XTHEADMEMIDX-NEXT: srai a2, a1, 31
-; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1
-; RV32XTHEADMEMIDX-NEXT: add a2, a2, a2
-; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lrb:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lrb a0, a0, a1, 0
-; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0
-; RV64XTHEADMEMIDX-NEXT: ret
- %1 = getelementptr i8, ptr %a, i64 %b
+define i32 @lrb(ptr %a, iXLen %b) {
+; CHECK-LABEL: lrb:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lrb a0, a0, a1, 0
+; CHECK-NEXT: add a0, a0, a0
+; CHECK-NEXT: ret
+ %1 = getelementptr i8, ptr %a, iXLen %b
%2 = load i8, ptr %1, align 1
- %3 = sext i8 %2 to i64
- %4 = add i64 %3, %3
- ret i64 %4
+ %3 = sext i8 %2 to i32
+ %4 = add i32 %3, %3
+ ret i32 %4
}
define i8 @lurb_anyext(ptr %a, i32 %b) {
@@ -552,15 +419,11 @@ define i8 @lurb_anyext(ptr %a, i32 %b) {
ret i8 %3
}
-define i64 @lurb(ptr %a, i32 %b) {
+define i32 @lurb(ptr %a, i32 %b) {
; RV32XTHEADMEMIDX-LABEL: lurb:
; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lrb a1, a0, a1, 0
-; RV32XTHEADMEMIDX-NEXT: srai a2, a1, 31
-; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1
-; RV32XTHEADMEMIDX-NEXT: add a2, a2, a2
-; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1
+; RV32XTHEADMEMIDX-NEXT: th.lrb a0, a0, a1, 0
+; RV32XTHEADMEMIDX-NEXT: add a0, a0, a0
; RV32XTHEADMEMIDX-NEXT: ret
;
; RV64XTHEADMEMIDX-LABEL: lurb:
@@ -571,37 +434,29 @@ define i64 @lurb(ptr %a, i32 %b) {
%1 = zext i32 %b to i64
%2 = getelementptr i8, ptr %a, i64 %1
%3 = load i8, ptr %2, align 1
- %4 = sext i8 %3 to i64
- %5 = add i64 %4, %4
- ret i64 %5
-}
-
-define i64 @lrbu(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: lrbu:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lrbu a1, a0, a1, 0
-; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lrbu:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lrbu a0, a0, a1, 0
-; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0
-; RV64XTHEADMEMIDX-NEXT: ret
- %1 = getelementptr i8, ptr %a, i64 %b
+ %4 = sext i8 %3 to i32
+ %5 = add i32 %4, %4
+ ret i32 %5
+}
+
+define i32 @lrbu(ptr %a, iXLen %b) {
+; CHECK-LABEL: lrbu:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lrbu a0, a0, a1, 0
+; CHECK-NEXT: add a0, a0, a0
+; CHECK-NEXT: ret
+ %1 = getelementptr i8, ptr %a, iXLen %b
%2 = load i8, ptr %1, align 1
- %3 = zext i8 %2 to i64
- %4 = add i64 %3, %3
- ret i64 %4
+ %3 = zext i8 %2 to i32
+ %4 = add i32 %3, %3
+ ret i32 %4
}
-define i64 @lurbu(ptr %a, i32 %b) {
+define i32 @lurbu(ptr %a, i32 %b) {
; RV32XTHEADMEMIDX-LABEL: lurbu:
; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lrbu a1, a0, a1, 0
-; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1
+; RV32XTHEADMEMIDX-NEXT: th.lrbu a0, a0, a1, 0
+; RV32XTHEADMEMIDX-NEXT: add a0, a0, a0
; RV32XTHEADMEMIDX-NEXT: ret
;
; RV64XTHEADMEMIDX-LABEL: lurbu:
@@ -612,47 +467,32 @@ define i64 @lurbu(ptr %a, i32 %b) {
%1 = zext i32 %b to i64
%2 = getelementptr i8, ptr %a, i64 %1
%3 = load i8, ptr %2, align 1
- %4 = zext i8 %3 to i64
- %5 = add i64 %4, %4
- ret i64 %5
+ %4 = zext i8 %3 to i32
+ %5 = add i32 %4, %4
+ ret i32 %5
}
-define i16 @lrh_anyext(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: lrh_anyext:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lrh a0, a0, a1, 1
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lrh_anyext:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lrh a0, a0, a1, 1
-; RV64XTHEADMEMIDX-NEXT: ret
- %1 = getelementptr i16, ptr %a, i64 %b
+define i16 @lrh_anyext(ptr %a, iXLen %b) {
+; CHECK-LABEL: lrh_anyext:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lrh a0, a0, a1, 1
+; CHECK-NEXT: ret
+ %1 = getelementptr i16, ptr %a, iXLen %b
%2 = load i16, ptr %1, align 2
ret i16 %2
}
-define i64 @lrh(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: lrh:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lrh a1, a0, a1, 1
-; RV32XTHEADMEMIDX-NEXT: srai a2, a1, 31
-; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1
-; RV32XTHEADMEMIDX-NEXT: add a2, a2, a2
-; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lrh:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lrh a0, a0, a1, 1
-; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0
-; RV64XTHEADMEMIDX-NEXT: ret
- %1 = getelementptr i16, ptr %a, i64 %b
+define i32 @lrh(ptr %a, iXLen %b) {
+; CHECK-LABEL: lrh:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lrh a0, a0, a1, 1
+; CHECK-NEXT: add a0, a0, a0
+; CHECK-NEXT: ret
+ %1 = getelementptr i16, ptr %a, iXLen %b
%2 = load i16, ptr %1, align 2
- %3 = sext i16 %2 to i64
- %4 = add i64 %3, %3
- ret i64 %4
+ %3 = sext i16 %2 to i32
+ %4 = add i32 %3, %3
+ ret i32 %4
}
define i16 @lurh_anyext(ptr %a, i32 %b) {
@@ -671,15 +511,11 @@ define i16 @lurh_anyext(ptr %a, i32 %b) {
ret i16 %3
}
-define i64 @lurh(ptr %a, i32 %b) {
+define i32 @lurh(ptr %a, i32 %b) {
; RV32XTHEADMEMIDX-LABEL: lurh:
; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lrh a1, a0, a1, 1
-; RV32XTHEADMEMIDX-NEXT: srai a2, a1, 31
-; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1
-; RV32XTHEADMEMIDX-NEXT: add a2, a2, a2
-; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1
+; RV32XTHEADMEMIDX-NEXT: th.lrh a0, a0, a1, 1
+; RV32XTHEADMEMIDX-NEXT: add a0, a0, a0
; RV32XTHEADMEMIDX-NEXT: ret
;
; RV64XTHEADMEMIDX-LABEL: lurh:
@@ -690,37 +526,29 @@ define i64 @lurh(ptr %a, i32 %b) {
%1 = zext i32 %b to i64
%2 = getelementptr i16, ptr %a, i64 %1
%3 = load i16, ptr %2, align 2
- %4 = sext i16 %3 to i64
- %5 = add i64 %4, %4
- ret i64 %5
-}
-
-define i64 @lrhu(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: lrhu:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lrhu a1, a0, a1, 1
-; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lrhu:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lrhu a0, a0, a1, 1
-; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0
-; RV64XTHEADMEMIDX-NEXT: ret
- %1 = getelementptr i16, ptr %a, i64 %b
+ %4 = sext i16 %3 to i32
+ %5 = add i32 %4, %4
+ ret i32 %5
+}
+
+define i32 @lrhu(ptr %a, iXLen %b) {
+; CHECK-LABEL: lrhu:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lrhu a0, a0, a1, 1
+; CHECK-NEXT: add a0, a0, a0
+; CHECK-NEXT: ret
+ %1 = getelementptr i16, ptr %a, iXLen %b
%2 = load i16, ptr %1, align 2
- %3 = zext i16 %2 to i64
- %4 = add i64 %3, %3
- ret i64 %4
+ %3 = zext i16 %2 to i32
+ %4 = add i32 %3, %3
+ ret i32 %4
}
-define i64 @lurhu(ptr %a, i32 %b) {
+define i32 @lurhu(ptr %a, i32 %b) {
; RV32XTHEADMEMIDX-LABEL: lurhu:
; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lrhu a1, a0, a1, 1
-; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1
-; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1
+; RV32XTHEADMEMIDX-NEXT: th.lrhu a0, a0, a1, 1
+; RV32XTHEADMEMIDX-NEXT: add a0, a0, a0
; RV32XTHEADMEMIDX-NEXT: ret
;
; RV64XTHEADMEMIDX-LABEL: lurhu:
@@ -731,27 +559,22 @@ define i64 @lurhu(ptr %a, i32 %b) {
%1 = zext i32 %b to i64
%2 = getelementptr i16, ptr %a, i64 %1
%3 = load i16, ptr %2, align 2
- %4 = zext i16 %3 to i64
- %5 = add i64 %4, %4
- ret i64 %5
+ %4 = zext i16 %3 to i32
+ %5 = add i32 %4, %4
+ ret i32 %5
}
-define i32 @lrw_anyext(ptr %a, i64 %b) {
-; RV32XTHEADMEMIDX-LABEL: lrw_anyext:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: th.lrw a0, a0, a1, 2
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: lrw_anyext:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: th.lrw a0, a0, a1, 2
-; RV64XTHEADMEMIDX-NEXT: ret
- %1 = getelementptr i32, ptr %a, i64 %b
+define i32 @lrw_anyext(ptr %a, iXLen %b) {
+; CHECK-LABEL: lrw_anyext:
+; CHECK: # %bb.0:
+; CHECK-NEXT: th.lrw a0, a0, a1, 2
+; CHECK-NEXT: ret
+ %1 = getelementptr i32, ptr %a, iXLen %b
%2 = load i32, ptr %1, align 4
ret i32 %2
}
-define i64 @lrw(ptr %a, i64 %b) {
+define i64 @lrw(ptr %a, iXLen %b) {
; RV32XTHEADMEMIDX-LABEL: lrw:
; RV32XTHEADMEMIDX: # %bb.0:
; RV32XTHEADMEMIDX-NEXT: th.lrw a1, a0, a1, 2
@@ -767,7 +590,7 @@ define i64 @lrw(ptr %a, i64 %b) {
; RV64XTHEADMEMIDX-NEXT: th.lrw a0, a0, a1, 2
; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0
; RV64XTHEADMEMIDX-NEXT: ret
- %1 = getelementptr i32, ptr %a, i64 %b
+ %1 = getelementptr i32, ptr %a, iXLen %b
%2 = load i32, ptr %1, align 4
%3 = sext i32 %2 to i64
%4 = add i64 %3, %3
@@ -814,7 +637,7 @@ define i64 @lurw(ptr %a, i32 %b) {
ret i64 %5
}
-define i64 @lrwu(ptr %a, i64 %b) {
+define i64 @lrwu(ptr %a, iXLen %b) {
; RV32XTHEADMEMIDX-LABEL: lrwu:
; RV32XTHEADMEMIDX: # %bb.0:
; RV32XTHEADMEMIDX-NEXT: th.lrw a1, a0, a1, 2
@@ -827,7 +650,7 @@ define i64 @lrwu(ptr %a, i64 %b) {
; RV64XTHEADMEMIDX-NEXT: th.lrwu a0, a0, a1, 2
; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0
; RV64XTHEADMEMIDX-NEXT: ret
- %1 = getelementptr i32, ptr %a, i64 %b
+ %1 = getelementptr i32, ptr %a, iXLen %b
%2 = load i32, ptr %1, align 4
%3 = zext i32 %2 to i64
%4 = add i64 %3, %3
@@ -855,7 +678,7 @@ define i64 @lurwu(ptr %a, i32 %b) {
ret i64 %5
}
-define i64 @lrd(ptr %a, i64 %b) {
+define i64 @lrd(ptr %a, iXLen %b) {
; RV32XTHEADMEMIDX-LABEL: lrd:
; RV32XTHEADMEMIDX: # %bb.0:
; RV32XTHEADMEMIDX-NEXT: th.lrw a2, a0, a1, 3
@@ -872,23 +695,23 @@ define i64 @lrd(ptr %a, i64 %b) {
; RV64XTHEADMEMIDX-NEXT: th.lrd a0, a0, a1, 3
; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0
; RV64XTHEADMEMIDX-NEXT: ret
- %1 = getelementptr i64, ptr %a, i64 %b
+ %1 = getelementptr i64, ptr %a, iXLen %b
%2 = load i64, ptr %1, align 8
%3 = add i64 %2, %2
ret i64 %3
}
-define i64 @lrd_2(ptr %a, i64 %b) {
+define i64 @lrd_2(ptr %a, iXLen %b) {
; RV32XTHEADMEMIDX-LABEL: lrd_2:
; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: addi a2, a0, 96
-; RV32XTHEADMEMIDX-NEXT: th.lrw a2, a2, a1, 3
-; RV32XTHEADMEMIDX-NEXT: addi a0, a0, 100
-; RV32XTHEADMEMIDX-NEXT: th.lrw a1, a0, a1, 3
-; RV32XTHEADMEMIDX-NEXT: add a0, a2, a2
-; RV32XTHEADMEMIDX-NEXT: sltu a2, a0, a2
-; RV32XTHEADMEMIDX-NEXT: add a1, a1, a1
-; RV32XTHEADMEMIDX-NEXT: add a1, a1, a2
+; RV32XTHEADMEMIDX-NEXT: slli a1, a1, 3
+; RV32XTHEADMEMIDX-NEXT: add a0, a1, a0
+; RV32XTHEADMEMIDX-NEXT: lw a1, 96(a0)
+; RV32XTHEADMEMIDX-NEXT: lw a2, 100(a0)
+; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1
+; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1
+; RV32XTHEADMEMIDX-NEXT: add a2, a2, a2
+; RV32XTHEADMEMIDX-NEXT: add a1, a2, a1
; RV32XTHEADMEMIDX-NEXT: ret
;
; RV64XTHEADMEMIDX-LABEL: lrd_2:
@@ -897,8 +720,8 @@ define i64 @lrd_2(ptr %a, i64 %b) {
; RV64XTHEADMEMIDX-NEXT: th.lrd a0, a0, a1, 3
; RV64XTHEADMEMIDX-NEXT: add a0, a0, a0
; RV64XTHEADMEMIDX-NEXT: ret
- %1 = add i64 %b, 12
- %2 = getelementptr i64, ptr %a, i64 %1
+ %1 = add iXLen %b, 12
+ %2 = getelementptr i64, ptr %a, iXLen %1
%3 = load i64, ptr %2, align 8
%4 = add i64 %3, %3
ret i64 %4
@@ -928,20 +751,14 @@ define i64 @lurd(ptr %a, i32 %b) {
ret i64 %4
}
-define void @srb(ptr %a, i64 %b, i8 %c) {
-; RV32XTHEADMEMIDX-LABEL: srb:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: add a3, a3, a3
-; RV32XTHEADMEMIDX-NEXT: th.srb a3, a0, a1, 0
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: srb:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: add a2, a2, a2
-; RV64XTHEADMEMIDX-NEXT: th.srb a2, a0, a1, 0
-; RV64XTHEADMEMIDX-NEXT: ret
+define void @srb(ptr %a, iXLen %b, i8 %c) {
+; CHECK-LABEL: srb:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a2, a2, a2
+; CHECK-NEXT: th.srb a2, a0, a1, 0
+; CHECK-NEXT: ret
%1 = add i8 %c, %c
- %2 = getelementptr i8, ptr %a, i64 %b
+ %2 = getelementptr i8, ptr %a, iXLen %b
store i8 %1, ptr %2, align 1
ret void
}
@@ -965,20 +782,14 @@ define void @surb(ptr %a, i32 %b, i8 %c) {
ret void
}
-define void @srh(ptr %a, i64 %b, i16 %c) {
-; RV32XTHEADMEMIDX-LABEL: srh:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: add a3, a3, a3
-; RV32XTHEADMEMIDX-NEXT: th.srh a3, a0, a1, 1
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: srh:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: add a2, a2, a2
-; RV64XTHEADMEMIDX-NEXT: th.srh a2, a0, a1, 1
-; RV64XTHEADMEMIDX-NEXT: ret
+define void @srh(ptr %a, iXLen %b, i16 %c) {
+; CHECK-LABEL: srh:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a2, a2, a2
+; CHECK-NEXT: th.srh a2, a0, a1, 1
+; CHECK-NEXT: ret
%1 = add i16 %c, %c
- %2 = getelementptr i16, ptr %a, i64 %b
+ %2 = getelementptr i16, ptr %a, iXLen %b
store i16 %1, ptr %2, align 2
ret void
}
@@ -1002,20 +813,14 @@ define void @surh(ptr %a, i32 %b, i16 %c) {
ret void
}
-define void @srw(ptr %a, i64 %b, i32 %c) {
-; RV32XTHEADMEMIDX-LABEL: srw:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: add a3, a3, a3
-; RV32XTHEADMEMIDX-NEXT: th.srw a3, a0, a1, 2
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: srw:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: add a2, a2, a2
-; RV64XTHEADMEMIDX-NEXT: th.srw a2, a0, a1, 2
-; RV64XTHEADMEMIDX-NEXT: ret
+define void @srw(ptr %a, iXLen %b, i32 %c) {
+; CHECK-LABEL: srw:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a2, a2, a2
+; CHECK-NEXT: th.srw a2, a0, a1, 2
+; CHECK-NEXT: ret
%1 = add i32 %c, %c
- %2 = getelementptr i32, ptr %a, i64 %b
+ %2 = getelementptr i32, ptr %a, iXLen %b
store i32 %1, ptr %2, align 4
ret void
}
@@ -1039,16 +844,16 @@ define void @surw(ptr %a, i32 %b, i32 %c) {
ret void
}
-define void @srd(ptr %a, i64 %b, i64 %c) {
+define void @srd(ptr %a, iXLen %b, i64 %c) {
; RV32XTHEADMEMIDX-LABEL: srd:
; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: add a2, a3, a3
-; RV32XTHEADMEMIDX-NEXT: add a4, a4, a4
-; RV32XTHEADMEMIDX-NEXT: sltu a3, a2, a3
-; RV32XTHEADMEMIDX-NEXT: th.srw a2, a0, a1, 3
-; RV32XTHEADMEMIDX-NEXT: add a3, a4, a3
+; RV32XTHEADMEMIDX-NEXT: add a4, a2, a2
+; RV32XTHEADMEMIDX-NEXT: add a3, a3, a3
+; RV32XTHEADMEMIDX-NEXT: sltu a2, a4, a2
+; RV32XTHEADMEMIDX-NEXT: th.srw a4, a0, a1, 3
+; RV32XTHEADMEMIDX-NEXT: add a2, a3, a2
; RV32XTHEADMEMIDX-NEXT: addi a0, a0, 4
-; RV32XTHEADMEMIDX-NEXT: th.srw a3, a0, a1, 3
+; RV32XTHEADMEMIDX-NEXT: th.srw a2, a0, a1, 3
; RV32XTHEADMEMIDX-NEXT: ret
;
; RV64XTHEADMEMIDX-LABEL: srd:
@@ -1057,7 +862,7 @@ define void @srd(ptr %a, i64 %b, i64 %c) {
; RV64XTHEADMEMIDX-NEXT: th.srd a2, a0, a1, 3
; RV64XTHEADMEMIDX-NEXT: ret
%1 = add i64 %c, %c
- %2 = getelementptr i64, ptr %a, i64 %b
+ %2 = getelementptr i64, ptr %a, iXLen %b
store i64 %1, ptr %2, align 8
ret void
}
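The RV32 srd lowering above uses the standard two-register add-with-carry idiom for add i64 %c, %c: an unsigned add wraps exactly when the sum is smaller than either operand, so sltu recovers the carry out of the low word. Annotated against the register assignment in the new output (the two th.srw stores interleave with this sequence after scheduling):

  add  a4, a2, a2     # low  = c_lo + c_lo
  add  a3, a3, a3     # high = c_hi + c_hi, carry not yet applied
  sltu a2, a4, a2     # carry = (low < c_lo), i.e. the low add wrapped
  add  a2, a3, a2     # high += carry

Narrowing the accumulator types from i64 to i32 elsewhere in this file is what deletes these carry chains from the RV32 checks.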
@@ -1087,24 +892,18 @@ define void @surd(ptr %a, i32 %b, i64 %c) {
}
define ptr @test_simm5(ptr %base, i32 %a, i32 %b) {
-; RV32XTHEADMEMIDX-LABEL: test_simm5:
-; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV32XTHEADMEMIDX-NEXT: th.swia a1, (a0), -12, 2
-; RV32XTHEADMEMIDX-NEXT: ret
-;
-; RV64XTHEADMEMIDX-LABEL: test_simm5:
-; RV64XTHEADMEMIDX: # %bb.0:
-; RV64XTHEADMEMIDX-NEXT: add a1, a1, a2
-; RV64XTHEADMEMIDX-NEXT: th.swia a1, (a0), -12, 2
-; RV64XTHEADMEMIDX-NEXT: ret
+; CHECK-LABEL: test_simm5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: add a1, a1, a2
+; CHECK-NEXT: th.swia a1, (a0), -12, 2
+; CHECK-NEXT: ret
%addr.1 = getelementptr i32, ptr %base, i32 -12
%res = add i32 %a, %b
store i32 %res, ptr %base
ret ptr %addr.1
}
-define i64 @lrd_large_shift(ptr %a, i64 %b) {
+define i64 @lrd_large_shift(ptr %a, iXLen %b) {
; RV32XTHEADMEMIDX-LABEL: lrd_large_shift:
; RV32XTHEADMEMIDX: # %bb.0:
; RV32XTHEADMEMIDX-NEXT: slli a1, a1, 5
@@ -1119,14 +918,14 @@ define i64 @lrd_large_shift(ptr %a, i64 %b) {
; RV64XTHEADMEMIDX-NEXT: add a0, a1, a0
; RV64XTHEADMEMIDX-NEXT: ld a0, 384(a0)
; RV64XTHEADMEMIDX-NEXT: ret
- %1 = add i64 %b, 12
- %2 = shl i64 %1, 2
- %3 = getelementptr i64, ptr %a, i64 %2
+ %1 = add iXLen %b, 12
+ %2 = shl iXLen %1, 2
+ %3 = getelementptr i64, ptr %a, iXLen %2
%4 = load i64, ptr %3, align 8
ret i64 %4
}
-define i64 @lrd_large_offset(ptr %a, i64 %b) {
+define i64 @lrd_large_offset(ptr %a, iXLen %b) {
; RV32XTHEADMEMIDX-LABEL: lrd_large_offset:
; RV32XTHEADMEMIDX: # %bb.0:
; RV32XTHEADMEMIDX-NEXT: slli a1, a1, 3
@@ -1145,8 +944,8 @@ define i64 @lrd_large_offset(ptr %a, i64 %b) {
; RV64XTHEADMEMIDX-NEXT: add a0, a0, a1
; RV64XTHEADMEMIDX-NEXT: ld a0, 1792(a0)
; RV64XTHEADMEMIDX-NEXT: ret
- %1 = add i64 %b, 12000
- %2 = getelementptr i64, ptr %a, i64 %1
+ %1 = add iXLen %b, 12000
+ %2 = getelementptr i64, ptr %a, iXLen %1
%3 = load i64, ptr %2, align 8
ret i64 %3
}
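lrd_large_shift and lrd_large_offset pin down the limits of the scaled-index forms: th.lrw/th.lrd encode the scale as a 2-bit shift field (0-3 per the XTheadMemIdx spec), and lrd_large_shift needs a combined shift of 5 (index << 2, then *8 for the i64 element), so the address is formed explicitly instead, as in the RV32 output earlier in this diff:

  slli a1, a1, 5       # index * 32: shift 5 exceeds the 2-bit shift field
  add  a1, a1, a0      # base + scaled index
  lw   a0, 384(a1)     # low word; the +12 index bias folds to 12*32 = 384
  lw   a1, 388(a1)     # high word

lrd_large_offset likewise overflows the mode with a 96000-byte bias (12000 i64 elements); only 1792 of it survives in the ld displacement, the rest being materialized into the index register.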