Diffstat (limited to 'llvm/test/CodeGen/RISCV/rvv/remat.ll')
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/remat.ll  132
1 file changed, 132 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/RISCV/rvv/remat.ll b/llvm/test/CodeGen/RISCV/rvv/remat.ll
index 06d54fa..95bff27 100644
--- a/llvm/test/CodeGen/RISCV/rvv/remat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/remat.ll
@@ -301,3 +301,135 @@ define void @vfmv.s.f(ptr %p, double %x) {
store volatile double %x, ptr %p
ret void
}
+
+; This test is fairly fragile, but it's trying to cover the case which
+; caused the revert of bba9172 due to an interaction with how rematerialized
+; instructions are pruned from the original live interval. In the result
+; below, we remat the vmv.v.x into the loop, but fail to remat the vmv.v.x
+; a second time after further splitting its live range. We shouldn't need
+; to spill it to the stack at all.
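+;
+; As a simplified sketch of the difference (not the literal output below):
+; ideally the splat of %0 would simply be rematerialized at each use point, e.g.
+;   vmv.v.x v8, a0    # recompute the splat, no stack traffic needed
+; instead of being kept live across the loop through a spill/reload pair like
+;   vs8r.v v8, (a4)   # vscale x 64-byte Folded Spill
+;   vl8r.v v8, (a4)   # vscale x 64-byte Folded Reload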
+define i64 @dual_remat(i64 %0, <vscale x 16 x i64> %1, <vscale x 16 x i64> %2, ptr %p) #0 {
+; CHECK-LABEL: dual_remat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 5
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x21, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 33 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # vscale x 64-byte Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: srli a1, a2, 3
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: vmv.v.i v0, 0
+; CHECK-NEXT: .LBB8_1: # %vector.body
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: mv a5, a4
+; CHECK-NEXT: slli a4, a4, 3
+; CHECK-NEXT: add a5, a5, a4
+; CHECK-NEXT: slli a4, a4, 1
+; CHECK-NEXT: add a4, a4, a5
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vs8r.v v16, (a4) # vscale x 64-byte Folded Spill
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: slli a5, a4, 4
+; CHECK-NEXT: add a4, a5, a4
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vs8r.v v8, (a4) # vscale x 64-byte Folded Spill
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: mv a5, a4
+; CHECK-NEXT: slli a4, a4, 3
+; CHECK-NEXT: add a5, a5, a4
+; CHECK-NEXT: slli a4, a4, 1
+; CHECK-NEXT: add a4, a4, a5
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vl8r.v v16, (a4) # vscale x 64-byte Folded Reload
+; CHECK-NEXT: vand.vv v16, v16, v8
+; CHECK-NEXT: vmsne.vi v24, v16, 0
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: slli a4, a4, 4
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vs1r.v v24, (a4) # vscale x 8-byte Folded Spill
+; CHECK-NEXT: vand.vv v16, v0, v8
+; CHECK-NEXT: vmsne.vi v8, v16, 0
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: mv a5, a4
+; CHECK-NEXT: slli a4, a4, 3
+; CHECK-NEXT: add a5, a5, a4
+; CHECK-NEXT: slli a4, a4, 1
+; CHECK-NEXT: add a4, a4, a5
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vl8r.v v16, (a4) # vscale x 64-byte Folded Reload
+; CHECK-NEXT: csrr a4, vlenb
+; CHECK-NEXT: slli a4, a4, 4
+; CHECK-NEXT: add a4, sp, a4
+; CHECK-NEXT: addi a4, a4, 16
+; CHECK-NEXT: vl1r.v v9, (a4) # vscale x 8-byte Folded Reload
+; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma
+; CHECK-NEXT: vslideup.vx v9, v8, a1
+; CHECK-NEXT: vsetvli a4, zero, e8, m2, ta, ma
+; CHECK-NEXT: vcpop.m a4, v9
+; CHECK-NEXT: csrr a5, vlenb
+; CHECK-NEXT: slli a6, a5, 4
+; CHECK-NEXT: add a5, a6, a5
+; CHECK-NEXT: add a5, sp, a5
+; CHECK-NEXT: addi a5, a5, 16
+; CHECK-NEXT: vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
+; CHECK-NEXT: vs8r.v v8, (a3)
+; CHECK-NEXT: vs8r.v v8, (a2)
+; CHECK-NEXT: addi a5, sp, 16
+; CHECK-NEXT: vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
+; CHECK-NEXT: vsetvli a5, zero, e64, m8, ta, ma
+; CHECK-NEXT: vor.vv v16, v16, v8
+; CHECK-NEXT: csrr a5, vlenb
+; CHECK-NEXT: slli a5, a5, 3
+; CHECK-NEXT: add a5, sp, a5
+; CHECK-NEXT: addi a5, a5, 16
+; CHECK-NEXT: vl8r.v v8, (a5) # vscale x 64-byte Folded Reload
+; CHECK-NEXT: vor.vv v0, v0, v8
+; CHECK-NEXT: beqz a4, .LBB8_1
+; CHECK-NEXT: # %bb.2: # %middle.block
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 5
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add sp, sp, a1
+; CHECK-NEXT: .cfi_def_cfa sp, 16
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: ret
+entry:
+ %broadcast.splatinsert = insertelement <vscale x 16 x i64> zeroinitializer, i64 %0, i64 0
+ %broadcast.splat = shufflevector <vscale x 16 x i64> %broadcast.splatinsert, <vscale x 16 x i64> zeroinitializer, <vscale x 16 x i32> zeroinitializer
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %entry
+ %vec.ind = phi <vscale x 16 x i64> [ zeroinitializer, %entry ], [ %vec.ind.next, %vector.body ]
+ %3 = and <vscale x 16 x i64> %vec.ind, %broadcast.splat
+ %4 = icmp ne <vscale x 16 x i64> %3, zeroinitializer
+ store <vscale x 16 x i64> %broadcast.splat, ptr %p
+ %5 = tail call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> %4)
+ %vec.ind.next = or <vscale x 16 x i64> %vec.ind, %1
+ br i1 %5, label %middle.block, label %vector.body
+
+middle.block: ; preds = %vector.body
+ %and.i = and i64 1, %0
+ ret i64 %and.i
+}