diff options
Diffstat (limited to 'llvm/test/CodeGen/RISCV/rvv')
101 files changed, 9376 insertions, 10656 deletions
diff --git a/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll b/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll index 139579b..9822e0d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll @@ -112,9 +112,9 @@ define <32 x i1> @fv32(ptr %p, i64 %index, i64 %tc) { ; CHECK-NEXT: vmsltu.vx v0, v16, a2 ; CHECK-NEXT: vsext.vf8 v16, v8 ; CHECK-NEXT: vsaddu.vx v8, v16, a1 -; CHECK-NEXT: vmsltu.vx v16, v8, a2 +; CHECK-NEXT: vmsltu.vx v8, v8, a2 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-NEXT: vslideup.vi v0, v16, 2 +; CHECK-NEXT: vslideup.vi v0, v8, 2 ; CHECK-NEXT: ret %mask = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 %index, i64 %tc) ret <32 x i1> %mask @@ -132,27 +132,27 @@ define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) { ; CHECK-NEXT: vmsltu.vx v0, v16, a2 ; CHECK-NEXT: vsext.vf8 v16, v8 ; CHECK-NEXT: vsaddu.vx v8, v16, a1 -; CHECK-NEXT: vmsltu.vx v16, v8, a2 +; CHECK-NEXT: vmsltu.vx v8, v8, a2 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v0, v16, 2 +; CHECK-NEXT: vslideup.vi v0, v8, 2 ; CHECK-NEXT: lui a0, %hi(.LCPI9_1) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_1) ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vsext.vf8 v16, v8 ; CHECK-NEXT: vsaddu.vx v8, v16, a1 -; CHECK-NEXT: vmsltu.vx v16, v8, a2 +; CHECK-NEXT: vmsltu.vx v8, v8, a2 ; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v0, v16, 4 +; CHECK-NEXT: vslideup.vi v0, v8, 4 ; CHECK-NEXT: lui a0, %hi(.LCPI9_2) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_2) ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vsext.vf8 v16, v8 ; CHECK-NEXT: vsaddu.vx v8, v16, a1 -; CHECK-NEXT: vmsltu.vx v16, v8, a2 +; CHECK-NEXT: vmsltu.vx v8, v8, a2 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vi v0, v16, 6 +; CHECK-NEXT: vslideup.vi v0, v8, 6 ; CHECK-NEXT: ret %mask = call <64 x i1> 
@llvm.get.active.lane.mask.v64i1.i64(i64 %index, i64 %tc) ret <64 x i1> %mask @@ -170,30 +170,30 @@ define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) { ; CHECK-NEXT: vle8.v v9, (a0) ; CHECK-NEXT: vsext.vf8 v16, v8 ; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v10, v16, a2 +; CHECK-NEXT: vmsltu.vx v0, v16, a2 ; CHECK-NEXT: vsext.vf8 v16, v9 -; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v8, v16, a2 +; CHECK-NEXT: vsaddu.vx v8, v16, a1 +; CHECK-NEXT: vmsltu.vx v8, v8, a2 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v10, 2 +; CHECK-NEXT: vslideup.vi v8, v0, 2 ; CHECK-NEXT: lui a0, %hi(.LCPI10_2) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_2) ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle8.v v9, (a0) ; CHECK-NEXT: vsext.vf8 v16, v9 ; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v9, v16, a2 +; CHECK-NEXT: vmsltu.vx v0, v16, a2 ; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v9, 4 +; CHECK-NEXT: vslideup.vi v8, v0, 4 ; CHECK-NEXT: lui a0, %hi(.LCPI10_3) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_3) ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle8.v v9, (a0) ; CHECK-NEXT: vsext.vf8 v16, v9 ; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v9, v16, a2 +; CHECK-NEXT: vmsltu.vx v0, v16, a2 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 6 +; CHECK-NEXT: vslideup.vi v8, v0, 6 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: lui a0, %hi(.LCPI10_4) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_4) @@ -203,27 +203,27 @@ define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) { ; CHECK-NEXT: vmsltu.vx v0, v16, a2 ; CHECK-NEXT: vsext.vf8 v16, v9 ; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v9, v16, a2 +; CHECK-NEXT: vmsltu.vx v16, v16, a2 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v0, v9, 2 +; CHECK-NEXT: vslideup.vi v0, v16, 2 ; 
CHECK-NEXT: lui a0, %hi(.LCPI10_5) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_5) ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle8.v v9, (a0) ; CHECK-NEXT: vsext.vf8 v16, v9 ; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v9, v16, a2 +; CHECK-NEXT: vmsltu.vx v16, v16, a2 ; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v0, v9, 4 +; CHECK-NEXT: vslideup.vi v0, v16, 4 ; CHECK-NEXT: lui a0, %hi(.LCPI10_6) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_6) ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle8.v v9, (a0) ; CHECK-NEXT: vsext.vf8 v16, v9 ; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v9, v16, a2 +; CHECK-NEXT: vmsltu.vx v16, v16, a2 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vi v0, v9, 6 +; CHECK-NEXT: vslideup.vi v0, v16, 6 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vslideup.vi v0, v8, 8 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/binop-splats.ll b/llvm/test/CodeGen/RISCV/rvv/binop-splats.ll index 6875925..53d1ae5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/binop-splats.ll +++ b/llvm/test/CodeGen/RISCV/rvv/binop-splats.ll @@ -88,11 +88,11 @@ define <vscale x 16 x i1> @nxv16i1(i1 %x, i1 %y) { ; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma ; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: vmsne.vi v10, v8, 0 +; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: vmsne.vi v11, v8, 0 -; CHECK-NEXT: vmxor.mm v0, v10, v11 +; CHECK-NEXT: vmsne.vi v2, v8, 0 +; CHECK-NEXT: vmxor.mm v0, v0, v2 ; CHECK-NEXT: ret %head.x = insertelement <vscale x 16 x i1> poison, i1 %x, i32 0 %splat.x = shufflevector <vscale x 16 x i1> %head.x, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer @@ -108,11 +108,11 @@ define <vscale x 32 x i1> @nxv32i1(i1 %x, i1 %y) { ; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: vsetvli a2, zero, e8, m4, ta, ma ; CHECK-NEXT: vmv.v.x v8, 
a0 -; CHECK-NEXT: vmsne.vi v12, v8, 0 +; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: vmsne.vi v13, v8, 0 -; CHECK-NEXT: vmxor.mm v0, v12, v13 +; CHECK-NEXT: vmsne.vi v4, v8, 0 +; CHECK-NEXT: vmxor.mm v0, v0, v4 ; CHECK-NEXT: ret %head.x = insertelement <vscale x 32 x i1> poison, i1 %x, i32 0 %splat.x = shufflevector <vscale x 32 x i1> %head.x, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer @@ -128,11 +128,11 @@ define <vscale x 64 x i1> @nxv64i1(i1 %x, i1 %y) { ; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma ; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: vmsne.vi v16, v8, 0 +; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: vmsne.vi v17, v8, 0 -; CHECK-NEXT: vmxor.mm v0, v16, v17 +; CHECK-NEXT: vmsne.vi v8, v8, 0 +; CHECK-NEXT: vmxor.mm v0, v0, v8 ; CHECK-NEXT: ret %head.x = insertelement <vscale x 64 x i1> poison, i1 %x, i32 0 %splat.x = shufflevector <vscale x 64 x i1> %head.x, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll index b446175..28de935 100644 --- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll @@ -2335,28 +2335,28 @@ define <vscale x 7 x i64> @vp_bitreverse_nxv7i64(<vscale x 7 x i64> %va, <vscale ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 16 ; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v16, v24, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 4 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v24, a2, 
v0.t -; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t +; RV32-NEXT: vand.vx v24, v24, a4, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: vand.vx v8, v8, a4, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -2423,21 +2423,21 @@ define <vscale x 7 x i64> @vp_bitreverse_nxv7i64(<vscale x 7 x i64> %va, <vscale ; RV64-NEXT: addi a2, sp, 16 ; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV64-NEXT: li a2, 56 +; RV64-NEXT: vsll.vx v24, v8, a2, v0.t ; RV64-NEXT: lui a3, 16 ; RV64-NEXT: addiw a3, a3, -256 -; RV64-NEXT: vand.vx v24, v8, a3, v0.t +; RV64-NEXT: vand.vx v16, v8, a3, v0.t ; RV64-NEXT: li a4, 40 -; RV64-NEXT: vsll.vx v24, v24, a4, v0.t -; RV64-NEXT: vsll.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vsll.vx v16, v16, a4, v0.t +; RV64-NEXT: vor.vv v16, v24, v16, v0.t ; RV64-NEXT: addi a5, sp, 16 ; RV64-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload ; RV64-NEXT: vor.vv v16, v16, v24, v0.t ; RV64-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV64-NEXT: vsrl.vx v24, v8, a4, v0.t -; RV64-NEXT: vand.vx v24, v24, a3, v0.t -; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v16, v24, v16, v0.t +; RV64-NEXT: vsrl.vx v24, v8, a2, v0.t +; RV64-NEXT: vsrl.vx v16, v8, a4, v0.t +; 
RV64-NEXT: vand.vx v16, v16, a3, v0.t +; RV64-NEXT: vor.vv v16, v16, v24, v0.t ; RV64-NEXT: vsrl.vi v24, v8, 24, v0.t ; RV64-NEXT: vand.vx v24, v24, a1, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t @@ -2697,28 +2697,28 @@ define <vscale x 8 x i64> @vp_bitreverse_nxv8i64(<vscale x 8 x i64> %va, <vscale ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 16 ; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v16, v24, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 4 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v24, a2, v0.t -; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t +; RV32-NEXT: vand.vx v24, v24, a4, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: vand.vx v8, v8, a4, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -2785,21 +2785,21 @@ define <vscale x 8 x i64> @vp_bitreverse_nxv8i64(<vscale x 8 x i64> %va, <vscale ; RV64-NEXT: addi a2, sp, 16 ; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV64-NEXT: 
li a2, 56 +; RV64-NEXT: vsll.vx v24, v8, a2, v0.t ; RV64-NEXT: lui a3, 16 ; RV64-NEXT: addiw a3, a3, -256 -; RV64-NEXT: vand.vx v24, v8, a3, v0.t +; RV64-NEXT: vand.vx v16, v8, a3, v0.t ; RV64-NEXT: li a4, 40 -; RV64-NEXT: vsll.vx v24, v24, a4, v0.t -; RV64-NEXT: vsll.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vsll.vx v16, v16, a4, v0.t +; RV64-NEXT: vor.vv v16, v24, v16, v0.t ; RV64-NEXT: addi a5, sp, 16 ; RV64-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload ; RV64-NEXT: vor.vv v16, v16, v24, v0.t ; RV64-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV64-NEXT: vsrl.vx v24, v8, a4, v0.t -; RV64-NEXT: vand.vx v24, v24, a3, v0.t -; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v16, v24, v16, v0.t +; RV64-NEXT: vsrl.vx v24, v8, a2, v0.t +; RV64-NEXT: vsrl.vx v16, v8, a4, v0.t +; RV64-NEXT: vand.vx v16, v16, a3, v0.t +; RV64-NEXT: vor.vv v16, v16, v24, v0.t ; RV64-NEXT: vsrl.vi v24, v8, 24, v0.t ; RV64-NEXT: vand.vx v24, v24, a1, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll index 7fbdfb3..7234601 100644 --- a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll @@ -1069,28 +1069,28 @@ define <vscale x 7 x i64> @vp_bswap_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v16, v24, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v24, a2, v0.t -; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; 
RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t +; RV32-NEXT: vand.vx v24, v24, a4, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: vand.vx v8, v8, a4, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -1127,21 +1127,21 @@ define <vscale x 7 x i64> @vp_bswap_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 ; RV64-NEXT: addi a2, sp, 16 ; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV64-NEXT: li a2, 56 +; RV64-NEXT: vsll.vx v24, v8, a2, v0.t ; RV64-NEXT: lui a3, 16 ; RV64-NEXT: addiw a3, a3, -256 -; RV64-NEXT: vand.vx v24, v8, a3, v0.t +; RV64-NEXT: vand.vx v16, v8, a3, v0.t ; RV64-NEXT: li a4, 40 -; RV64-NEXT: vsll.vx v24, v24, a4, v0.t -; RV64-NEXT: vsll.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vsll.vx v16, v16, a4, v0.t +; RV64-NEXT: vor.vv v16, v24, v16, v0.t ; RV64-NEXT: addi a5, sp, 16 ; RV64-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload ; RV64-NEXT: vor.vv v16, v16, v24, v0.t ; RV64-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV64-NEXT: vsrl.vx v24, v8, a4, v0.t -; RV64-NEXT: vand.vx v24, v24, a3, v0.t -; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v16, v24, v16, v0.t +; RV64-NEXT: vsrl.vx v24, v8, a2, v0.t +; RV64-NEXT: vsrl.vx v16, v8, a4, v0.t +; RV64-NEXT: vand.vx v16, v16, a3, v0.t +; RV64-NEXT: vor.vv v16, v16, v24, v0.t ; RV64-NEXT: 
vsrl.vi v24, v8, 24, v0.t ; RV64-NEXT: vand.vx v24, v24, a1, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t @@ -1316,28 +1316,28 @@ define <vscale x 8 x i64> @vp_bswap_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v16, v24, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v24, a2, v0.t -; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t +; RV32-NEXT: vand.vx v24, v24, a4, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: vand.vx v8, v8, a4, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -1374,21 +1374,21 @@ define <vscale x 8 x i64> @vp_bswap_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 ; RV64-NEXT: addi a2, sp, 16 ; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV64-NEXT: li a2, 56 +; RV64-NEXT: vsll.vx v24, v8, a2, v0.t ; RV64-NEXT: lui a3, 16 ; RV64-NEXT: addiw 
a3, a3, -256 -; RV64-NEXT: vand.vx v24, v8, a3, v0.t +; RV64-NEXT: vand.vx v16, v8, a3, v0.t ; RV64-NEXT: li a4, 40 -; RV64-NEXT: vsll.vx v24, v24, a4, v0.t -; RV64-NEXT: vsll.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vsll.vx v16, v16, a4, v0.t +; RV64-NEXT: vor.vv v16, v24, v16, v0.t ; RV64-NEXT: addi a5, sp, 16 ; RV64-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload ; RV64-NEXT: vor.vv v16, v16, v24, v0.t ; RV64-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV64-NEXT: vsrl.vx v24, v8, a4, v0.t -; RV64-NEXT: vand.vx v24, v24, a3, v0.t -; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v16, v24, v16, v0.t +; RV64-NEXT: vsrl.vx v24, v8, a2, v0.t +; RV64-NEXT: vsrl.vx v16, v8, a4, v0.t +; RV64-NEXT: vand.vx v16, v16, a3, v0.t +; RV64-NEXT: vor.vv v16, v16, v24, v0.t ; RV64-NEXT: vsrl.vi v24, v8, 24, v0.t ; RV64-NEXT: vand.vx v24, v24, a1, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll index 78e8700..647d315 100644 --- a/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll @@ -162,3 +162,206 @@ define void @caller_tuple_argument({<vscale x 4 x i32>, <vscale x 4 x i32>} %x) } declare void @callee_tuple_argument({<vscale x 4 x i32>, <vscale x 4 x i32>}) + +; %0 -> v8 +; %1 -> v9 +define <vscale x 1 x i64> @case1(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1) { +; CHECK-LABEL: case1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %a = add <vscale x 1 x i64> %0, %1 + ret <vscale x 1 x i64> %a +} + +; %0 -> v8 +; %1 -> v10-v11 +; %2 -> v9 +define <vscale x 1 x i64> @case2_1(<vscale x 1 x i64> %0, <vscale x 2 x i64> %1, <vscale x 1 x i64> %2) { +; CHECK-LABEL: case2_1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %a = add <vscale x 1 
x i64> %0, %2 + ret <vscale x 1 x i64> %a +} +define <vscale x 2 x i64> @case2_2(<vscale x 1 x i64> %0, <vscale x 2 x i64> %1, <vscale x 1 x i64> %2) { +; CHECK-LABEL: case2_2: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vadd.vv v8, v10, v10 +; CHECK-NEXT: ret + %a = add <vscale x 2 x i64> %1, %1 + ret <vscale x 2 x i64> %a +} + +; %0 -> v8 +; %1 -> {v10-v11, v12-v13} +; %2 -> v9 +define <vscale x 1 x i64> @case3_1(<vscale x 1 x i64> %0, {<vscale x 2 x i64>, <vscale x 2 x i64>} %1, <vscale x 1 x i64> %2) { +; CHECK-LABEL: case3_1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %add = add <vscale x 1 x i64> %0, %2 + ret <vscale x 1 x i64> %add +} +define <vscale x 2 x i64> @case3_2(<vscale x 1 x i64> %0, {<vscale x 2 x i64>, <vscale x 2 x i64>} %1, <vscale x 1 x i64> %2) { +; CHECK-LABEL: case3_2: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vadd.vv v8, v10, v12 +; CHECK-NEXT: ret + %a = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } %1, 0 + %b = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } %1, 1 + %add = add <vscale x 2 x i64> %a, %b + ret <vscale x 2 x i64> %add +} + +; %0 -> v8 +; %1 -> {by-ref, by-ref} +; %2 -> v9 +define <vscale x 8 x i64> @case4_1(<vscale x 1 x i64> %0, {<vscale x 8 x i64>, <vscale x 8 x i64>} %1, <vscale x 1 x i64> %2) { +; CHECK-LABEL: case4_1: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, a0, a1 +; CHECK-NEXT: vl8re64.v v8, (a1) +; CHECK-NEXT: vl8re64.v v16, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vadd.vv v8, v16, v8 +; CHECK-NEXT: ret + %a = extractvalue { <vscale x 8 x i64>, <vscale x 8 x i64> } %1, 0 + %b = extractvalue { <vscale x 8 x i64>, <vscale x 8 x i64> } %1, 1 + %add = add <vscale x 8 x i64> %a, %b + ret <vscale x 8 x i64> %add +} +define <vscale x 1 x i64> @case4_2(<vscale x 
1 x i64> %0, {<vscale x 8 x i64>, <vscale x 8 x i64>} %1, <vscale x 1 x i64> %2) { +; CHECK-LABEL: case4_2: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %add = add <vscale x 1 x i64> %0, %2 + ret <vscale x 1 x i64> %add +} + +declare <vscale x 1 x i64> @callee1() +declare void @callee2(<vscale x 1 x i64>) +declare void @callee3(<vscale x 4 x i32>) +define void @caller() { +; RV32-LABEL: caller: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: call callee1 +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vadd.vv v8, v8, v8 +; RV32-NEXT: call callee2 +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: caller: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: call callee1 +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: call callee2 +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %a = call <vscale x 1 x i64> @callee1() + %add = add <vscale x 1 x i64> %a, %a + call void @callee2(<vscale x 1 x i64> %add) + ret void +} + +declare {<vscale x 4 x i32>, <vscale x 4 x i32>} @callee_tuple() +define void @caller_tuple() { +; RV32-LABEL: caller_tuple: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: call callee_tuple +; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV32-NEXT: vadd.vv v8, v8, v10 +; RV32-NEXT: call callee3 +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; 
RV64-LABEL: caller_tuple: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: call callee_tuple +; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV64-NEXT: vadd.vv v8, v8, v10 +; RV64-NEXT: call callee3 +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %a = call {<vscale x 4 x i32>, <vscale x 4 x i32>} @callee_tuple() + %b = extractvalue {<vscale x 4 x i32>, <vscale x 4 x i32>} %a, 0 + %c = extractvalue {<vscale x 4 x i32>, <vscale x 4 x i32>} %a, 1 + %add = add <vscale x 4 x i32> %b, %c + call void @callee3(<vscale x 4 x i32> %add) + ret void +} + +declare {<vscale x 4 x i32>, {<vscale x 4 x i32>, <vscale x 4 x i32>}} @callee_nested() +define void @caller_nested() { +; RV32-LABEL: caller_nested: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: call callee_nested +; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV32-NEXT: vadd.vv v8, v8, v10 +; RV32-NEXT: vadd.vv v8, v8, v12 +; RV32-NEXT: call callee3 +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: caller_nested: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: call callee_nested +; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV64-NEXT: vadd.vv v8, v8, v10 +; RV64-NEXT: vadd.vv v8, v8, v12 +; RV64-NEXT: call callee3 +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %a = call {<vscale x 4 x i32>, {<vscale x 4 x i32>, <vscale x 4 x i32>}} @callee_nested() + %b = extractvalue {<vscale x 4 x i32>, {<vscale x 4 x i32>, <vscale x 4 x i32>}} %a, 0 + %c = 
extractvalue {<vscale x 4 x i32>, {<vscale x 4 x i32>, <vscale x 4 x i32>}} %a, 1 + %c0 = extractvalue {<vscale x 4 x i32>, <vscale x 4 x i32>} %c, 0 + %c1 = extractvalue {<vscale x 4 x i32>, <vscale x 4 x i32>} %c, 1 + %add0 = add <vscale x 4 x i32> %b, %c0 + %add1 = add <vscale x 4 x i32> %add0, %c1 + call void @callee3(<vscale x 4 x i32> %add1) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll index ddd5028..d396d6a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll @@ -140,11 +140,9 @@ define <vscale x 8 x half> @vp_ceil_vv_nxv8f16(<vscale x 8 x half> %va, <vscale ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -184,11 +182,9 @@ define <vscale x 16 x half> @vp_ceil_vv_nxv16f16(<vscale x 16 x half> %va, <vsca ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -228,11 +224,9 @@ define <vscale x 32 x half> @vp_ceil_vv_nxv32f16(<vscale x 32 x half> %va, <vsca ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, 
v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -356,11 +350,9 @@ define <vscale x 4 x float> @vp_ceil_vv_nxv4f32(<vscale x 4 x float> %va, <vscal ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -400,11 +392,9 @@ define <vscale x 8 x float> @vp_ceil_vv_nxv8f32(<vscale x 8 x float> %va, <vscal ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -444,11 +434,9 @@ define <vscale x 16 x float> @vp_ceil_vv_nxv16f32(<vscale x 16 x float> %va, <vs ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -530,11 +518,9 @@ define <vscale x 2 x double> @vp_ceil_vv_nxv2f64(<vscale x 2 x double> %va, <vsc ; 
CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -574,11 +560,9 @@ define <vscale x 4 x double> @vp_ceil_vv_nxv4f64(<vscale x 4 x double> %va, <vsc ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -618,11 +602,9 @@ define <vscale x 7 x double> @vp_ceil_vv_nxv7f64(<vscale x 7 x double> %va, <vsc ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -662,11 +644,9 @@ define <vscale x 8 x double> @vp_ceil_vv_nxv8f64(<vscale x 8 x double> %va, <vsc ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; 
CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -716,11 +696,9 @@ define <vscale x 16 x double> @vp_ceil_vv_nxv16f64(<vscale x 16 x double> %va, < ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v5, v0 -; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a2, 3 -; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a2 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -734,11 +712,9 @@ define <vscale x 16 x double> @vp_ceil_vv_nxv16f64(<vscale x 16 x double> %va, < ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v6, v7 -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll b/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll index f244810..ff35043 100644 --- a/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll +++ b/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+fast-unaligned-access -target-abi=ilp32 \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+unaligned-vector-mem -target-abi=ilp32 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v,+fast-unaligned-access -target-abi=lp64 \ +; RUN: llc 
-mtriple=riscv64 -mattr=+v,+unaligned-vector-mem -target-abi=lp64 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define void @constant_forward_stride(ptr %s, ptr %d) { diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll index 3972f52..d74ec69 100644 --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll @@ -275,8 +275,8 @@ define i1 @extractelt_nxv16i1_idx0(ptr %x) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vl2r.v v8, (a0) ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; CHECK-NEXT: vmseq.vi v10, v8, 0 -; CHECK-NEXT: vfirst.m a0, v10 +; CHECK-NEXT: vmseq.vi v0, v8, 0 +; CHECK-NEXT: vfirst.m a0, v0 ; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %a = load <vscale x 16 x i8>, ptr %x @@ -290,8 +290,8 @@ define i1 @extractelt_nxv32i1_idx0(ptr %x) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vl4r.v v8, (a0) ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vmseq.vi v12, v8, 0 -; CHECK-NEXT: vfirst.m a0, v12 +; CHECK-NEXT: vmseq.vi v0, v8, 0 +; CHECK-NEXT: vfirst.m a0, v0 ; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %a = load <vscale x 32 x i8>, ptr %x @@ -305,8 +305,8 @@ define i1 @extractelt_nxv64i1_idx0(ptr %x) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vl8r.v v8, (a0) ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmseq.vi v16, v8, 0 -; CHECK-NEXT: vfirst.m a0, v16 +; CHECK-NEXT: vmseq.vi v0, v8, 0 +; CHECK-NEXT: vfirst.m a0, v0 ; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %a = load <vscale x 64 x i8>, ptr %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-binop-splats.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-binop-splats.ll index ee8c322..1d07ac8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-binop-splats.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-binop-splats.ll @@ -125,11 +125,11 @@ define <32 x i1> @v32i1(i1 %x, i1 %y) { ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetvli zero, a2, e8, m2, 
ta, ma ; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: vmsne.vi v10, v8, 0 +; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: vmsne.vi v11, v8, 0 -; CHECK-NEXT: vmxor.mm v0, v10, v11 +; CHECK-NEXT: vmsne.vi v2, v8, 0 +; CHECK-NEXT: vmxor.mm v0, v0, v2 ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vrgather.vi v10, v8, 0 @@ -150,11 +150,11 @@ define <64 x i1> @v64i1(i1 %x, i1 %y) { ; CHECK-NEXT: li a2, 64 ; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma ; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: vmsne.vi v12, v8, 0 +; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: vmsne.vi v13, v8, 0 -; CHECK-NEXT: vmxor.mm v0, v12, v13 +; CHECK-NEXT: vmsne.vi v4, v8, 0 +; CHECK-NEXT: vmxor.mm v0, v0, v4 ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vrgather.vi v12, v8, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll index 33e8869..dc2e011 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll @@ -1709,28 +1709,28 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 48 ; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v16, v24, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 4 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 48 -; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v24, a2, v0.t -; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vand.vx v24, 
v24, a2, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t +; RV32-NEXT: vand.vx v24, v24, a4, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: vand.vx v8, v8, a4, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -1794,21 +1794,21 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex ; RV64-NEXT: addi a2, sp, 16 ; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV64-NEXT: li a2, 56 +; RV64-NEXT: vsll.vx v24, v8, a2, v0.t ; RV64-NEXT: lui a3, 16 ; RV64-NEXT: addiw a3, a3, -256 -; RV64-NEXT: vand.vx v24, v8, a3, v0.t +; RV64-NEXT: vand.vx v16, v8, a3, v0.t ; RV64-NEXT: li a4, 40 -; RV64-NEXT: vsll.vx v24, v24, a4, v0.t -; RV64-NEXT: vsll.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vsll.vx v16, v16, a4, v0.t +; RV64-NEXT: vor.vv v16, v24, v16, v0.t ; RV64-NEXT: addi a5, sp, 16 ; RV64-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload ; RV64-NEXT: vor.vv v16, v16, v24, v0.t ; RV64-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV64-NEXT: vsrl.vx v24, v8, a4, v0.t -; RV64-NEXT: vand.vx v24, v24, a3, v0.t -; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v16, v24, v16, v0.t +; RV64-NEXT: vsrl.vx v24, v8, a2, v0.t +; RV64-NEXT: vsrl.vx v16, v8, a4, v0.t +; RV64-NEXT: vand.vx v16, v16, a3, v0.t +; RV64-NEXT: vor.vv v16, v16, v24, v0.t ; RV64-NEXT: vsrl.vi v24, v8, 24, 
v0.t ; RV64-NEXT: vand.vx v24, v24, a1, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t @@ -2077,28 +2077,28 @@ define <16 x i64> @vp_bitreverse_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroex ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 48 ; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v16, v24, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 4 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 48 -; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v24, a2, v0.t -; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t +; RV32-NEXT: vand.vx v24, v24, a4, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: vand.vx v8, v8, a4, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -2162,21 +2162,21 @@ define <16 x i64> @vp_bitreverse_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroex ; RV64-NEXT: addi a2, sp, 16 ; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV64-NEXT: li a2, 56 +; RV64-NEXT: vsll.vx v24, v8, a2, v0.t ; RV64-NEXT: lui a3, 16 ; RV64-NEXT: addiw a3, a3, -256 -; 
RV64-NEXT: vand.vx v24, v8, a3, v0.t +; RV64-NEXT: vand.vx v16, v8, a3, v0.t ; RV64-NEXT: li a4, 40 -; RV64-NEXT: vsll.vx v24, v24, a4, v0.t -; RV64-NEXT: vsll.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vsll.vx v16, v16, a4, v0.t +; RV64-NEXT: vor.vv v16, v24, v16, v0.t ; RV64-NEXT: addi a5, sp, 16 ; RV64-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload ; RV64-NEXT: vor.vv v16, v16, v24, v0.t ; RV64-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV64-NEXT: vsrl.vx v24, v8, a4, v0.t -; RV64-NEXT: vand.vx v24, v24, a3, v0.t -; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v16, v24, v16, v0.t +; RV64-NEXT: vsrl.vx v24, v8, a2, v0.t +; RV64-NEXT: vsrl.vx v16, v8, a4, v0.t +; RV64-NEXT: vand.vx v16, v16, a3, v0.t +; RV64-NEXT: vor.vv v16, v16, v24, v0.t ; RV64-NEXT: vsrl.vi v24, v8, 24, v0.t ; RV64-NEXT: vand.vx v24, v24, a1, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll index ab212bc..095d2fe 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll @@ -803,28 +803,28 @@ define <15 x i64> @vp_bswap_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v16, v24, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v24, a2, v0.t -; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv 
v16, v24, v16, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t +; RV32-NEXT: vand.vx v24, v24, a4, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: vand.vx v8, v8, a4, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -861,21 +861,21 @@ define <15 x i64> @vp_bswap_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev ; RV64-NEXT: addi a2, sp, 16 ; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV64-NEXT: li a2, 56 +; RV64-NEXT: vsll.vx v24, v8, a2, v0.t ; RV64-NEXT: lui a3, 16 ; RV64-NEXT: addiw a3, a3, -256 -; RV64-NEXT: vand.vx v24, v8, a3, v0.t +; RV64-NEXT: vand.vx v16, v8, a3, v0.t ; RV64-NEXT: li a4, 40 -; RV64-NEXT: vsll.vx v24, v24, a4, v0.t -; RV64-NEXT: vsll.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vsll.vx v16, v16, a4, v0.t +; RV64-NEXT: vor.vv v16, v24, v16, v0.t ; RV64-NEXT: addi a5, sp, 16 ; RV64-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload ; RV64-NEXT: vor.vv v16, v16, v24, v0.t ; RV64-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV64-NEXT: vsrl.vx v24, v8, a4, v0.t -; RV64-NEXT: vand.vx v24, v24, a3, v0.t -; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v16, v24, v16, v0.t +; RV64-NEXT: vsrl.vx v24, v8, a2, v0.t +; RV64-NEXT: vsrl.vx v16, v8, a4, v0.t +; RV64-NEXT: vand.vx v16, v16, a3, v0.t +; RV64-NEXT: vor.vv v16, v16, v24, v0.t ; RV64-NEXT: vsrl.vi v24, v8, 24, v0.t ; RV64-NEXT: vand.vx v24, v24, 
a1, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t @@ -1038,28 +1038,28 @@ define <16 x i64> @vp_bswap_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v16, v24, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v24, a2, v0.t -; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t +; RV32-NEXT: vand.vx v24, v24, a4, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: vand.vx v8, v8, a4, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -1096,21 +1096,21 @@ define <16 x i64> @vp_bswap_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev ; RV64-NEXT: addi a2, sp, 16 ; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV64-NEXT: li a2, 56 +; RV64-NEXT: vsll.vx v24, v8, a2, v0.t ; RV64-NEXT: lui a3, 16 ; RV64-NEXT: addiw a3, a3, -256 -; RV64-NEXT: vand.vx v24, v8, a3, v0.t +; 
RV64-NEXT: vand.vx v16, v8, a3, v0.t ; RV64-NEXT: li a4, 40 -; RV64-NEXT: vsll.vx v24, v24, a4, v0.t -; RV64-NEXT: vsll.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vsll.vx v16, v16, a4, v0.t +; RV64-NEXT: vor.vv v16, v24, v16, v0.t ; RV64-NEXT: addi a5, sp, 16 ; RV64-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload ; RV64-NEXT: vor.vv v16, v16, v24, v0.t ; RV64-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV64-NEXT: vsrl.vx v24, v8, a4, v0.t -; RV64-NEXT: vand.vx v24, v24, a3, v0.t -; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v16, v24, v16, v0.t +; RV64-NEXT: vsrl.vx v24, v8, a2, v0.t +; RV64-NEXT: vsrl.vx v16, v8, a4, v0.t +; RV64-NEXT: vand.vx v16, v16, a3, v0.t +; RV64-NEXT: vor.vv v16, v16, v24, v0.t ; RV64-NEXT: vsrl.vi v24, v8, 24, v0.t ; RV64-NEXT: vand.vx v24, v24, a1, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll index 5348e7d..2af0292 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll @@ -201,11 +201,9 @@ define <8 x half> @vp_ceil_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 3 -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -266,11 +264,9 @@ define <16 x half> @vp_ceil_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %e ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; ZVFH-NEXT: vmv1r.v v13, v0 -; ZVFH-NEXT: 
vmflt.vf v13, v10, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v10, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: fsrmi a0, 3 -; ZVFH-NEXT: vmv1r.v v0, v13 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -287,11 +283,9 @@ define <16 x half> @vp_ceil_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %e ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v17, v0 -; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 3 -; ZVFHMIN-NEXT: vmv1r.v v0, v17 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -436,11 +430,9 @@ define <8 x float> @vp_ceil_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -480,11 +472,9 @@ define <16 x float> @vp_ceil_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -566,11 +556,9 @@ define <4 x double> @vp_ceil_v4f64(<4 x double> %va, <4 x i1> %m, 
i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -610,11 +598,9 @@ define <8 x double> @vp_ceil_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -654,11 +640,9 @@ define <15 x double> @vp_ceil_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -698,11 +682,9 @@ define <16 x double> @vp_ceil_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, 
m8, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -750,11 +732,9 @@ define <32 x double> @vp_ceil_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroex ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v5, v0 -; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a1, 3 -; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -768,11 +748,9 @@ define <32 x double> @vp_ceil_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroex ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v6, v7 -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll index 232d453..108bd85 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll @@ -154,9 +154,9 @@ define i1 @extractelt_v32i1(ptr %x, i64 %idx) nounwind { ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e8, m2, ta, ma ; RV32-NEXT: vle8.v v8, (a0) -; RV32-NEXT: vmseq.vi v10, v8, 0 +; RV32-NEXT: vmseq.vi v0, v8, 0 ; RV32-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.x.s a0, v10 +; RV32-NEXT: vmv.x.s a0, v0 ; RV32-NEXT: srl a0, a0, a1 ; RV32-NEXT: andi a0, a0, 1 ; RV32-NEXT: ret @@ -166,9 +166,9 @@ 
define i1 @extractelt_v32i1(ptr %x, i64 %idx) nounwind { ; RV64-NEXT: li a2, 32 ; RV64-NEXT: vsetvli zero, a2, e8, m2, ta, ma ; RV64-NEXT: vle8.v v8, (a0) -; RV64-NEXT: vmseq.vi v10, v8, 0 +; RV64-NEXT: vmseq.vi v0, v8, 0 ; RV64-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; RV64-NEXT: vmv.x.s a0, v10 +; RV64-NEXT: vmv.x.s a0, v0 ; RV64-NEXT: srl a0, a0, a1 ; RV64-NEXT: andi a0, a0, 1 ; RV64-NEXT: ret @@ -178,9 +178,9 @@ define i1 @extractelt_v32i1(ptr %x, i64 %idx) nounwind { ; RV32ZBS-NEXT: li a2, 32 ; RV32ZBS-NEXT: vsetvli zero, a2, e8, m2, ta, ma ; RV32ZBS-NEXT: vle8.v v8, (a0) -; RV32ZBS-NEXT: vmseq.vi v10, v8, 0 +; RV32ZBS-NEXT: vmseq.vi v0, v8, 0 ; RV32ZBS-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; RV32ZBS-NEXT: vmv.x.s a0, v10 +; RV32ZBS-NEXT: vmv.x.s a0, v0 ; RV32ZBS-NEXT: bext a0, a0, a1 ; RV32ZBS-NEXT: ret ; @@ -189,9 +189,9 @@ define i1 @extractelt_v32i1(ptr %x, i64 %idx) nounwind { ; RV64ZBS-NEXT: li a2, 32 ; RV64ZBS-NEXT: vsetvli zero, a2, e8, m2, ta, ma ; RV64ZBS-NEXT: vle8.v v8, (a0) -; RV64ZBS-NEXT: vmseq.vi v10, v8, 0 +; RV64ZBS-NEXT: vmseq.vi v0, v8, 0 ; RV64ZBS-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; RV64ZBS-NEXT: vmv.x.s a0, v10 +; RV64ZBS-NEXT: vmv.x.s a0, v0 ; RV64ZBS-NEXT: bext a0, a0, a1 ; RV64ZBS-NEXT: ret %a = load <32 x i8>, ptr %x @@ -206,10 +206,10 @@ define i1 @extractelt_v64i1(ptr %x, i64 %idx) nounwind { ; RV32-NEXT: li a2, 64 ; RV32-NEXT: vsetvli zero, a2, e8, m4, ta, ma ; RV32-NEXT: vle8.v v8, (a0) -; RV32-NEXT: vmseq.vi v12, v8, 0 +; RV32-NEXT: vmseq.vi v0, v8, 0 ; RV32-NEXT: srli a0, a1, 5 ; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-NEXT: vslidedown.vx v8, v12, a0 +; RV32-NEXT: vslidedown.vx v8, v0, a0 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: srl a0, a0, a1 ; RV32-NEXT: andi a0, a0, 1 @@ -220,9 +220,9 @@ define i1 @extractelt_v64i1(ptr %x, i64 %idx) nounwind { ; RV64-NEXT: li a2, 64 ; RV64-NEXT: vsetvli zero, a2, e8, m4, ta, ma ; RV64-NEXT: vle8.v v8, (a0) -; RV64-NEXT: vmseq.vi v12, v8, 0 +; RV64-NEXT: vmseq.vi 
v0, v8, 0 ; RV64-NEXT: vsetvli zero, a2, e64, m1, ta, ma -; RV64-NEXT: vmv.x.s a0, v12 +; RV64-NEXT: vmv.x.s a0, v0 ; RV64-NEXT: srl a0, a0, a1 ; RV64-NEXT: andi a0, a0, 1 ; RV64-NEXT: ret @@ -232,10 +232,10 @@ define i1 @extractelt_v64i1(ptr %x, i64 %idx) nounwind { ; RV32ZBS-NEXT: li a2, 64 ; RV32ZBS-NEXT: vsetvli zero, a2, e8, m4, ta, ma ; RV32ZBS-NEXT: vle8.v v8, (a0) -; RV32ZBS-NEXT: vmseq.vi v12, v8, 0 +; RV32ZBS-NEXT: vmseq.vi v0, v8, 0 ; RV32ZBS-NEXT: srli a0, a1, 5 ; RV32ZBS-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32ZBS-NEXT: vslidedown.vx v8, v12, a0 +; RV32ZBS-NEXT: vslidedown.vx v8, v0, a0 ; RV32ZBS-NEXT: vmv.x.s a0, v8 ; RV32ZBS-NEXT: bext a0, a0, a1 ; RV32ZBS-NEXT: ret @@ -245,9 +245,9 @@ define i1 @extractelt_v64i1(ptr %x, i64 %idx) nounwind { ; RV64ZBS-NEXT: li a2, 64 ; RV64ZBS-NEXT: vsetvli zero, a2, e8, m4, ta, ma ; RV64ZBS-NEXT: vle8.v v8, (a0) -; RV64ZBS-NEXT: vmseq.vi v12, v8, 0 +; RV64ZBS-NEXT: vmseq.vi v0, v8, 0 ; RV64ZBS-NEXT: vsetvli zero, a2, e64, m1, ta, ma -; RV64ZBS-NEXT: vmv.x.s a0, v12 +; RV64ZBS-NEXT: vmv.x.s a0, v0 ; RV64ZBS-NEXT: bext a0, a0, a1 ; RV64ZBS-NEXT: ret %a = load <64 x i8>, ptr %x @@ -262,10 +262,10 @@ define i1 @extractelt_v128i1(ptr %x, i64 %idx) nounwind { ; RV32-NEXT: li a2, 128 ; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; RV32-NEXT: vle8.v v8, (a0) -; RV32-NEXT: vmseq.vi v16, v8, 0 +; RV32-NEXT: vmseq.vi v0, v8, 0 ; RV32-NEXT: srli a0, a1, 5 ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vx v8, v16, a0 +; RV32-NEXT: vslidedown.vx v8, v0, a0 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: srl a0, a0, a1 ; RV32-NEXT: andi a0, a0, 1 @@ -276,10 +276,10 @@ define i1 @extractelt_v128i1(ptr %x, i64 %idx) nounwind { ; RV64-NEXT: li a2, 128 ; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; RV64-NEXT: vle8.v v8, (a0) -; RV64-NEXT: vmseq.vi v16, v8, 0 +; RV64-NEXT: vmseq.vi v0, v8, 0 ; RV64-NEXT: srli a0, a1, 6 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vslidedown.vx v8, v16, a0 +; 
RV64-NEXT: vslidedown.vx v8, v0, a0 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: srl a0, a0, a1 ; RV64-NEXT: andi a0, a0, 1 @@ -290,10 +290,10 @@ define i1 @extractelt_v128i1(ptr %x, i64 %idx) nounwind { ; RV32ZBS-NEXT: li a2, 128 ; RV32ZBS-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; RV32ZBS-NEXT: vle8.v v8, (a0) -; RV32ZBS-NEXT: vmseq.vi v16, v8, 0 +; RV32ZBS-NEXT: vmseq.vi v0, v8, 0 ; RV32ZBS-NEXT: srli a0, a1, 5 ; RV32ZBS-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZBS-NEXT: vslidedown.vx v8, v16, a0 +; RV32ZBS-NEXT: vslidedown.vx v8, v0, a0 ; RV32ZBS-NEXT: vmv.x.s a0, v8 ; RV32ZBS-NEXT: bext a0, a0, a1 ; RV32ZBS-NEXT: ret @@ -303,10 +303,10 @@ define i1 @extractelt_v128i1(ptr %x, i64 %idx) nounwind { ; RV64ZBS-NEXT: li a2, 128 ; RV64ZBS-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; RV64ZBS-NEXT: vle8.v v8, (a0) -; RV64ZBS-NEXT: vmseq.vi v16, v8, 0 +; RV64ZBS-NEXT: vmseq.vi v0, v8, 0 ; RV64ZBS-NEXT: srli a0, a1, 6 ; RV64ZBS-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64ZBS-NEXT: vslidedown.vx v8, v16, a0 +; RV64ZBS-NEXT: vslidedown.vx v8, v0, a0 ; RV64ZBS-NEXT: vmv.x.s a0, v8 ; RV64ZBS-NEXT: bext a0, a0, a1 ; RV64ZBS-NEXT: ret @@ -527,8 +527,8 @@ define i1 @extractelt_v32i1_idx0(ptr %x) nounwind { ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmseq.vi v10, v8, 0 -; CHECK-NEXT: vfirst.m a0, v10 +; CHECK-NEXT: vmseq.vi v0, v8, 0 +; CHECK-NEXT: vfirst.m a0, v0 ; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x @@ -543,8 +543,8 @@ define i1 @extractelt_v64i1_idx0(ptr %x) nounwind { ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmseq.vi v12, v8, 0 -; CHECK-NEXT: vfirst.m a0, v12 +; CHECK-NEXT: vmseq.vi v0, v8, 0 +; CHECK-NEXT: vfirst.m a0, v0 ; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %a = load <64 x i8>, ptr %x @@ -559,8 +559,8 @@ define i1 @extractelt_v128i1_idx0(ptr %x) nounwind { ; CHECK-NEXT: li a1, 128 ; 
CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmseq.vi v16, v8, 0 -; CHECK-NEXT: vfirst.m a0, v16 +; CHECK-NEXT: vmseq.vi v0, v8, 0 +; CHECK-NEXT: vfirst.m a0, v0 ; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %a = load <128 x i8>, ptr %x @@ -575,8 +575,8 @@ define i1 @extractelt_v256i1_idx0(ptr %x) nounwind { ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmseq.vi v16, v8, 0 -; CHECK-NEXT: vfirst.m a0, v16 +; CHECK-NEXT: vmseq.vi v0, v8, 0 +; CHECK-NEXT: vfirst.m a0, v0 ; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %a = load <256 x i8>, ptr %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll index 6915722..25b9805 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll @@ -201,11 +201,9 @@ define <8 x half> @vp_floor_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 2 -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -266,11 +264,9 @@ define <16 x half> @vp_floor_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext % ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; ZVFH-NEXT: vmv1r.v v13, v0 -; ZVFH-NEXT: vmflt.vf v13, v10, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v10, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: fsrmi a0, 2 -; ZVFH-NEXT: vmv1r.v v0, v13 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: 
fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -287,11 +283,9 @@ define <16 x half> @vp_floor_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext % ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v17, v0 -; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 2 -; ZVFHMIN-NEXT: vmv1r.v v0, v17 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -436,11 +430,9 @@ define <8 x float> @vp_floor_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -480,11 +472,9 @@ define <16 x float> @vp_floor_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -566,11 +556,9 @@ define <4 x double> @vp_floor_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext % ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; 
CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -610,11 +598,9 @@ define <8 x double> @vp_floor_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext % ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -654,11 +640,9 @@ define <15 x double> @vp_floor_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -698,11 +682,9 @@ define <16 x double> @vp_floor_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -750,11 +732,9 @@ define <32 x double> 
@vp_floor_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v5, v0 -; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a1, 2 -; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -768,11 +748,9 @@ define <32 x double> @vp_floor_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v6, v7 -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll index edb3315..8d378fa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll @@ -13,32 +13,33 @@ declare <2 x half> @llvm.vp.maximum.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32) define <2 x half> @vfmax_vv_v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_v2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vmv1r.v v11, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v11 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 -; 
ZVFH-NEXT: vfmax.vv v8, v8, v11, v0.t +; ZVFH-NEXT: vmv1r.v v0, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_v2f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v12, v10, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v11, v0 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v8, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -52,11 +53,10 @@ define <2 x half> @vfmax_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %vb, i32 z ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_v2f16_unmasked: @@ -66,12 +66,11 @@ define <2 x half> @vfmax_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %vb, i32 z ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 +; 
ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -85,32 +84,33 @@ declare <4 x half> @llvm.vp.maximum.v4f16(<4 x half>, <4 x half>, <4 x i1>, i32) define <4 x half> @vfmax_vv_v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_v4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vmv1r.v v11, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v11 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 -; ZVFH-NEXT: vfmax.vv v8, v8, v11, v0.t +; ZVFH-NEXT: vmv1r.v v0, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_v4f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v12, v10, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v11, v0 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v8, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: 
vmfeq.vv v0, v8, v8, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -124,11 +124,10 @@ define <4 x half> @vfmax_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %vb, i32 z ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_v4f16_unmasked: @@ -138,12 +137,11 @@ define <4 x half> @vfmax_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %vb, i32 z ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0 -; ZVFHMIN-NEXT: vmv.v.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -157,35 +155,34 @@ declare <8 x half> @llvm.vp.maximum.v8f16(<8 x half>, <8 x half>, <8 x i1>, i32) define <8 x half> @vfmax_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_v8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vmv1r.v v11, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; 
ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v11 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 -; ZVFH-NEXT: vfmax.vv v8, v8, v11, v0.t +; ZVFH-NEXT: vmv1r.v v0, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_v8f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v14, v0 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v16, v12, v14, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 -; ZVFHMIN-NEXT: vmfeq.vv v8, v14, v14, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v14, v12, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 -; ZVFHMIN-NEXT: vfmax.vv v10, v8, v16, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v14 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v14 +; ZVFHMIN-NEXT: vfmax.vv v10, v10, v8, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret @@ -198,11 +195,10 @@ define <8 x half> @vfmax_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 z ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv.v.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; 
ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_v8f16_unmasked: @@ -214,11 +210,10 @@ define <8 x half> @vfmax_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 z ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12 -; ZVFHMIN-NEXT: vmerge.vvm v14, v10, v12, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v10, v0 -; ZVFHMIN-NEXT: vfmax.vv v10, v8, v14 +; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFHMIN-NEXT: vfmax.vv v10, v10, v8 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret @@ -231,37 +226,34 @@ declare <16 x half> @llvm.vp.maximum.v16f16(<16 x half>, <16 x half>, <16 x i1>, define <16 x half> @vfmax_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_v16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v12, v0 +; ZVFH-NEXT: vmv1r.v v14, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfeq.vv v13, v8, v8, v0.t -; ZVFH-NEXT: vmv1r.v v0, v13 -; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 -; ZVFH-NEXT: vmfeq.vv v13, v10, v10, v0.t -; ZVFH-NEXT: vmv1r.v v0, v13 +; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t +; ZVFH-NEXT: vmerge.vvm v12, v8, v10, v0 +; ZVFH-NEXT: vmv1r.v v0, v14 +; ZVFH-NEXT: vmfeq.vv v0, v10, v10, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 -; ZVFH-NEXT: vfmax.vv v8, v8, v14, v0.t +; ZVFH-NEXT: vmv1r.v v0, v14 +; ZVFH-NEXT: vfmax.vv v8, v8, v12, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_v16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v12, v0 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; 
ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t +; ZVFHMIN-NEXT: vmv1r.v v20, v0 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v24, v16, v20, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vmfeq.vv v8, v20, v20, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v20, v16, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vfmax.vv v12, v8, v24, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v20 +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v20 +; ZVFHMIN-NEXT: vfmax.vv v12, v12, v8, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret @@ -274,11 +266,10 @@ define <16 x half> @vfmax_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v12, v10, v10 -; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vmerge.vvm v12, v8, v10, v0 +; ZVFH-NEXT: vmfeq.vv v0, v10, v10 ; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v14 +; ZVFH-NEXT: vfmax.vv v8, v8, v12 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_v16f16_unmasked: @@ -290,11 +281,10 @@ define <16 x half> @vfmax_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v20, v12, v16, v0 -; ZVFHMIN-NEXT: 
vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v12, v0 -; ZVFHMIN-NEXT: vfmax.vv v12, v8, v20 +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 +; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vfmax.vv v12, v12, v8 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret @@ -307,15 +297,15 @@ declare <2 x float> @llvm.vp.maximum.v2f32(<2 x float>, <2 x float>, <2 x i1>, i define <2 x float> @vfmax_vv_v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_v2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfmax.vv v8, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <2 x float> @llvm.vp.maximum.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 %evl) ret <2 x float> %v @@ -326,11 +316,10 @@ define <2 x float> @vfmax_vv_v2f32_unmasked(<2 x float> %va, <2 x float> %vb, i3 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <2 x float> @llvm.vp.maximum.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x float> %v @@ -341,15 +330,15 @@ declare <4 x float> 
@llvm.vp.maximum.v4f32(<4 x float>, <4 x float>, <4 x i1>, i define <4 x float> @vfmax_vv_v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfmax.vv v8, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <4 x float> @llvm.vp.maximum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 %evl) ret <4 x float> %v @@ -360,11 +349,10 @@ define <4 x float> @vfmax_vv_v4f32_unmasked(<4 x float> %va, <4 x float> %vb, i3 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <4 x float> @llvm.vp.maximum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x float> %v @@ -375,17 +363,15 @@ declare <8 x float> @llvm.vp.maximum.v8f32(<8 x float>, <8 x float>, <8 x i1>, i define <8 x float> @vfmax_vv_v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_v8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v13, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 -; CHECK-NEXT: vmerge.vvm v14, v8, 
v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfeq.vv v13, v10, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfeq.vv v0, v10, v10, v0.t ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfmax.vv v8, v8, v14, v0.t +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vfmax.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <8 x float> @llvm.vp.maximum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 %evl) ret <8 x float> %v @@ -396,11 +382,10 @@ define <8 x float> @vfmax_vv_v8f32_unmasked(<8 x float> %va, <8 x float> %vb, i3 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v14 +; CHECK-NEXT: vfmax.vv v8, v8, v12 ; CHECK-NEXT: ret %v = call <8 x float> @llvm.vp.maximum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x float> %v @@ -411,17 +396,15 @@ declare <16 x float> @llvm.vp.maximum.v16f32(<16 x float>, <16 x float>, <16 x i define <16 x float> @vfmax_vv_v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_v16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v17, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmfeq.vv v17, v12, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfeq.vv v0, v12, 
v12, v0.t ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfmax.vv v8, v8, v20, v0.t +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <16 x float> @llvm.vp.maximum.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 %evl) ret <16 x float> %v @@ -432,11 +415,10 @@ define <16 x float> @vfmax_vv_v16f32_unmasked(<16 x float> %va, <16 x float> %vb ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v20 +; CHECK-NEXT: vfmax.vv v8, v8, v16 ; CHECK-NEXT: ret %v = call <16 x float> @llvm.vp.maximum.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x float> %v @@ -447,15 +429,15 @@ declare <2 x double> @llvm.vp.maximum.v2f64(<2 x double>, <2 x double>, <2 x i1> define <2 x double> @vfmax_vv_v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfmax.vv v8, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <2 x double> @llvm.vp.maximum.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 %evl) ret <2 x double> %v @@ -466,11 +448,10 @@ define <2 x double> @vfmax_vv_v2f64_unmasked(<2 
x double> %va, <2 x double> %vb, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <2 x double> @llvm.vp.maximum.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x double> %v @@ -481,17 +462,15 @@ declare <4 x double> @llvm.vp.maximum.v4f64(<4 x double>, <4 x double>, <4 x i1> define <4 x double> @vfmax_vv_v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v13, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfeq.vv v13, v10, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfeq.vv v0, v10, v10, v0.t ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfmax.vv v8, v8, v14, v0.t +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vfmax.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <4 x double> @llvm.vp.maximum.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 %evl) ret <4 x double> %v @@ -502,11 +481,10 @@ define <4 x double> @vfmax_vv_v4f64_unmasked(<4 x double> %va, <4 x double> %vb, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: 
vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v14 +; CHECK-NEXT: vfmax.vv v8, v8, v12 ; CHECK-NEXT: ret %v = call <4 x double> @llvm.vp.maximum.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v @@ -517,17 +495,15 @@ declare <8 x double> @llvm.vp.maximum.v8f64(<8 x double>, <8 x double>, <8 x i1> define <8 x double> @vfmax_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v17, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmfeq.vv v17, v12, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfeq.vv v0, v12, v12, v0.t ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfmax.vv v8, v8, v20, v0.t +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <8 x double> @llvm.vp.maximum.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 %evl) ret <8 x double> %v @@ -538,11 +514,10 @@ define <8 x double> @vfmax_vv_v8f64_unmasked(<8 x double> %va, <8 x double> %vb, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v20 +; CHECK-NEXT: vfmax.vv v8, v8, v16 ; CHECK-NEXT: ret %v = call <8 x double> @llvm.vp.maximum.v8f64(<8 x double> 
%va, <8 x double> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x double> %v @@ -553,30 +528,15 @@ declare <16 x double> @llvm.vp.maximum.v16f64(<16 x double>, <16 x double>, <16 define <16 x double> @vfmax_vv_v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_v16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v16, v16, v0.t ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vfmax.vv v8, v8, v24, v0.t ; CHECK-NEXT: ret %v = call <16 x double> @llvm.vp.maximum.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 %evl) ret <16 x double> %v @@ -587,9 +547,8 @@ define <16 x double> @vfmax_vv_v16f64_unmasked(<16 x double> %va, <16 x double> ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v7, v16, v16 ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: 
vfmax.vv v8, v8, v24 ; CHECK-NEXT: ret @@ -605,16 +564,15 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; CHECK-NEXT: vmv1r.v v6, v0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a1) ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill @@ -622,16 +580,8 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: bltu a2, a1, .LBB24_2 @@ -639,26 +589,27 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB24_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vmfeq.vv v26, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, 
v26 +; CHECK-NEXT: vmv1r.v v6, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmv8r.v v16, v24 +; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vmfeq.vv v26, v16, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v26 +; CHECK-NEXT: vmfeq.vv v0, v16, v16, v0.t ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmax.vv v8, v8, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill @@ -668,36 +619,28 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v16, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size 
Folded Reload ; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t ; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v16, v16, v8, v0.t +; CHECK-NEXT: vfmax.vv v16, v16, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -734,7 +677,6 @@ define <32 x double> @vfmax_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: .LBB25_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v7, v24, v24 ; CHECK-NEXT: vmv8r.v v16, v24 ; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 ; CHECK-NEXT: csrr a0, vlenb @@ -742,7 +684,7 @@ define <32 x double> @vfmax_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 @@ -768,9 +710,8 @@ define <32 x double> @vfmax_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v7, v8, v8 ; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 ; 
CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 ; CHECK-NEXT: vfmax.vv v16, v16, v24 ; CHECK-NEXT: csrr a0, vlenb diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll index 48649c4..3831261 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll @@ -13,32 +13,33 @@ declare <2 x half> @llvm.vp.minimum.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32) define <2 x half> @vfmin_vv_v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_v2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vmv1r.v v11, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v11 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 -; ZVFH-NEXT: vfmin.vv v8, v8, v11, v0.t +; ZVFH-NEXT: vmv1r.v v0, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_v2f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v12, v10, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v11, v0 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v8, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 
+; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -52,11 +53,10 @@ define <2 x half> @vfmin_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %vb, i32 z ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_v2f16_unmasked: @@ -66,12 +66,11 @@ define <2 x half> @vfmin_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %vb, i32 z ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -85,32 +84,33 @@ declare <4 x half> @llvm.vp.minimum.v4f16(<4 x half>, <4 x half>, <4 x i1>, i32) define <4 x half> @vfmin_vv_v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_v4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vmv1r.v v11, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; 
ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v11 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 -; ZVFH-NEXT: vfmin.vv v8, v8, v11, v0.t +; ZVFH-NEXT: vmv1r.v v0, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_v4f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v12, v10, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v11, v0 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v8, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -124,11 +124,10 @@ define <4 x half> @vfmin_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %vb, i32 z ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_v4f16_unmasked: @@ -138,12 +137,11 @@ define <4 x half> @vfmin_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %vb, i32 z ; 
ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0 -; ZVFHMIN-NEXT: vmv.v.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -157,35 +155,34 @@ declare <8 x half> @llvm.vp.minimum.v8f16(<8 x half>, <8 x half>, <8 x i1>, i32) define <8 x half> @vfmin_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_v8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vmv1r.v v11, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v11 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 -; ZVFH-NEXT: vfmin.vv v8, v8, v11, v0.t +; ZVFH-NEXT: vmv1r.v v0, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_v8f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v14, v0 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, 
m2, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v16, v12, v14, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 -; ZVFHMIN-NEXT: vmfeq.vv v8, v14, v14, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v14, v12, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 -; ZVFHMIN-NEXT: vfmin.vv v10, v8, v16, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v14 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v14 +; ZVFHMIN-NEXT: vfmin.vv v10, v10, v8, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret @@ -198,11 +195,10 @@ define <8 x half> @vfmin_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 z ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv.v.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_v8f16_unmasked: @@ -214,11 +210,10 @@ define <8 x half> @vfmin_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 z ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12 -; ZVFHMIN-NEXT: vmerge.vvm v14, v10, v12, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v10, v0 -; ZVFHMIN-NEXT: vfmin.vv v10, v8, v14 +; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFHMIN-NEXT: vfmin.vv v10, v10, v8 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret @@ -231,37 +226,34 @@ declare <16 x half> 
@llvm.vp.minimum.v16f16(<16 x half>, <16 x half>, <16 x i1>, define <16 x half> @vfmin_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_v16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v12, v0 +; ZVFH-NEXT: vmv1r.v v14, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfeq.vv v13, v8, v8, v0.t -; ZVFH-NEXT: vmv1r.v v0, v13 -; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 -; ZVFH-NEXT: vmfeq.vv v13, v10, v10, v0.t -; ZVFH-NEXT: vmv1r.v v0, v13 +; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t +; ZVFH-NEXT: vmerge.vvm v12, v8, v10, v0 +; ZVFH-NEXT: vmv1r.v v0, v14 +; ZVFH-NEXT: vmfeq.vv v0, v10, v10, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 -; ZVFH-NEXT: vfmin.vv v8, v8, v14, v0.t +; ZVFH-NEXT: vmv1r.v v0, v14 +; ZVFH-NEXT: vfmin.vv v8, v8, v12, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_v16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v12, v0 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t +; ZVFHMIN-NEXT: vmv1r.v v20, v0 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v24, v16, v20, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vmfeq.vv v8, v20, v20, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v20, v16, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vfmin.vv v12, v8, v24, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v20 +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v20 +; ZVFHMIN-NEXT: vfmin.vv v12, 
v12, v8, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret @@ -274,11 +266,10 @@ define <16 x half> @vfmin_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v12, v10, v10 -; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vmerge.vvm v12, v8, v10, v0 +; ZVFH-NEXT: vmfeq.vv v0, v10, v10 ; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v14 +; ZVFH-NEXT: vfmin.vv v8, v8, v12 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_v16f16_unmasked: @@ -290,11 +281,10 @@ define <16 x half> @vfmin_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v20, v12, v16, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v12, v0 -; ZVFHMIN-NEXT: vfmin.vv v12, v8, v20 +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 +; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vfmin.vv v12, v12, v8 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret @@ -307,15 +297,15 @@ declare <2 x float> @llvm.vp.minimum.v2f32(<2 x float>, <2 x float>, <2 x i1>, i define <2 x float> @vfmin_vv_v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_v2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t ; 
CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfmin.vv v8, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <2 x float> @llvm.vp.minimum.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 %evl) ret <2 x float> %v @@ -326,11 +316,10 @@ define <2 x float> @vfmin_vv_v2f32_unmasked(<2 x float> %va, <2 x float> %vb, i3 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <2 x float> @llvm.vp.minimum.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x float> %v @@ -341,15 +330,15 @@ declare <4 x float> @llvm.vp.minimum.v4f32(<4 x float>, <4 x float>, <4 x i1>, i define <4 x float> @vfmin_vv_v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfmin.vv v8, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <4 x float> @llvm.vp.minimum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 %evl) ret <4 x float> %v @@ -360,11 +349,10 @@ define <4 x float> @vfmin_vv_v4f32_unmasked(<4 x float> %va, <4 x float> %vb, i3 ; 
CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <4 x float> @llvm.vp.minimum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x float> %v @@ -375,17 +363,15 @@ declare <8 x float> @llvm.vp.minimum.v8f32(<8 x float>, <8 x float>, <8 x i1>, i define <8 x float> @vfmin_vv_v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_v8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v13, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfeq.vv v13, v10, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfeq.vv v0, v10, v10, v0.t ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfmin.vv v8, v8, v14, v0.t +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vfmin.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <8 x float> @llvm.vp.minimum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 %evl) ret <8 x float> %v @@ -396,11 +382,10 @@ define <8 x float> @vfmin_vv_v8f32_unmasked(<8 x float> %va, <8 x float> %vb, i3 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmfeq.vv 
v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v14 +; CHECK-NEXT: vfmin.vv v8, v8, v12 ; CHECK-NEXT: ret %v = call <8 x float> @llvm.vp.minimum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x float> %v @@ -411,17 +396,15 @@ declare <16 x float> @llvm.vp.minimum.v16f32(<16 x float>, <16 x float>, <16 x i define <16 x float> @vfmin_vv_v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_v16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v17, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmfeq.vv v17, v12, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfeq.vv v0, v12, v12, v0.t ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfmin.vv v8, v8, v20, v0.t +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vfmin.vv v8, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <16 x float> @llvm.vp.minimum.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 %evl) ret <16 x float> %v @@ -432,11 +415,10 @@ define <16 x float> @vfmin_vv_v16f32_unmasked(<16 x float> %va, <16 x float> %vb ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v20 +; CHECK-NEXT: vfmin.vv v8, v8, v16 ; CHECK-NEXT: ret %v = call <16 x float> @llvm.vp.minimum.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> splat (i1 true), i32 
%evl) ret <16 x float> %v @@ -447,15 +429,15 @@ declare <2 x double> @llvm.vp.minimum.v2f64(<2 x double>, <2 x double>, <2 x i1> define <2 x double> @vfmin_vv_v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfmin.vv v8, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <2 x double> @llvm.vp.minimum.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 %evl) ret <2 x double> %v @@ -466,11 +448,10 @@ define <2 x double> @vfmin_vv_v2f64_unmasked(<2 x double> %va, <2 x double> %vb, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <2 x double> @llvm.vp.minimum.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x double> %v @@ -481,17 +462,15 @@ declare <4 x double> @llvm.vp.minimum.v4f64(<4 x double>, <4 x double>, <4 x i1> define <4 x double> @vfmin_vv_v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmfeq.vv 
v13, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfeq.vv v13, v10, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfeq.vv v0, v10, v10, v0.t ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfmin.vv v8, v8, v14, v0.t +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vfmin.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <4 x double> @llvm.vp.minimum.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 %evl) ret <4 x double> %v @@ -502,11 +481,10 @@ define <4 x double> @vfmin_vv_v4f64_unmasked(<4 x double> %va, <4 x double> %vb, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v14 +; CHECK-NEXT: vfmin.vv v8, v8, v12 ; CHECK-NEXT: ret %v = call <4 x double> @llvm.vp.minimum.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v @@ -517,17 +495,15 @@ declare <8 x double> @llvm.vp.minimum.v8f64(<8 x double>, <8 x double>, <8 x i1> define <8 x double> @vfmin_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v17, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmfeq.vv v17, v12, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: 
vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfeq.vv v0, v12, v12, v0.t ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfmin.vv v8, v8, v20, v0.t +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vfmin.vv v8, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <8 x double> @llvm.vp.minimum.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 %evl) ret <8 x double> %v @@ -538,11 +514,10 @@ define <8 x double> @vfmin_vv_v8f64_unmasked(<8 x double> %va, <8 x double> %vb, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v20 +; CHECK-NEXT: vfmin.vv v8, v8, v16 ; CHECK-NEXT: ret %v = call <8 x double> @llvm.vp.minimum.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x double> %v @@ -553,30 +528,15 @@ declare <16 x double> @llvm.vp.minimum.v16f64(<16 x double>, <16 x double>, <16 define <16 x double> @vfmin_vv_v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_v16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: 
vmv1r.v v0, v7 -; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v16, v16, v0.t ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v8, v8, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vfmin.vv v8, v8, v24, v0.t ; CHECK-NEXT: ret %v = call <16 x double> @llvm.vp.minimum.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 %evl) ret <16 x double> %v @@ -587,9 +547,8 @@ define <16 x double> @vfmin_vv_v16f64_unmasked(<16 x double> %va, <16 x double> ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v7, v16, v16 ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v24 ; CHECK-NEXT: ret @@ -605,16 +564,15 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; CHECK-NEXT: vmv1r.v v6, v0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a1) ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v24, (a1) # 
Unknown-size Folded Spill @@ -622,16 +580,8 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: bltu a2, a1, .LBB24_2 @@ -639,26 +589,27 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB24_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vmfeq.vv v26, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v26 +; CHECK-NEXT: vmv1r.v v6, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmv8r.v v16, v24 +; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vmfeq.vv v26, v16, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v26 +; CHECK-NEXT: vmfeq.vv v0, v16, v16, v0.t ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v8, v8, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: 
vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmin.vv v8, v8, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill @@ -668,36 +619,28 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v16, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t ; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v16, v16, v8, v0.t +; CHECK-NEXT: vfmin.vv v16, v16, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -734,7 +677,6 @@ define <32 x double> @vfmin_vv_v32f64_unmasked(<32 x 
double> %va, <32 x double> ; CHECK-NEXT: .LBB25_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v7, v24, v24 ; CHECK-NEXT: vmv8r.v v16, v24 ; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 ; CHECK-NEXT: csrr a0, vlenb @@ -742,7 +684,7 @@ define <32 x double> @vfmin_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 @@ -768,9 +710,8 @@ define <32 x double> @vfmin_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v7, v8, v8 ; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 ; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 ; CHECK-NEXT: vfmin.vv v16, v16, v24 ; CHECK-NEXT: csrr a0, vlenb diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll index e201d5d..3b85328 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll @@ -262,9 +262,9 @@ define <64 x float> @interleave_v32f32(<32 x float> %x, <32 x float> %y) { ; V128-NEXT: vwmaccu.vx v8, a0, v16 ; V128-NEXT: lui a1, 699051 ; V128-NEXT: addi a1, a1, -1366 -; V128-NEXT: li a2, 32 ; V128-NEXT: vmv.s.x v0, a1 -; V128-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; V128-NEXT: li a1, 32 +; V128-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; V128-NEXT: vmerge.vvm v24, v8, v24, v0 ; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; V128-NEXT: addi a1, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll index a566fab..9a4c8af 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll @@ -22,8 +22,8 @@ define void @fcmp_oeq_vv_v8f16(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v10 -; ZVFHMIN-NEXT: vsm.v v8, (a2) +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v10 +; ZVFHMIN-NEXT: vsm.v v0, (a2) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = load <8 x half>, ptr %y @@ -50,8 +50,8 @@ define void @fcmp_oeq_vv_v8f16_nonans(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v10 -; ZVFHMIN-NEXT: vsm.v v8, (a2) +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v10 +; ZVFHMIN-NEXT: vsm.v v0, (a2) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = load <8 x half>, ptr %y @@ -166,8 +166,8 @@ define void @fcmp_olt_vv_v16f16(ptr %x, ptr %y, ptr %z) { ; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vle16.v v10, (a1) -; ZVFH-NEXT: vmflt.vv v12, v8, v10 -; ZVFH-NEXT: vsm.v v12, (a2) +; ZVFH-NEXT: vmflt.vv v0, v8, v10 +; ZVFH-NEXT: vsm.v v0, (a2) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_olt_vv_v16f16: @@ -178,8 +178,8 @@ define void @fcmp_olt_vv_v16f16(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12 -; ZVFHMIN-NEXT: vsm.v v8, (a2) +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12 +; ZVFHMIN-NEXT: vsm.v v0, (a2) ; ZVFHMIN-NEXT: ret %a = load <16 x half>, ptr %x %b = load <16 x half>, ptr %y @@ -194,8 +194,8 @@ define void @fcmp_olt_vv_v16f16_nonans(ptr %x, ptr %y, ptr %z) { ; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; 
ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vle16.v v10, (a1) -; ZVFH-NEXT: vmflt.vv v12, v8, v10 -; ZVFH-NEXT: vsm.v v12, (a2) +; ZVFH-NEXT: vmflt.vv v0, v8, v10 +; ZVFH-NEXT: vsm.v v0, (a2) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_olt_vv_v16f16_nonans: @@ -206,8 +206,8 @@ define void @fcmp_olt_vv_v16f16_nonans(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12 -; ZVFHMIN-NEXT: vsm.v v8, (a2) +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12 +; ZVFHMIN-NEXT: vsm.v v0, (a2) ; ZVFHMIN-NEXT: ret %a = load <16 x half>, ptr %x %b = load <16 x half>, ptr %y @@ -222,8 +222,8 @@ define void @fcmp_oge_vv_v8f32(ptr %x, ptr %y, ptr %z) { ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v10, (a1) -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vsm.v v12, (a2) +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <8 x float>, ptr %x %b = load <8 x float>, ptr %y @@ -238,8 +238,8 @@ define void @fcmp_oge_vv_v8f32_nonans(ptr %x, ptr %y, ptr %z) { ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v10, (a1) -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vsm.v v12, (a2) +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <8 x float>, ptr %x %b = load <8 x float>, ptr %y @@ -305,8 +305,8 @@ define void @fcmp_ule_vv_v32f16(ptr %x, ptr %y, ptr %z) { ; ZVFH-NEXT: vsetvli zero, a3, e16, m4, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vle16.v v12, (a1) -; ZVFH-NEXT: vmflt.vv v16, v12, v8 -; ZVFH-NEXT: vmnot.m v8, v16 +; ZVFH-NEXT: vmflt.vv v0, v12, v8 +; ZVFH-NEXT: vmnot.m v8, v0 ; ZVFH-NEXT: vsm.v v8, (a2) ; ZVFH-NEXT: ret ; @@ -319,8 +319,8 @@ define void @fcmp_ule_vv_v32f16(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v 
v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v24, v16 -; ZVFHMIN-NEXT: vmnot.m v8, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v24, v16 +; ZVFHMIN-NEXT: vmnot.m v8, v0 ; ZVFHMIN-NEXT: vsm.v v8, (a2) ; ZVFHMIN-NEXT: ret %a = load <32 x half>, ptr %x @@ -337,8 +337,8 @@ define void @fcmp_ule_vv_v32f16_nonans(ptr %x, ptr %y, ptr %z) { ; ZVFH-NEXT: vsetvli zero, a3, e16, m4, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vle16.v v12, (a1) -; ZVFH-NEXT: vmfle.vv v16, v8, v12 -; ZVFH-NEXT: vsm.v v16, (a2) +; ZVFH-NEXT: vmfle.vv v0, v8, v12 +; ZVFH-NEXT: vsm.v v0, (a2) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ule_vv_v32f16_nonans: @@ -350,8 +350,8 @@ define void @fcmp_ule_vv_v32f16_nonans(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v24, v16 -; ZVFHMIN-NEXT: vsm.v v8, (a2) +; ZVFHMIN-NEXT: vmfle.vv v0, v24, v16 +; ZVFHMIN-NEXT: vsm.v v0, (a2) ; ZVFHMIN-NEXT: ret %a = load <32 x half>, ptr %x %b = load <32 x half>, ptr %y @@ -366,8 +366,8 @@ define void @fcmp_uge_vv_v16f32(ptr %x, ptr %y, ptr %z) { ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v12, (a1) -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmnot.m v8, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vsm.v v8, (a2) ; CHECK-NEXT: ret %a = load <16 x float>, ptr %x @@ -383,8 +383,8 @@ define void @fcmp_uge_vv_v16f32_nonans(ptr %x, ptr %y, ptr %z) { ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v12, (a1) -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vsm.v v16, (a2) +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <16 x float>, ptr %x %b = load <16 x float>, ptr %y @@ -399,8 +399,8 @@ define void @fcmp_ult_vv_v8f64(ptr %x, ptr %y, ptr %z) { ; 
CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v12, (a1) -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmnot.m v8, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vsm.v v8, (a2) ; CHECK-NEXT: ret %a = load <8 x double>, ptr %x @@ -416,8 +416,8 @@ define void @fcmp_ult_vv_v8f64_nonans(ptr %x, ptr %y, ptr %z) { ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v12, (a1) -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vsm.v v16, (a2) +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <8 x double>, ptr %x %b = load <8 x double>, ptr %y @@ -433,8 +433,8 @@ define void @fcmp_ugt_vv_v64f16(ptr %x, ptr %y, ptr %z) { ; ZVFH-NEXT: vsetvli zero, a3, e16, m8, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vle16.v v16, (a1) -; ZVFH-NEXT: vmfle.vv v24, v8, v16 -; ZVFH-NEXT: vmnot.m v8, v24 +; ZVFH-NEXT: vmfle.vv v0, v8, v16 +; ZVFH-NEXT: vmnot.m v8, v0 ; ZVFH-NEXT: vsm.v v8, (a2) ; ZVFH-NEXT: ret %a = load <64 x half>, ptr %x @@ -451,8 +451,8 @@ define void @fcmp_ugt_vv_v64f16_nonans(ptr %x, ptr %y, ptr %z) { ; ZVFH-NEXT: vsetvli zero, a3, e16, m8, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vle16.v v16, (a1) -; ZVFH-NEXT: vmflt.vv v24, v16, v8 -; ZVFH-NEXT: vsm.v v24, (a2) +; ZVFH-NEXT: vmflt.vv v0, v16, v8 +; ZVFH-NEXT: vsm.v v0, (a2) ; ZVFH-NEXT: ret %a = load <64 x half>, ptr %x %b = load <64 x half>, ptr %y @@ -468,9 +468,9 @@ define void @fcmp_ueq_vv_v32f32(ptr %x, ptr %y, ptr %z) { ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v16, (a1) -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmflt.vv v25, v16, v8 -; CHECK-NEXT: vmnor.mm v8, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: vmflt.vv v8, v16, v8 +; CHECK-NEXT: vmnor.mm v8, v8, v0 ; CHECK-NEXT: vsm.v v8, (a2) ; CHECK-NEXT: ret %a = load <32 x float>, 
ptr %x @@ -487,8 +487,8 @@ define void @fcmp_ueq_vv_v32f32_nonans(ptr %x, ptr %y, ptr %z) { ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v16, (a1) -; CHECK-NEXT: vmfeq.vv v24, v8, v16 -; CHECK-NEXT: vsm.v v24, (a2) +; CHECK-NEXT: vmfeq.vv v0, v8, v16 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <32 x float>, ptr %x %b = load <32 x float>, ptr %y @@ -503,9 +503,9 @@ define void @fcmp_one_vv_v8f64(ptr %x, ptr %y, ptr %z) { ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v16, (a1) -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmflt.vv v25, v16, v8 -; CHECK-NEXT: vmor.mm v8, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: vmflt.vv v8, v16, v8 +; CHECK-NEXT: vmor.mm v8, v8, v0 ; CHECK-NEXT: vsm.v v8, (a2) ; CHECK-NEXT: ret %a = load <16 x double>, ptr %x @@ -521,8 +521,8 @@ define void @fcmp_one_vv_v8f64_nonans(ptr %x, ptr %y, ptr %z) { ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v16, (a1) -; CHECK-NEXT: vmfne.vv v24, v8, v16 -; CHECK-NEXT: vsm.v v24, (a2) +; CHECK-NEXT: vmfne.vv v0, v8, v16 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <16 x double>, ptr %x %b = load <16 x double>, ptr %y @@ -657,8 +657,8 @@ define void @fcmp_oeq_vf_v8f16(ptr %x, half %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v12 -; ZVFHMIN-NEXT: vsm.v v8, (a1) +; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v12 +; ZVFHMIN-NEXT: vsm.v v0, (a1) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = insertelement <8 x half> poison, half %y, i32 0 @@ -690,8 +690,8 @@ define void @fcmp_oeq_vf_v8f16_nonans(ptr %x, half %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv 
v8, v10, v12 -; ZVFHMIN-NEXT: vsm.v v8, (a1) +; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v12 +; ZVFHMIN-NEXT: vsm.v v0, (a1) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = insertelement <8 x half> poison, half %y, i32 0 @@ -806,8 +806,8 @@ define void @fcmp_olt_vf_v16f16(ptr %x, half %y, ptr %z) { ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) -; ZVFH-NEXT: vmflt.vf v10, v8, fa0 -; ZVFH-NEXT: vsm.v v10, (a1) +; ZVFH-NEXT: vmflt.vf v0, v8, fa0 +; ZVFH-NEXT: vsm.v v0, (a1) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_olt_vf_v16f16: @@ -823,8 +823,8 @@ define void @fcmp_olt_vf_v16f16(ptr %x, half %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v16 -; ZVFHMIN-NEXT: vsm.v v8, (a1) +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v16 +; ZVFHMIN-NEXT: vsm.v v0, (a1) ; ZVFHMIN-NEXT: ret %a = load <16 x half>, ptr %x %b = insertelement <16 x half> poison, half %y, i32 0 @@ -839,8 +839,8 @@ define void @fcmp_olt_vf_v16f16_nonans(ptr %x, half %y, ptr %z) { ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) -; ZVFH-NEXT: vmflt.vf v10, v8, fa0 -; ZVFH-NEXT: vsm.v v10, (a1) +; ZVFH-NEXT: vmflt.vf v0, v8, fa0 +; ZVFH-NEXT: vsm.v v0, (a1) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_olt_vf_v16f16_nonans: @@ -856,8 +856,8 @@ define void @fcmp_olt_vf_v16f16_nonans(ptr %x, half %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v16 -; ZVFHMIN-NEXT: vsm.v v8, (a1) +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v16 +; ZVFHMIN-NEXT: vsm.v v0, (a1) ; ZVFHMIN-NEXT: ret %a = load <16 x half>, ptr %x %b = insertelement <16 x half> poison, half %y, i32 0 @@ -872,8 +872,8 @@ define void @fcmp_oge_vf_v8f32(ptr %x, float %y, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, 
m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vsm.v v10, (a1) +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <8 x float>, ptr %x %b = insertelement <8 x float> poison, float %y, i32 0 @@ -888,8 +888,8 @@ define void @fcmp_oge_vf_v8f32_nonans(ptr %x, float %y, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vsm.v v10, (a1) +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <8 x float>, ptr %x %b = insertelement <8 x float> poison, float %y, i32 0 @@ -955,8 +955,8 @@ define void @fcmp_ule_vf_v32f16(ptr %x, half %y, ptr %z) { ; ZVFH-NEXT: li a2, 32 ; ZVFH-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) -; ZVFH-NEXT: vmfgt.vf v12, v8, fa0 -; ZVFH-NEXT: vmnot.m v8, v12 +; ZVFH-NEXT: vmfgt.vf v0, v8, fa0 +; ZVFH-NEXT: vmnot.m v8, v0 ; ZVFH-NEXT: vsm.v v8, (a1) ; ZVFH-NEXT: ret ; @@ -974,8 +974,8 @@ define void @fcmp_ule_vf_v32f16(ptr %x, half %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v24, v16 -; ZVFHMIN-NEXT: vmnot.m v8, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v24, v16 +; ZVFHMIN-NEXT: vmnot.m v8, v0 ; ZVFHMIN-NEXT: vsm.v v8, (a1) ; ZVFHMIN-NEXT: ret %a = load <32 x half>, ptr %x @@ -992,8 +992,8 @@ define void @fcmp_ule_vf_v32f16_nonans(ptr %x, half %y, ptr %z) { ; ZVFH-NEXT: li a2, 32 ; ZVFH-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) -; ZVFH-NEXT: vmfle.vf v12, v8, fa0 -; ZVFH-NEXT: vsm.v v12, (a1) +; ZVFH-NEXT: vmfle.vf v0, v8, fa0 +; ZVFH-NEXT: vsm.v v0, (a1) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ule_vf_v32f16_nonans: @@ -1010,8 +1010,8 @@ define void @fcmp_ule_vf_v32f16_nonans(ptr %x, half %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; 
ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v16, v24 -; ZVFHMIN-NEXT: vsm.v v8, (a1) +; ZVFHMIN-NEXT: vmfle.vv v0, v16, v24 +; ZVFHMIN-NEXT: vsm.v v0, (a1) ; ZVFHMIN-NEXT: ret %a = load <32 x half>, ptr %x %b = insertelement <32 x half> poison, half %y, i32 0 @@ -1026,8 +1026,8 @@ define void @fcmp_uge_vf_v16f32(ptr %x, float %y, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v8, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vsm.v v8, (a1) ; CHECK-NEXT: ret %a = load <16 x float>, ptr %x @@ -1043,8 +1043,8 @@ define void @fcmp_uge_vf_v16f32_nonans(ptr %x, float %y, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vsm.v v12, (a1) +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <16 x float>, ptr %x %b = insertelement <16 x float> poison, float %y, i32 0 @@ -1059,8 +1059,8 @@ define void @fcmp_ult_vf_v8f64(ptr %x, double %y, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v8, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vsm.v v8, (a1) ; CHECK-NEXT: ret %a = load <8 x double>, ptr %x @@ -1076,8 +1076,8 @@ define void @fcmp_ult_vf_v8f64_nonans(ptr %x, double %y, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vsm.v v12, (a1) +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <8 x double>, ptr %x %b = insertelement <8 x double> poison, double %y, i32 0 @@ -1093,8 +1093,8 @@ define 
void @fcmp_ugt_vf_v64f16(ptr %x, half %y, ptr %z) { ; ZVFH-NEXT: li a2, 64 ; ZVFH-NEXT: vsetvli zero, a2, e16, m8, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) -; ZVFH-NEXT: vmfle.vf v16, v8, fa0 -; ZVFH-NEXT: vmnot.m v8, v16 +; ZVFH-NEXT: vmfle.vf v0, v8, fa0 +; ZVFH-NEXT: vmnot.m v8, v0 ; ZVFH-NEXT: vsm.v v8, (a1) ; ZVFH-NEXT: ret %a = load <64 x half>, ptr %x @@ -1111,8 +1111,8 @@ define void @fcmp_ugt_vf_v64f16_nonans(ptr %x, half %y, ptr %z) { ; ZVFH-NEXT: li a2, 64 ; ZVFH-NEXT: vsetvli zero, a2, e16, m8, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) -; ZVFH-NEXT: vmfgt.vf v16, v8, fa0 -; ZVFH-NEXT: vsm.v v16, (a1) +; ZVFH-NEXT: vmfgt.vf v0, v8, fa0 +; ZVFH-NEXT: vsm.v v0, (a1) ; ZVFH-NEXT: ret %a = load <64 x half>, ptr %x %b = insertelement <64 x half> poison, half %y, i32 0 @@ -1128,9 +1128,9 @@ define void @fcmp_ueq_vf_v32f32(ptr %x, float %y, ptr %z) { ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmflt.vf v16, v8, fa0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0 -; CHECK-NEXT: vmnor.mm v8, v17, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v8, v8, fa0 +; CHECK-NEXT: vmnor.mm v8, v8, v0 ; CHECK-NEXT: vsm.v v8, (a1) ; CHECK-NEXT: ret %a = load <32 x float>, ptr %x @@ -1147,8 +1147,8 @@ define void @fcmp_ueq_vf_v32f32_nonans(ptr %x, float %y, ptr %z) { ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmfeq.vf v16, v8, fa0 -; CHECK-NEXT: vsm.v v16, (a1) +; CHECK-NEXT: vmfeq.vf v0, v8, fa0 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <32 x float>, ptr %x %b = insertelement <32 x float> poison, float %y, i32 0 @@ -1163,9 +1163,9 @@ define void @fcmp_one_vf_v8f64(ptr %x, double %y, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmflt.vf v16, v8, fa0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0 -; CHECK-NEXT: vmor.mm v8, v17, v16 +; CHECK-NEXT: 
vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v8, v8, fa0 +; CHECK-NEXT: vmor.mm v8, v8, v0 ; CHECK-NEXT: vsm.v v8, (a1) ; CHECK-NEXT: ret %a = load <16 x double>, ptr %x @@ -1181,8 +1181,8 @@ define void @fcmp_one_vf_v8f64_nonans(ptr %x, double %y, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmfne.vf v16, v8, fa0 -; CHECK-NEXT: vsm.v v16, (a1) +; CHECK-NEXT: vmfne.vf v0, v8, fa0 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <16 x double>, ptr %x %b = insertelement <16 x double> poison, double %y, i32 0 @@ -1330,8 +1330,8 @@ define void @fcmp_oeq_fv_v8f16(ptr %x, half %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v10 -; ZVFHMIN-NEXT: vsm.v v8, (a1) +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v10 +; ZVFHMIN-NEXT: vsm.v v0, (a1) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = insertelement <8 x half> poison, half %y, i32 0 @@ -1363,8 +1363,8 @@ define void @fcmp_oeq_fv_v8f16_nonans(ptr %x, half %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v10 -; ZVFHMIN-NEXT: vsm.v v8, (a1) +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v10 +; ZVFHMIN-NEXT: vsm.v v0, (a1) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = insertelement <8 x half> poison, half %y, i32 0 @@ -1479,8 +1479,8 @@ define void @fcmp_olt_fv_v16f16(ptr %x, half %y, ptr %z) { ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) -; ZVFH-NEXT: vmfgt.vf v10, v8, fa0 -; ZVFH-NEXT: vsm.v v10, (a1) +; ZVFH-NEXT: vmfgt.vf v0, v8, fa0 +; ZVFH-NEXT: vsm.v v0, (a1) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_olt_fv_v16f16: @@ -1496,8 +1496,8 @@ define void @fcmp_olt_fv_v16f16(ptr %x, half %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; 
ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12 -; ZVFHMIN-NEXT: vsm.v v8, (a1) +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12 +; ZVFHMIN-NEXT: vsm.v v0, (a1) ; ZVFHMIN-NEXT: ret %a = load <16 x half>, ptr %x %b = insertelement <16 x half> poison, half %y, i32 0 @@ -1512,8 +1512,8 @@ define void @fcmp_olt_fv_v16f16_nonans(ptr %x, half %y, ptr %z) { ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) -; ZVFH-NEXT: vmfgt.vf v10, v8, fa0 -; ZVFH-NEXT: vsm.v v10, (a1) +; ZVFH-NEXT: vmfgt.vf v0, v8, fa0 +; ZVFH-NEXT: vsm.v v0, (a1) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_olt_fv_v16f16_nonans: @@ -1529,8 +1529,8 @@ define void @fcmp_olt_fv_v16f16_nonans(ptr %x, half %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12 -; ZVFHMIN-NEXT: vsm.v v8, (a1) +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12 +; ZVFHMIN-NEXT: vsm.v v0, (a1) ; ZVFHMIN-NEXT: ret %a = load <16 x half>, ptr %x %b = insertelement <16 x half> poison, half %y, i32 0 @@ -1545,8 +1545,8 @@ define void @fcmp_oge_fv_v8f32(ptr %x, float %y, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vsm.v v10, (a1) +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <8 x float>, ptr %x %b = insertelement <8 x float> poison, float %y, i32 0 @@ -1561,8 +1561,8 @@ define void @fcmp_oge_fv_v8f32_nonans(ptr %x, float %y, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vsm.v v10, (a1) +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <8 x float>, ptr %x %b = insertelement <8 x float> poison, float 
%y, i32 0 @@ -1628,8 +1628,8 @@ define void @fcmp_ule_fv_v32f16(ptr %x, half %y, ptr %z) { ; ZVFH-NEXT: li a2, 32 ; ZVFH-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) -; ZVFH-NEXT: vmflt.vf v12, v8, fa0 -; ZVFH-NEXT: vmnot.m v8, v12 +; ZVFH-NEXT: vmflt.vf v0, v8, fa0 +; ZVFH-NEXT: vmnot.m v8, v0 ; ZVFH-NEXT: vsm.v v8, (a1) ; ZVFH-NEXT: ret ; @@ -1647,8 +1647,8 @@ define void @fcmp_ule_fv_v32f16(ptr %x, half %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v24 -; ZVFHMIN-NEXT: vmnot.m v8, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v24 +; ZVFHMIN-NEXT: vmnot.m v8, v0 ; ZVFHMIN-NEXT: vsm.v v8, (a1) ; ZVFHMIN-NEXT: ret %a = load <32 x half>, ptr %x @@ -1665,8 +1665,8 @@ define void @fcmp_ule_fv_v32f16_nonans(ptr %x, half %y, ptr %z) { ; ZVFH-NEXT: li a2, 32 ; ZVFH-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) -; ZVFH-NEXT: vmfge.vf v12, v8, fa0 -; ZVFH-NEXT: vsm.v v12, (a1) +; ZVFH-NEXT: vmfge.vf v0, v8, fa0 +; ZVFH-NEXT: vsm.v v0, (a1) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ule_fv_v32f16_nonans: @@ -1683,8 +1683,8 @@ define void @fcmp_ule_fv_v32f16_nonans(ptr %x, half %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v24, v16 -; ZVFHMIN-NEXT: vsm.v v8, (a1) +; ZVFHMIN-NEXT: vmfle.vv v0, v24, v16 +; ZVFHMIN-NEXT: vsm.v v0, (a1) ; ZVFHMIN-NEXT: ret %a = load <32 x half>, ptr %x %b = insertelement <32 x half> poison, half %y, i32 0 @@ -1699,8 +1699,8 @@ define void @fcmp_uge_fv_v16f32(ptr %x, float %y, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v8, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vsm.v v8, (a1) ; CHECK-NEXT: 
ret %a = load <16 x float>, ptr %x @@ -1716,8 +1716,8 @@ define void @fcmp_uge_fv_v16f32_nonans(ptr %x, float %y, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vsm.v v12, (a1) +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <16 x float>, ptr %x %b = insertelement <16 x float> poison, float %y, i32 0 @@ -1732,8 +1732,8 @@ define void @fcmp_ult_fv_v8f64(ptr %x, double %y, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v8, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vsm.v v8, (a1) ; CHECK-NEXT: ret %a = load <8 x double>, ptr %x @@ -1749,8 +1749,8 @@ define void @fcmp_ult_fv_v8f64_nonans(ptr %x, double %y, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vsm.v v12, (a1) +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <8 x double>, ptr %x %b = insertelement <8 x double> poison, double %y, i32 0 @@ -1766,8 +1766,8 @@ define void @fcmp_ugt_fv_v64f16(ptr %x, half %y, ptr %z) { ; ZVFH-NEXT: li a2, 64 ; ZVFH-NEXT: vsetvli zero, a2, e16, m8, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) -; ZVFH-NEXT: vmfge.vf v16, v8, fa0 -; ZVFH-NEXT: vmnot.m v8, v16 +; ZVFH-NEXT: vmfge.vf v0, v8, fa0 +; ZVFH-NEXT: vmnot.m v8, v0 ; ZVFH-NEXT: vsm.v v8, (a1) ; ZVFH-NEXT: ret %a = load <64 x half>, ptr %x @@ -1784,8 +1784,8 @@ define void @fcmp_ugt_fv_v64f16_nonans(ptr %x, half %y, ptr %z) { ; ZVFH-NEXT: li a2, 64 ; ZVFH-NEXT: vsetvli zero, a2, e16, m8, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) -; ZVFH-NEXT: vmflt.vf v16, v8, fa0 -; ZVFH-NEXT: vsm.v v16, (a1) +; ZVFH-NEXT: vmflt.vf v0, v8, fa0 +; ZVFH-NEXT: vsm.v v0, (a1) ; ZVFH-NEXT: ret 
%a = load <64 x half>, ptr %x %b = insertelement <64 x half> poison, half %y, i32 0 @@ -1801,9 +1801,9 @@ define void @fcmp_ueq_fv_v32f32(ptr %x, float %y, ptr %z) { ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmfgt.vf v16, v8, fa0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0 -; CHECK-NEXT: vmnor.mm v8, v17, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v8, v8, fa0 +; CHECK-NEXT: vmnor.mm v8, v8, v0 ; CHECK-NEXT: vsm.v v8, (a1) ; CHECK-NEXT: ret %a = load <32 x float>, ptr %x @@ -1820,8 +1820,8 @@ define void @fcmp_ueq_fv_v32f32_nonans(ptr %x, float %y, ptr %z) { ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmfeq.vf v16, v8, fa0 -; CHECK-NEXT: vsm.v v16, (a1) +; CHECK-NEXT: vmfeq.vf v0, v8, fa0 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <32 x float>, ptr %x %b = insertelement <32 x float> poison, float %y, i32 0 @@ -1836,9 +1836,9 @@ define void @fcmp_one_fv_v8f64(ptr %x, double %y, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmfgt.vf v16, v8, fa0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0 -; CHECK-NEXT: vmor.mm v8, v17, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v8, v8, fa0 +; CHECK-NEXT: vmor.mm v8, v8, v0 ; CHECK-NEXT: vsm.v v8, (a1) ; CHECK-NEXT: ret %a = load <16 x double>, ptr %x @@ -1854,8 +1854,8 @@ define void @fcmp_one_fv_v8f64_nonans(ptr %x, double %y, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmfne.vf v16, v8, fa0 -; CHECK-NEXT: vsm.v v16, (a1) +; CHECK-NEXT: vmfne.vf v0, v8, fa0 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <16 x double>, ptr %x %b = insertelement <16 x double> poison, double %y, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp-mask.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp-mask.ll index 602662b..24d7a87 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp-mask.ll @@ -76,9 +76,8 @@ define <4 x i1> @vfptosi_v4i1_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext % ; CHECK-LABEL: vfptosi_v4i1_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; CHECK-NEXT: vmsne.vi v8, v10, 0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8, v0.t +; CHECK-NEXT: vmsne.vi v0, v8, 0, v0.t ; CHECK-NEXT: ret %v = call <4 x i1> @llvm.vp.fptosi.v4i1.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) ret <4 x i1> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp-mask.ll index c5bfd41..da512e3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp-mask.ll @@ -76,9 +76,8 @@ define <4 x i1> @vfptoui_v4i1_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext % ; CHECK-LABEL: vfptoui_v4i1_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfcvt.rtz.xu.f.v v10, v8, v0.t -; CHECK-NEXT: vmsne.vi v8, v10, 0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8, v0.t +; CHECK-NEXT: vmsne.vi v0, v8, 0, v0.t ; CHECK-NEXT: ret %v = call <4 x i1> @llvm.vp.fptoui.v4i1.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) ret <4 x i1> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll index cc76fd5..7f03bab 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll @@ -628,6 +628,7 @@ define void @insert_v2i64_nxv16i64_hi(ptr %psv, ptr %out) { ; RV32-NEXT: vs8r.v v8, (a0) ; RV32-NEXT: vs8r.v 
v16, (a1) ; RV32-NEXT: addi sp, s0, -80 +; RV32-NEXT: .cfi_def_cfa sp, 80 ; RV32-NEXT: lw ra, 76(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 72(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 80 @@ -661,6 +662,7 @@ define void @insert_v2i64_nxv16i64_hi(ptr %psv, ptr %out) { ; RV64-NEXT: vs8r.v v8, (a0) ; RV64-NEXT: vs8r.v v16, (a1) ; RV64-NEXT: addi sp, s0, -80 +; RV64-NEXT: .cfi_def_cfa sp, 80 ; RV64-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 64(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 80 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll index 1a905e5..a67ba6b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll @@ -427,9 +427,9 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) { ; V128-NEXT: vwmaccu.vx v8, a0, v16 ; V128-NEXT: lui a1, 699051 ; V128-NEXT: addi a1, a1, -1366 -; V128-NEXT: li a2, 32 ; V128-NEXT: vmv.s.x v0, a1 -; V128-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; V128-NEXT: li a1, 32 +; V128-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; V128-NEXT: vmerge.vvm v24, v8, v24, v0 ; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; V128-NEXT: addi a1, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll index 0b08d94..1a7d440 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll @@ -52,8 +52,8 @@ define void @setgt_vv_v64i8(ptr %x, ptr %y, ptr %z) { ; CHECK-NEXT: vsetvli zero, a3, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vle8.v v12, (a1) -; CHECK-NEXT: vmslt.vv v16, v12, v8 -; CHECK-NEXT: vsm.v v16, (a2) +; CHECK-NEXT: vmslt.vv v0, v12, v8 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <64 x i8>, ptr %x %b = load <64 x i8>, ptr %y @@ -69,8 +69,8 @@ define 
void @setlt_vv_v128i8(ptr %x, ptr %y, ptr %z) { ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vle8.v v16, (a1) -; CHECK-NEXT: vmslt.vv v24, v8, v16 -; CHECK-NEXT: vsm.v v24, (a2) +; CHECK-NEXT: vmslt.vv v0, v8, v16 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <128 x i8>, ptr %x %b = load <128 x i8>, ptr %y @@ -118,8 +118,8 @@ define void @setugt_vv_v32i8(ptr %x, ptr %y, ptr %z) { ; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vle8.v v10, (a1) -; CHECK-NEXT: vmsltu.vv v12, v10, v8 -; CHECK-NEXT: vsm.v v12, (a2) +; CHECK-NEXT: vmsltu.vv v0, v10, v8 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x %b = load <32 x i8>, ptr %y @@ -135,8 +135,8 @@ define void @setult_vv_v64i8(ptr %x, ptr %y, ptr %z) { ; CHECK-NEXT: vsetvli zero, a3, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vle8.v v12, (a1) -; CHECK-NEXT: vmsltu.vv v16, v8, v12 -; CHECK-NEXT: vsm.v v16, (a2) +; CHECK-NEXT: vmsltu.vv v0, v8, v12 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <64 x i8>, ptr %x %b = load <64 x i8>, ptr %y @@ -152,8 +152,8 @@ define void @setuge_vv_v128i8(ptr %x, ptr %y, ptr %z) { ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vle8.v v16, (a1) -; CHECK-NEXT: vmsleu.vv v24, v16, v8 -; CHECK-NEXT: vsm.v v24, (a2) +; CHECK-NEXT: vmsleu.vv v0, v16, v8 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <128 x i8>, ptr %x %b = load <128 x i8>, ptr %y @@ -200,8 +200,8 @@ define void @setne_vx_v32i8(ptr %x, i8 %y, ptr %z) { ; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmsne.vx v10, v8, a1 -; CHECK-NEXT: vsm.v v10, (a2) +; CHECK-NEXT: vmsne.vx v0, v8, a1 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x %b = insertelement <32 x i8> poison, i8 %y, i32 0 @@ -217,8 +217,8 @@ define void 
@setgt_vx_v64i8(ptr %x, i8 %y, ptr %z) { ; CHECK-NEXT: li a3, 64 ; CHECK-NEXT: vsetvli zero, a3, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmsgt.vx v12, v8, a1 -; CHECK-NEXT: vsm.v v12, (a2) +; CHECK-NEXT: vmsgt.vx v0, v8, a1 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <64 x i8>, ptr %x %b = insertelement <64 x i8> poison, i8 %y, i32 0 @@ -234,8 +234,8 @@ define void @setlt_vx_v128i8(ptr %x, i8 %y, ptr %z) { ; CHECK-NEXT: li a3, 128 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmslt.vx v16, v8, a1 -; CHECK-NEXT: vsm.v v16, (a2) +; CHECK-NEXT: vmslt.vx v0, v8, a1 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <128 x i8>, ptr %x %b = insertelement <128 x i8> poison, i8 %y, i32 0 @@ -284,8 +284,8 @@ define void @setugt_vx_v32i8(ptr %x, i8 %y, ptr %z) { ; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmsgtu.vx v10, v8, a1 -; CHECK-NEXT: vsm.v v10, (a2) +; CHECK-NEXT: vmsgtu.vx v0, v8, a1 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x %b = insertelement <32 x i8> poison, i8 %y, i32 0 @@ -301,8 +301,8 @@ define void @setult_vx_v64i8(ptr %x, i8 %y, ptr %z) { ; CHECK-NEXT: li a3, 64 ; CHECK-NEXT: vsetvli zero, a3, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmsltu.vx v12, v8, a1 -; CHECK-NEXT: vsm.v v12, (a2) +; CHECK-NEXT: vmsltu.vx v0, v8, a1 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <64 x i8>, ptr %x %b = insertelement <64 x i8> poison, i8 %y, i32 0 @@ -319,8 +319,8 @@ define void @setuge_vx_v128i8(ptr %x, i8 %y, ptr %z) { ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vmv.v.x v16, a1 -; CHECK-NEXT: vmsleu.vv v24, v16, v8 -; CHECK-NEXT: vsm.v v24, (a2) +; CHECK-NEXT: vmsleu.vv v0, v16, v8 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <128 x i8>, ptr %x %b = insertelement <128 x i8> poison, i8 %y, i32 
0 @@ -368,8 +368,8 @@ define void @setne_xv_v32i8(ptr %x, i8 %y, ptr %z) { ; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmsne.vx v10, v8, a1 -; CHECK-NEXT: vsm.v v10, (a2) +; CHECK-NEXT: vmsne.vx v0, v8, a1 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x %b = insertelement <32 x i8> poison, i8 %y, i32 0 @@ -385,8 +385,8 @@ define void @setgt_xv_v64i8(ptr %x, i8 %y, ptr %z) { ; CHECK-NEXT: li a3, 64 ; CHECK-NEXT: vsetvli zero, a3, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmslt.vx v12, v8, a1 -; CHECK-NEXT: vsm.v v12, (a2) +; CHECK-NEXT: vmslt.vx v0, v8, a1 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <64 x i8>, ptr %x %b = insertelement <64 x i8> poison, i8 %y, i32 0 @@ -402,8 +402,8 @@ define void @setlt_xv_v128i8(ptr %x, i8 %y, ptr %z) { ; CHECK-NEXT: li a3, 128 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmsgt.vx v16, v8, a1 -; CHECK-NEXT: vsm.v v16, (a2) +; CHECK-NEXT: vmsgt.vx v0, v8, a1 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <128 x i8>, ptr %x %b = insertelement <128 x i8> poison, i8 %y, i32 0 @@ -452,8 +452,8 @@ define void @setugt_xv_v32i8(ptr %x, i8 %y, ptr %z) { ; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmsltu.vx v10, v8, a1 -; CHECK-NEXT: vsm.v v10, (a2) +; CHECK-NEXT: vmsltu.vx v0, v8, a1 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x %b = insertelement <32 x i8> poison, i8 %y, i32 0 @@ -469,8 +469,8 @@ define void @setult_xv_v64i8(ptr %x, i8 %y, ptr %z) { ; CHECK-NEXT: li a3, 64 ; CHECK-NEXT: vsetvli zero, a3, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmsgtu.vx v12, v8, a1 -; CHECK-NEXT: vsm.v v12, (a2) +; CHECK-NEXT: vmsgtu.vx v0, v8, a1 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <64 x i8>, ptr %x %b = insertelement <64 x 
i8> poison, i8 %y, i32 0 @@ -486,8 +486,8 @@ define void @setuge_xv_v128i8(ptr %x, i8 %y, ptr %z) { ; CHECK-NEXT: li a3, 128 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmsleu.vx v16, v8, a1 -; CHECK-NEXT: vsm.v v16, (a2) +; CHECK-NEXT: vmsleu.vx v0, v8, a1 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <128 x i8>, ptr %x %b = insertelement <128 x i8> poison, i8 %y, i32 0 @@ -534,8 +534,8 @@ define void @setne_vi_v32i8(ptr %x, ptr %z) { ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmsne.vi v10, v8, 0 -; CHECK-NEXT: vsm.v v10, (a1) +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x %d = icmp ne <32 x i8> %a, splat (i8 0) @@ -549,8 +549,8 @@ define void @setgt_vi_v64i8(ptr %x, ptr %z) { ; CHECK-NEXT: li a2, 64 ; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmsgt.vi v12, v8, 0 -; CHECK-NEXT: vsm.v v12, (a1) +; CHECK-NEXT: vmsgt.vi v0, v8, 0 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <64 x i8>, ptr %x %d = icmp sgt <64 x i8> %a, splat (i8 0) @@ -564,8 +564,8 @@ define void @setgt_vi_v64i8_nonzero(ptr %x, ptr %z) { ; CHECK-NEXT: li a2, 64 ; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmsgt.vi v12, v8, 5 -; CHECK-NEXT: vsm.v v12, (a1) +; CHECK-NEXT: vmsgt.vi v0, v8, 5 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <64 x i8>, ptr %x %d = icmp sgt <64 x i8> %a, splat (i8 5) @@ -579,8 +579,8 @@ define void @setlt_vi_v128i8(ptr %x, ptr %z) { ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmsle.vi v16, v8, -1 -; CHECK-NEXT: vsm.v v16, (a1) +; CHECK-NEXT: vmsle.vi v0, v8, -1 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <128 x i8>, ptr %x %d = icmp slt <128 x i8> %a, splat (i8 0) @@ -622,8 +622,8 
@@ define void @setugt_vi_v32i8(ptr %x, ptr %z) { ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmsgtu.vi v10, v8, 5 -; CHECK-NEXT: vsm.v v10, (a1) +; CHECK-NEXT: vmsgtu.vi v0, v8, 5 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x %d = icmp ugt <32 x i8> %a, splat (i8 5) @@ -637,8 +637,8 @@ define void @setult_vi_v64i8(ptr %x, ptr %z) { ; CHECK-NEXT: li a2, 64 ; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmsleu.vi v12, v8, 4 -; CHECK-NEXT: vsm.v v12, (a1) +; CHECK-NEXT: vmsleu.vi v0, v8, 4 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <64 x i8>, ptr %x %d = icmp ult <64 x i8> %a, splat (i8 5) @@ -652,8 +652,8 @@ define void @setuge_vi_v128i8(ptr %x, ptr %z) { ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmsgtu.vi v16, v8, 4 -; CHECK-NEXT: vsm.v v16, (a1) +; CHECK-NEXT: vmsgtu.vi v0, v8, 4 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <128 x i8>, ptr %x %d = icmp uge <128 x i8> %a, splat (i8 5) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll index 9161ced..27adc7a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll @@ -20,8 +20,8 @@ define void @vector_interleave_store_v32i1_v16i1(<16 x i1> %a, <16 x i1> %b, ptr ; CHECK-NEXT: li a2, -1 ; CHECK-NEXT: vwmaccu.vx v12, a2, v10 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vmsne.vi v8, v12, 0 -; CHECK-NEXT: vsm.v v8, (a0) +; CHECK-NEXT: vmsne.vi v0, v12, 0 +; CHECK-NEXT: vsm.v v0, (a0) ; CHECK-NEXT: ret %res = call <32 x i1> @llvm.experimental.vector.interleave2.v32i1(<16 x i1> %a, <16 x i1> %b) store <32 x i1> %res, ptr %p diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll index 7fc442c..63c6dae5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll @@ -167,8 +167,8 @@ define void @splat_v32i1(ptr %x, i1 %y) { ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma ; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: vmsne.vi v10, v8, 0 -; CHECK-NEXT: vsm.v v10, (a0) +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: vsm.v v0, (a0) ; CHECK-NEXT: ret %a = insertelement <32 x i1> poison, i1 %y, i32 0 %b = shufflevector <32 x i1> %a, <32 x i1> poison, <32 x i32> zeroinitializer @@ -201,8 +201,8 @@ define void @splat_v64i1(ptr %x, i1 %y) { ; CHECK-NEXT: li a2, 64 ; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma ; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: vmsne.vi v12, v8, 0 -; CHECK-NEXT: vsm.v v12, (a0) +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: vsm.v v0, (a0) ; CHECK-NEXT: ret %a = insertelement <64 x i1> poison, i1 %y, i32 0 %b = shufflevector <64 x i1> %a, <64 x i1> poison, <64 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll index c6665c4..bbff66e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll @@ -369,10 +369,10 @@ define void @masked_load_v32f64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind { ; RV32-NEXT: vle64.v v16, (a3) ; RV32-NEXT: fcvt.d.w fa5, zero ; RV32-NEXT: vmfeq.vf v0, v8, fa5 -; RV32-NEXT: vmfeq.vf v24, v16, fa5 +; RV32-NEXT: vmfeq.vf v16, v16, fa5 ; RV32-NEXT: vle64.v v8, (a0), v0.t ; RV32-NEXT: addi a0, a0, 128 -; RV32-NEXT: vmv1r.v v0, v24 +; RV32-NEXT: vmv1r.v v0, v16 ; RV32-NEXT: vle64.v v16, (a0), v0.t ; RV32-NEXT: vse64.v v8, (a2) ; RV32-NEXT: addi a0, a2, 128 @@ -387,10 +387,10 @@ define void @masked_load_v32f64(ptr %a, 
ptr %m_ptr, ptr %res_ptr) nounwind { ; RV64-NEXT: vle64.v v16, (a3) ; RV64-NEXT: fmv.d.x fa5, zero ; RV64-NEXT: vmfeq.vf v0, v8, fa5 -; RV64-NEXT: vmfeq.vf v24, v16, fa5 +; RV64-NEXT: vmfeq.vf v16, v16, fa5 ; RV64-NEXT: vle64.v v8, (a0), v0.t ; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vmv1r.v v0, v16 ; RV64-NEXT: vle64.v v16, (a0), v0.t ; RV64-NEXT: vse64.v v8, (a2) ; RV64-NEXT: addi a0, a2, 128 @@ -433,10 +433,10 @@ define void @masked_load_v64f32(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind { ; CHECK-NEXT: vle32.v v16, (a3) ; CHECK-NEXT: fmv.w.x fa5, zero ; CHECK-NEXT: vmfeq.vf v0, v8, fa5 -; CHECK-NEXT: vmfeq.vf v24, v16, fa5 +; CHECK-NEXT: vmfeq.vf v16, v16, fa5 ; CHECK-NEXT: vle32.v v8, (a0), v0.t ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vle32.v v16, (a0), v0.t ; CHECK-NEXT: vse32.v v8, (a2) ; CHECK-NEXT: addi a0, a2, 128 @@ -460,10 +460,10 @@ define void @masked_load_v128f16(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind { ; CHECK-NEXT: vle16.v v16, (a3) ; CHECK-NEXT: fmv.h.x fa5, zero ; CHECK-NEXT: vmfeq.vf v0, v8, fa5 -; CHECK-NEXT: vmfeq.vf v24, v16, fa5 +; CHECK-NEXT: vmfeq.vf v16, v16, fa5 ; CHECK-NEXT: vle16.v v8, (a0), v0.t ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vle16.v v16, (a0), v0.t ; CHECK-NEXT: vse16.v v8, (a2) ; CHECK-NEXT: addi a0, a2, 128 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll index b6568fb..7e825b5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll @@ -425,10 +425,10 @@ define void @masked_load_v32i64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind { ; RV64-NEXT: vle64.v v8, (a1) ; RV64-NEXT: vle64.v v16, (a3) ; RV64-NEXT: vmseq.vi v0, v8, 0 -; RV64-NEXT: vmseq.vi v24, v16, 0 +; RV64-NEXT: vmseq.vi v16, v16, 
0 ; RV64-NEXT: vle64.v v8, (a0), v0.t ; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vmv1r.v v0, v16 ; RV64-NEXT: vle64.v v16, (a0), v0.t ; RV64-NEXT: vse64.v v8, (a2) ; RV64-NEXT: addi a0, a2, 128 @@ -487,10 +487,10 @@ define void @masked_load_v64i32(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind { ; CHECK-NEXT: vle32.v v8, (a1) ; CHECK-NEXT: vle32.v v16, (a3) ; CHECK-NEXT: vmseq.vi v0, v8, 0 -; CHECK-NEXT: vmseq.vi v24, v16, 0 +; CHECK-NEXT: vmseq.vi v16, v16, 0 ; CHECK-NEXT: vle32.v v8, (a0), v0.t ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vle32.v v16, (a0), v0.t ; CHECK-NEXT: vse32.v v8, (a2) ; CHECK-NEXT: addi a0, a2, 128 @@ -531,10 +531,10 @@ define void @masked_load_v256i8(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind { ; CHECK-NEXT: vle8.v v8, (a1) ; CHECK-NEXT: vle8.v v16, (a3) ; CHECK-NEXT: vmseq.vi v0, v8, 0 -; CHECK-NEXT: vmseq.vi v24, v16, 0 +; CHECK-NEXT: vmseq.vi v16, v16, 0 ; CHECK-NEXT: vle8.v v8, (a0), v0.t ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vle8.v v16, (a0), v0.t ; CHECK-NEXT: vse8.v v8, (a2) ; CHECK-NEXT: addi a0, a2, 128 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll index 38cd831..7b9a1d8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll @@ -368,7 +368,8 @@ define void @masked_store_v32f64(<32 x double>* %val_ptr, <32 x double>* %a, <32 ; RV32-NEXT: vle64.v v16, (a3) ; RV32-NEXT: vle64.v v8, (a2) ; RV32-NEXT: fcvt.d.w fa5, zero -; RV32-NEXT: vmfeq.vf v7, v16, fa5 +; RV32-NEXT: vmfeq.vf v0, v16, fa5 +; RV32-NEXT: vmv1r.v v7, v0 ; RV32-NEXT: vle64.v v16, (a0) ; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: vle64.v v24, (a0) @@ -386,7 +387,8 @@ define void @masked_store_v32f64(<32 x double>* %val_ptr, <32 x double>* %a, 
<32 ; RV64-NEXT: vle64.v v16, (a3) ; RV64-NEXT: vle64.v v8, (a2) ; RV64-NEXT: fmv.d.x fa5, zero -; RV64-NEXT: vmfeq.vf v7, v16, fa5 +; RV64-NEXT: vmfeq.vf v0, v16, fa5 +; RV64-NEXT: vmv1r.v v7, v0 ; RV64-NEXT: vle64.v v16, (a0) ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v24, (a0) @@ -432,7 +434,8 @@ define void @masked_store_v64f32(<64 x float>* %val_ptr, <64 x float>* %a, <64 x ; CHECK-NEXT: vle32.v v16, (a3) ; CHECK-NEXT: vle32.v v8, (a2) ; CHECK-NEXT: fmv.w.x fa5, zero -; CHECK-NEXT: vmfeq.vf v7, v16, fa5 +; CHECK-NEXT: vmfeq.vf v0, v16, fa5 +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vle32.v v16, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v24, (a0) @@ -459,7 +462,8 @@ define void @masked_store_v128f16(<128 x half>* %val_ptr, <128 x half>* %a, <128 ; CHECK-NEXT: vle16.v v16, (a3) ; CHECK-NEXT: vle16.v v8, (a2) ; CHECK-NEXT: fmv.h.x fa5, zero -; CHECK-NEXT: vmfeq.vf v7, v16, fa5 +; CHECK-NEXT: vmfeq.vf v0, v16, fa5 +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vle16.v v16, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v24, (a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll index d3676d8..9db2a6b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll @@ -401,13 +401,13 @@ define void @masked_store_v32i64(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: li a4, 10 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: sub sp, sp, a3 ; RV32-NEXT: addi a3, a2, 128 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v8, (a3) ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill @@ -416,26 +416,25 @@ define void 
@masked_store_v32i64(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind { ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vmv.v.i v16, 0 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vmseq.vv v7, v24, v16 +; RV32-NEXT: vmseq.vv v0, v24, v16 +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vs1r.v v0, (a2) # Unknown-size Folded Spill ; RV32-NEXT: addi a2, a0, 128 ; RV32-NEXT: vle64.v v24, (a2) ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmseq.vv v0, v8, v16 +; RV32-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmseq.vv v0, v0, v16 ; RV32-NEXT: addi a0, a1, 128 ; RV32-NEXT: vse64.v v24, (a0), v0.t -; RV32-NEXT: vmv1r.v v0, v7 ; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vse64.v v8, (a1), v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: li a1, 10 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -446,7 +445,8 @@ define void @masked_store_v32i64(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind { ; RV64-NEXT: addi a3, a2, 128 ; RV64-NEXT: vle64.v v8, (a3) ; RV64-NEXT: vle64.v v16, (a2) -; RV64-NEXT: vmseq.vi v7, v8, 0 +; RV64-NEXT: vmseq.vi v0, v8, 0 +; RV64-NEXT: vmv1r.v v7, v0 ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v24, (a0) @@ -508,7 +508,8 @@ define void @masked_store_v64i32(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind { ; CHECK-NEXT: addi a3, a2, 128 ; CHECK-NEXT: vle32.v v8, (a3) ; CHECK-NEXT: vle32.v v16, (a2) -; CHECK-NEXT: vmseq.vi v7, v8, 0 +; CHECK-NEXT: vmseq.vi v0, v8, 0 +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vle32.v v8, (a0) ; 
CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v24, (a0) @@ -552,7 +553,8 @@ define void @masked_store_v128i16(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind { ; CHECK-NEXT: addi a3, a2, 128 ; CHECK-NEXT: vle16.v v8, (a3) ; CHECK-NEXT: vle16.v v16, (a2) -; CHECK-NEXT: vmseq.vi v7, v8, 0 +; CHECK-NEXT: vmseq.vi v0, v8, 0 +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v24, (a0) @@ -578,7 +580,8 @@ define void @masked_store_v256i8(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind { ; CHECK-NEXT: addi a3, a2, 128 ; CHECK-NEXT: vle8.v v8, (a3) ; CHECK-NEXT: vle8.v v16, (a2) -; CHECK-NEXT: vmseq.vi v7, v8, 0 +; CHECK-NEXT: vmseq.vi v0, v8, 0 +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle8.v v24, (a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll index cc8d230..f84f79e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll @@ -140,11 +140,9 @@ define <16 x half> @vp_nearbyint_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: fsflags a0 @@ -268,11 +266,9 @@ define <8 x float> @vp_nearbyint_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, 
fa5, v0.t ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: fsflags a0 @@ -312,11 +308,9 @@ define <16 x float> @vp_nearbyint_v16f32(<16 x float> %va, <16 x i1> %m, i32 zer ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: fsflags a0 @@ -398,11 +392,9 @@ define <4 x double> @vp_nearbyint_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: fsflags a0 @@ -442,11 +434,9 @@ define <8 x double> @vp_nearbyint_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: fsflags a0 @@ -486,11 +476,9 @@ define <15 x double> @vp_nearbyint_v15f64(<15 x double> %va, <15 x i1> %m, i32 z ; 
CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: fsflags a0 @@ -530,11 +518,9 @@ define <16 x double> @vp_nearbyint_v16f64(<16 x double> %va, <16 x i1> %m, i32 z ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: fsflags a0 @@ -582,11 +568,9 @@ define <32 x double> @vp_nearbyint_v32f64(<32 x double> %va, <32 x i1> %m, i32 z ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v5, v0 -; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: frflags a1 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: fsflags a1 @@ -600,11 +584,9 @@ define <32 x double> @vp_nearbyint_v32f64(<32 x double> %va, <32 x i1> %m, i32 z ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v6, v7 -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma 
-; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: fsflags a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll index 46c7f31..b957f67 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll @@ -1672,8 +1672,8 @@ define float @vreduce_fminimum_v8f32(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vcpop.m a0, v10 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB103_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 @@ -1708,8 +1708,8 @@ define float @vreduce_fminimum_v16f32(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vcpop.m a0, v12 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB105_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 @@ -1745,8 +1745,8 @@ define float @vreduce_fminimum_v32f32(ptr %x) { ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vcpop.m a0, v16 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB107_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 @@ -1790,8 +1790,8 @@ define float @vreduce_fminimum_v64f32(ptr %x) { ; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v24 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vcpop.m a0, v16 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB109_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 @@ -1831,72 
+1831,122 @@ define float @vreduce_fminimum_v128f32(ptr %x) { ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: addi a1, a0, 384 ; CHECK-NEXT: addi a2, a0, 128 -; CHECK-NEXT: vle32.v v16, (a2) -; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: vle32.v v8, (a2) +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 4 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vle32.v v16, (a1) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a0, 256 -; CHECK-NEXT: vle32.v v24, (a0) +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a3, a3, a2 +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v0, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: addi a0, 
sp, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 +; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vle32.v v24, (a1) -; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v8, v16, v8 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmin.vv v16, v16, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded 
Reload +; CHECK-NEXT: vmfeq.vv v0, v24, v24 +; CHECK-NEXT: vmerge.vvm v16, v24, v8, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v24, v24 -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfmin.vv v8, v8, v24 ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 ; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v24 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vcpop.m a0, v16 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB111_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 @@ -1907,9 +1957,9 @@ define float @vreduce_fminimum_v128f32(ptr %x) { ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: .LBB111_3: ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 @@ -1985,8 +2035,8 @@ define double @vreduce_fminimum_v4f64(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: 
vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vcpop.m a0, v10 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB115_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, %hi(.LCPI115_0) @@ -2021,8 +2071,8 @@ define double @vreduce_fminimum_v8f64(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vcpop.m a0, v12 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB117_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, %hi(.LCPI117_0) @@ -2057,8 +2107,8 @@ define double @vreduce_fminimum_v16f64(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vcpop.m a0, v16 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB119_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, %hi(.LCPI119_0) @@ -2100,8 +2150,8 @@ define double @vreduce_fminimum_v32f64(ptr %x) { ; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v24 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vcpop.m a0, v16 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB121_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, %hi(.LCPI121_0) @@ -2140,71 +2190,121 @@ define double @vreduce_fminimum_v64f64(ptr %x) { ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; 
CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vle64.v v16, (a1) -; CHECK-NEXT: addi a1, a0, 384 ; CHECK-NEXT: vle64.v v8, (a1) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 4 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, a0, 384 +; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a0, 256 -; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a3, a3, a2 +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v0, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vle64.v v8, (a1) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v16, 
v16 -; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 +; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v8, v16, v8 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmin.vv v16, v16, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v0, v24, v24 +; CHECK-NEXT: vmerge.vvm v16, v24, v8, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v24, v24 -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v8, 
v8, v24, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfmin.vv v8, v8, v24 ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 ; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v24 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vcpop.m a0, v16 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB123_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, %hi(.LCPI123_0) @@ -2215,9 +2315,9 @@ define double @vreduce_fminimum_v64f64(ptr %x) { ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: .LBB123_3: ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 @@ -2328,8 +2428,8 @@ define float @vreduce_fmaximum_v8f32(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vcpop.m a0, v10 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB129_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 @@ -2364,8 +2464,8 @@ define float @vreduce_fmaximum_v16f32(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vcpop.m a0, v12 +; 
CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB131_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 @@ -2401,8 +2501,8 @@ define float @vreduce_fmaximum_v32f32(ptr %x) { ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vcpop.m a0, v16 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB133_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 @@ -2446,8 +2546,8 @@ define float @vreduce_fmaximum_v64f32(ptr %x) { ; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v24 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vcpop.m a0, v16 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB135_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 @@ -2487,72 +2587,122 @@ define float @vreduce_fmaximum_v128f32(ptr %x) { ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: addi a1, a0, 384 ; CHECK-NEXT: addi a2, a0, 128 -; CHECK-NEXT: vle32.v v16, (a2) -; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: vle32.v v8, (a2) +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 4 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # 
Unknown-size Folded Spill +; CHECK-NEXT: vle32.v v16, (a1) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a0, 256 -; CHECK-NEXT: vle32.v v24, (a0) +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a3, a3, a2 +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v0, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 +; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vle32.v v24, (a1) -; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 ; CHECK-NEXT: csrr a0, vlenb 
+; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v8, v16, v8 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmax.vv v16, v16, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v0, v24, v24 +; CHECK-NEXT: vmerge.vvm v16, v24, v8, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v24, v24 -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfmax.vv v8, v8, v24 ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; 
CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 ; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v24 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vcpop.m a0, v16 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB137_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 @@ -2563,9 +2713,9 @@ define float @vreduce_fmaximum_v128f32(ptr %x) { ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: .LBB137_3: ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 @@ -2641,8 +2791,8 @@ define double @vreduce_fmaximum_v4f64(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vcpop.m a0, v10 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB141_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, %hi(.LCPI141_0) @@ -2677,8 +2827,8 @@ define double @vreduce_fmaximum_v8f64(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vcpop.m a0, v12 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB143_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, %hi(.LCPI143_0) @@ -2713,8 +2863,8 @@ define double @vreduce_fmaximum_v16f64(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vcpop.m a0, v16 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB145_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: 
lui a0, %hi(.LCPI145_0) @@ -2756,8 +2906,8 @@ define double @vreduce_fmaximum_v32f64(ptr %x) { ; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v24 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vcpop.m a0, v16 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB147_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, %hi(.LCPI147_0) @@ -2796,71 +2946,121 @@ define double @vreduce_fmaximum_v64f64(ptr %x) { ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vle64.v v16, (a1) -; CHECK-NEXT: addi a1, a0, 384 ; CHECK-NEXT: vle64.v v8, (a1) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 4 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, a0, 384 +; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a0, 256 -; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a3, a3, a2 +; CHECK-NEXT: slli a2, a2, 1 
+; CHECK-NEXT: add a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v0, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vle64.v v8, (a1) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 +; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v8, v16, v8 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmax.vv v16, v16, v0 ; CHECK-NEXT: csrr a0, vlenb -; 
CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v0, v24, v24 +; CHECK-NEXT: vmerge.vvm v16, v24, v8, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v24, v24 -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfmax.vv v8, v8, v24 ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 ; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v24 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vcpop.m a0, v16 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB149_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, %hi(.LCPI149_0) @@ -2871,9 +3071,9 @@ define double 
@vreduce_fmaximum_v64f64(ptr %x) { ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: .LBB149_3: ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll index 8f7a564..baff2e9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll @@ -1586,8 +1586,8 @@ define signext i8 @vpreduce_mul_v8i8(i8 signext %s, <8 x i8> %v, <8 x i1> %m, i3 ; RV32-NEXT: mv a2, a0 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vid.v v10 -; RV32-NEXT: vmsltu.vx v9, v10, a1 -; RV32-NEXT: vmand.mm v0, v9, v0 +; RV32-NEXT: vmsltu.vx v2, v10, a1 +; RV32-NEXT: vmand.mm v0, v2, v0 ; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32-NEXT: vmv.v.i v9, 1 ; RV32-NEXT: vmerge.vvm v8, v9, v8, v0 @@ -1615,8 +1615,8 @@ define signext i8 @vpreduce_mul_v8i8(i8 signext %s, <8 x i8> %v, <8 x i1> %m, i3 ; RV64-NEXT: mv a2, a0 ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64-NEXT: vid.v v10 -; RV64-NEXT: vmsltu.vx v9, v10, a1 -; RV64-NEXT: vmand.mm v0, v9, v0 +; RV64-NEXT: vmsltu.vx v2, v10, a1 +; RV64-NEXT: vmand.mm v0, v2, v0 ; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV64-NEXT: vmv.v.i v9, 1 ; RV64-NEXT: vmerge.vvm v8, v9, v8, v0 @@ -1650,8 +1650,8 @@ define signext i8 @vpreduce_mul_v16i8(i8 signext %s, <16 x i8> %v, <16 x i1> %m, ; RV32-NEXT: mv a2, a0 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vid.v v12 -; RV32-NEXT: vmsltu.vx v9, v12, a1 -; RV32-NEXT: vmand.mm v0, v9, v0 +; RV32-NEXT: vmsltu.vx v4, v12, a1 +; RV32-NEXT: vmand.mm v0, v4, v0 ; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RV32-NEXT: vmv.v.i v9, 1 ; RV32-NEXT: vmerge.vvm v8, v9, 
v8, v0 @@ -1681,8 +1681,8 @@ define signext i8 @vpreduce_mul_v16i8(i8 signext %s, <16 x i8> %v, <16 x i1> %m, ; RV64-NEXT: mv a2, a0 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vid.v v12 -; RV64-NEXT: vmsltu.vx v9, v12, a1 -; RV64-NEXT: vmand.mm v0, v9, v0 +; RV64-NEXT: vmsltu.vx v4, v12, a1 +; RV64-NEXT: vmand.mm v0, v4, v0 ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RV64-NEXT: vmv.v.i v9, 1 ; RV64-NEXT: vmerge.vvm v8, v9, v8, v0 @@ -1719,8 +1719,8 @@ define signext i8 @vpreduce_mul_v32i8(i8 signext %s, <32 x i8> %v, <32 x i1> %m, ; RV32-NEXT: li a0, 32 ; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; RV32-NEXT: vid.v v16 -; RV32-NEXT: vmsltu.vx v10, v16, a1 -; RV32-NEXT: vmand.mm v0, v10, v0 +; RV32-NEXT: vmsltu.vx v16, v16, a1 +; RV32-NEXT: vmand.mm v0, v16, v0 ; RV32-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; RV32-NEXT: vmv.v.i v10, 1 ; RV32-NEXT: vmerge.vvm v8, v10, v8, v0 @@ -1753,8 +1753,8 @@ define signext i8 @vpreduce_mul_v32i8(i8 signext %s, <32 x i8> %v, <32 x i1> %m, ; RV64-NEXT: li a0, 32 ; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; RV64-NEXT: vid.v v16 -; RV64-NEXT: vmsltu.vx v10, v16, a1 -; RV64-NEXT: vmand.mm v0, v10, v0 +; RV64-NEXT: vmsltu.vx v16, v16, a1 +; RV64-NEXT: vmand.mm v0, v16, v0 ; RV64-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; RV64-NEXT: vmv.v.i v10, 1 ; RV64-NEXT: vmerge.vvm v8, v10, v8, v0 @@ -1796,14 +1796,14 @@ define signext i8 @vpreduce_mul_v64i8(i8 signext %s, <64 x i8> %v, <64 x i1> %m, ; RV32-NEXT: vle8.v v12, (a2) ; RV32-NEXT: mv a2, a0 ; RV32-NEXT: vid.v v16 -; RV32-NEXT: vmsltu.vx v14, v16, a1 -; RV32-NEXT: vsext.vf4 v16, v12 -; RV32-NEXT: vmsltu.vx v12, v16, a1 +; RV32-NEXT: vmsltu.vx v16, v16, a1 +; RV32-NEXT: vsext.vf4 v24, v12 +; RV32-NEXT: vmsltu.vx v24, v24, a1 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vslideup.vi v14, v12, 4 +; RV32-NEXT: vslideup.vi v16, v24, 4 ; RV32-NEXT: li a0, 64 ; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; RV32-NEXT: vmand.mm v0, v14, v0 +; 
RV32-NEXT: vmand.mm v0, v16, v0 ; RV32-NEXT: vmv.v.i v12, 1 ; RV32-NEXT: vmerge.vvm v8, v12, v8, v0 ; RV32-NEXT: vslidedown.vx v12, v8, a3 @@ -1840,14 +1840,14 @@ define signext i8 @vpreduce_mul_v64i8(i8 signext %s, <64 x i8> %v, <64 x i1> %m, ; RV64-NEXT: vle8.v v12, (a2) ; RV64-NEXT: mv a2, a0 ; RV64-NEXT: vid.v v16 -; RV64-NEXT: vmsltu.vx v14, v16, a1 -; RV64-NEXT: vsext.vf4 v16, v12 -; RV64-NEXT: vmsltu.vx v12, v16, a1 +; RV64-NEXT: vmsltu.vx v16, v16, a1 +; RV64-NEXT: vsext.vf4 v24, v12 +; RV64-NEXT: vmsltu.vx v24, v24, a1 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vslideup.vi v14, v12, 4 +; RV64-NEXT: vslideup.vi v16, v24, 4 ; RV64-NEXT: li a0, 64 ; RV64-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; RV64-NEXT: vmand.mm v0, v14, v0 +; RV64-NEXT: vmand.mm v0, v16, v0 ; RV64-NEXT: vmv.v.i v12, 1 ; RV64-NEXT: vmerge.vvm v8, v12, v8, v0 ; RV64-NEXT: vslidedown.vx v12, v8, a3 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll index 1e4f344..257cffa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll @@ -128,10 +128,8 @@ define <16 x half> @vp_rint_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu @@ -244,10 +242,8 @@ define <8 x float> @vp_rint_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: 
vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu @@ -284,10 +280,8 @@ define <16 x float> @vp_rint_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu @@ -362,10 +356,8 @@ define <4 x double> @vp_rint_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu @@ -402,10 +394,8 @@ define <8 x double> @vp_rint_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu @@ -442,10 +432,8 @@ define <15 x double> 
@vp_rint_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu @@ -482,10 +470,8 @@ define <16 x double> @vp_rint_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu @@ -530,10 +516,8 @@ define <32 x double> @vp_rint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroex ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v5, v0 -; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu @@ -546,10 +530,8 @@ define <32 x double> @vp_rint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroex ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v6, v7 -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; 
CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll index 91feb05..66bc452 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll @@ -201,11 +201,9 @@ define <8 x half> @vp_round_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 4 -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -266,11 +264,9 @@ define <16 x half> @vp_round_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext % ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; ZVFH-NEXT: vmv1r.v v13, v0 -; ZVFH-NEXT: vmflt.vf v13, v10, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v10, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: fsrmi a0, 4 -; ZVFH-NEXT: vmv1r.v v0, v13 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -287,11 +283,9 @@ define <16 x half> @vp_round_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext % ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v17, v0 -; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; 
ZVFHMIN-NEXT: fsrmi a0, 4 -; ZVFHMIN-NEXT: vmv1r.v v0, v17 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -436,11 +430,9 @@ define <8 x float> @vp_round_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -480,11 +472,9 @@ define <16 x float> @vp_round_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -566,11 +556,9 @@ define <4 x double> @vp_round_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext % ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -610,11 +598,9 @@ define <8 x double> @vp_round_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext % ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; 
CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -654,11 +640,9 @@ define <15 x double> @vp_round_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -698,11 +682,9 @@ define <16 x double> @vp_round_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -750,11 +732,9 @@ define <32 x double> @vp_round_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v5, v0 -; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a1, 4 -; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, 
v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -768,11 +748,9 @@ define <32 x double> @vp_round_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v6, v7 -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll index 89ba2d7..59923dd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll @@ -201,11 +201,9 @@ define <8 x half> @vp_roundeven_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext % ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 0 -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -266,11 +264,9 @@ define <16 x half> @vp_roundeven_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroe ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; ZVFH-NEXT: vmv1r.v v13, v0 -; ZVFH-NEXT: vmflt.vf v13, v10, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v10, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: fsrmi a0, 0 -; ZVFH-NEXT: vmv1r.v v0, v13 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v 
v10, v10, v0.t @@ -287,11 +283,9 @@ define <16 x half> @vp_roundeven_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroe ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v17, v0 -; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 0 -; ZVFHMIN-NEXT: vmv1r.v v0, v17 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -436,11 +430,9 @@ define <8 x float> @vp_roundeven_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -480,11 +472,9 @@ define <16 x float> @vp_roundeven_v16f32(<16 x float> %va, <16 x i1> %m, i32 zer ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -566,11 +556,9 @@ define <4 x double> @vp_roundeven_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, 
fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -610,11 +598,9 @@ define <8 x double> @vp_roundeven_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -654,11 +640,9 @@ define <15 x double> @vp_roundeven_v15f64(<15 x double> %va, <15 x i1> %m, i32 z ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -698,11 +682,9 @@ define <16 x double> @vp_roundeven_v16f64(<16 x double> %va, <16 x i1> %m, i32 z ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -750,11 +732,9 @@ define <32 x double> @vp_roundeven_v32f64(<32 x double> %va, <32 x 
i1> %m, i32 z ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v5, v0 -; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a1, 0 -; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -768,11 +748,9 @@ define <32 x double> @vp_roundeven_v32f64(<32 x double> %va, <32 x i1> %m, i32 z ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v6, v7 -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll index 4faee56..0439d0b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll @@ -201,11 +201,9 @@ define <8 x half> @vp_roundtozero_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 1 -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -266,11 +264,9 @@ define <16 x half> @vp_roundtozero_v16f16(<16 x half> %va, <16 x i1> %m, i32 zer ; ZVFH-NEXT: vsetvli 
zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; ZVFH-NEXT: vmv1r.v v13, v0 -; ZVFH-NEXT: vmflt.vf v13, v10, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v10, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: fsrmi a0, 1 -; ZVFH-NEXT: vmv1r.v v0, v13 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -287,11 +283,9 @@ define <16 x half> @vp_roundtozero_v16f16(<16 x half> %va, <16 x i1> %m, i32 zer ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v17, v0 -; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 1 -; ZVFHMIN-NEXT: vmv1r.v v0, v17 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -436,11 +430,9 @@ define <8 x float> @vp_roundtozero_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroe ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -480,11 +472,9 @@ define <16 x float> @vp_roundtozero_v16f32(<16 x float> %va, <16 x i1> %m, i32 z ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, 
v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -566,11 +556,9 @@ define <4 x double> @vp_roundtozero_v4f64(<4 x double> %va, <4 x i1> %m, i32 zer ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -610,11 +598,9 @@ define <8 x double> @vp_roundtozero_v8f64(<8 x double> %va, <8 x i1> %m, i32 zer ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -654,11 +640,9 @@ define <15 x double> @vp_roundtozero_v15f64(<15 x double> %va, <15 x i1> %m, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -698,11 +682,9 @@ define <16 x double> @vp_roundtozero_v16f64(<16 x double> %va, <16 x i1> %m, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; 
CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -750,11 +732,9 @@ define <32 x double> @vp_roundtozero_v32f64(<32 x double> %va, <32 x i1> %m, i32 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v5, v0 -; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a1, 1 -; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -768,11 +748,9 @@ define <32 x double> @vp_roundtozero_v32f64(<32 x double> %va, <32 x i1> %m, i32 ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v6, v7 -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll index 72f86dd..ec33a70 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll @@ -26,8 +26,7 @@ define <7 x i1> @fcmp_oeq_vv_v7f16(<7 x half> %va, <7 x half> %vb, <7 x i1> %m, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: 
vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <7 x i1> @llvm.vp.fcmp.v7f16(<7 x half> %va, <7 x half> %vb, metadata !"oeq", <7 x i1> %m, i32 %evl) ret <7 x i1> %v @@ -48,8 +47,7 @@ define <8 x i1> @fcmp_oeq_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"oeq", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -73,8 +71,7 @@ define <8 x i1> @fcmp_oeq_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v12, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -100,8 +97,7 @@ define <8 x i1> @fcmp_oeq_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v10, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -122,8 +118,7 @@ define <8 x i1> @fcmp_ogt_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v10, v0.t -; 
ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"ogt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -147,8 +142,7 @@ define <8 x i1> @fcmp_ogt_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v10, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -174,8 +168,7 @@ define <8 x i1> @fcmp_ogt_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v10, v12, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v10, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -196,8 +189,7 @@ define <8 x i1> @fcmp_oge_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v12, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"oge", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -221,8 +213,7 @@ define <8 x i1> @fcmp_oge_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, 
v12, v10, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v12, v10, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -248,8 +239,7 @@ define <8 x i1> @fcmp_oge_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v10, v12, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v10, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -270,8 +260,7 @@ define <8 x i1> @fcmp_olt_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"olt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -295,8 +284,7 @@ define <8 x i1> @fcmp_olt_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v10, v12, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v10, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -322,8 +310,7 @@ define <8 x i1> @fcmp_olt_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, 
ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v10, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -344,8 +331,7 @@ define <8 x i1> @fcmp_ole_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v12, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"ole", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -369,8 +355,7 @@ define <8 x i1> @fcmp_ole_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v10, v12, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v10, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -396,8 +381,7 @@ define <8 x i1> @fcmp_ole_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v12, v10, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -420,9 +404,9 @@ define <8 x i1> @fcmp_one_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; 
ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmflt.vv v9, v10, v12, v0.t -; ZVFHMIN-NEXT: vmor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmflt.vv v2, v12, v10, v0.t +; ZVFHMIN-NEXT: vmflt.vv v0, v10, v12, v0.t +; ZVFHMIN-NEXT: vmor.mm v0, v0, v2 ; ZVFHMIN-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"one", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -448,9 +432,9 @@ define <8 x i1> @fcmp_one_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v10, v12, v0.t -; ZVFHMIN-NEXT: vmflt.vv v9, v12, v10, v0.t -; ZVFHMIN-NEXT: vmor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmflt.vv v2, v10, v12, v0.t +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v10, v0.t +; ZVFHMIN-NEXT: vmor.mm v0, v0, v2 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -478,9 +462,9 @@ define <8 x i1> @fcmp_one_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmflt.vv v9, v10, v12, v0.t -; ZVFHMIN-NEXT: vmor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmflt.vv v2, v12, v10, v0.t +; ZVFHMIN-NEXT: vmflt.vv v0, v10, v12, v0.t +; ZVFHMIN-NEXT: vmor.mm v0, v0, v2 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -502,12 +486,12 @@ define <8 x i1> @fcmp_ord_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv 
v9, v10, v10, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v2, v10, v10, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10, v0.t -; ZVFHMIN-NEXT: vmand.mm v0, v8, v9 +; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10, v0.t +; ZVFHMIN-NEXT: vmand.mm v0, v0, v2 ; ZVFHMIN-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"ord", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -534,12 +518,12 @@ define <8 x i1> @fcmp_ord_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v2, v10, v10, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v9, v10, v10, v0.t -; ZVFHMIN-NEXT: vmand.mm v0, v8, v9 +; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10, v0.t +; ZVFHMIN-NEXT: vmand.mm v0, v2, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -568,12 +552,12 @@ define <8 x i1> @fcmp_ord_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v2, v10, v10, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v9, v10, v10, v0.t -; ZVFHMIN-NEXT: vmand.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10, v0.t +; ZVFHMIN-NEXT: vmand.mm v0, v0, v2 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> 
poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -596,9 +580,9 @@ define <8 x i1> @fcmp_ueq_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmflt.vv v9, v10, v12, v0.t -; ZVFHMIN-NEXT: vmnor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmflt.vv v2, v12, v10, v0.t +; ZVFHMIN-NEXT: vmflt.vv v0, v10, v12, v0.t +; ZVFHMIN-NEXT: vmnor.mm v0, v0, v2 ; ZVFHMIN-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"ueq", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -624,9 +608,9 @@ define <8 x i1> @fcmp_ueq_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v10, v12, v0.t -; ZVFHMIN-NEXT: vmflt.vv v9, v12, v10, v0.t -; ZVFHMIN-NEXT: vmnor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmflt.vv v2, v10, v12, v0.t +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v10, v0.t +; ZVFHMIN-NEXT: vmnor.mm v0, v0, v2 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -654,9 +638,9 @@ define <8 x i1> @fcmp_ueq_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmflt.vv v9, v10, v12, v0.t -; ZVFHMIN-NEXT: vmnor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmflt.vv v2, v12, v10, v0.t +; ZVFHMIN-NEXT: vmflt.vv v0, v10, v12, v0.t +; ZVFHMIN-NEXT: vmnor.mm v0, v0, v2 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> 
poison, <8 x i32> zeroinitializer @@ -678,8 +662,8 @@ define <8 x i1> @fcmp_ugt_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v12, v10, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"ugt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -704,8 +688,8 @@ define <8 x i1> @fcmp_ugt_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v10, v12, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v10, v12, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -732,8 +716,8 @@ define <8 x i1> @fcmp_ugt_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v12, v10, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -755,8 +739,8 @@ define <8 x i1> @fcmp_uge_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v10, v0.t +; 
ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"uge", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -781,8 +765,8 @@ define <8 x i1> @fcmp_uge_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v10, v12, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v10, v12, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -809,8 +793,8 @@ define <8 x i1> @fcmp_uge_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v10, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -832,8 +816,8 @@ define <8 x i1> @fcmp_ult_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v12, v10, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"ult", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -858,8 +842,8 @@ define <8 x i1> @fcmp_ult_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, 
a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v12, v10, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -886,8 +870,8 @@ define <8 x i1> @fcmp_ult_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v10, v12, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v10, v12, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -909,8 +893,8 @@ define <8 x i1> @fcmp_ule_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v10, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"ule", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -935,8 +919,8 @@ define <8 x i1> @fcmp_ule_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v10, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -963,8 +947,8 @@ define <8 x i1> 
@fcmp_ule_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v10, v12, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v10, v12, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -985,8 +969,7 @@ define <8 x i1> @fcmp_une_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfne.vv v0, v12, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"une", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1010,8 +993,7 @@ define <8 x i1> @fcmp_une_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v8, v10, v12, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfne.vv v0, v10, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -1037,8 +1019,7 @@ define <8 x i1> @fcmp_une_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfne.vv v0, v12, v10, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> 
poison, <8 x i32> zeroinitializer @@ -1060,12 +1041,12 @@ define <8 x i1> @fcmp_uno_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v9, v10, v10, v0.t +; ZVFHMIN-NEXT: vmfne.vv v2, v10, v10, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10, v0.t -; ZVFHMIN-NEXT: vmor.mm v0, v8, v9 +; ZVFHMIN-NEXT: vmfne.vv v0, v10, v10, v0.t +; ZVFHMIN-NEXT: vmor.mm v0, v0, v2 ; ZVFHMIN-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"uno", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1092,12 +1073,12 @@ define <8 x i1> @fcmp_uno_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10, v0.t +; ZVFHMIN-NEXT: vmfne.vv v2, v10, v10, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v9, v10, v10, v0.t -; ZVFHMIN-NEXT: vmor.mm v0, v8, v9 +; ZVFHMIN-NEXT: vmfne.vv v0, v10, v10, v0.t +; ZVFHMIN-NEXT: vmor.mm v0, v2, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -1126,12 +1107,12 @@ define <8 x i1> @fcmp_uno_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10, v0.t +; ZVFHMIN-NEXT: vmfne.vv v2, v10, v10, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; 
ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v9, v10, v10, v0.t -; ZVFHMIN-NEXT: vmor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmfne.vv v0, v10, v10, v0.t +; ZVFHMIN-NEXT: vmor.mm v0, v0, v2 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -1142,125 +1123,47 @@ define <8 x i1> @fcmp_uno_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3 declare <128 x i1> @llvm.vp.fcmp.v128f16(<128 x half>, <128 x half>, metadata, <128 x i1>, i32) define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128 x i1> %m, i32 zeroext %evl) { -; ZVFH32-LABEL: fcmp_oeq_vv_v128f16: -; ZVFH32: # %bb.0: -; ZVFH32-NEXT: addi sp, sp, -16 -; ZVFH32-NEXT: .cfi_def_cfa_offset 16 -; ZVFH32-NEXT: csrr a1, vlenb -; ZVFH32-NEXT: slli a1, a1, 4 -; ZVFH32-NEXT: sub sp, sp, a1 -; ZVFH32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; ZVFH32-NEXT: addi a1, a0, 128 -; ZVFH32-NEXT: li a3, 64 -; ZVFH32-NEXT: vsetvli zero, a3, e16, m8, ta, ma -; ZVFH32-NEXT: vle16.v v24, (a1) -; ZVFH32-NEXT: csrr a1, vlenb -; ZVFH32-NEXT: slli a1, a1, 3 -; ZVFH32-NEXT: add a1, sp, a1 -; ZVFH32-NEXT: addi a1, a1, 16 -; ZVFH32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; ZVFH32-NEXT: vle16.v v24, (a0) -; ZVFH32-NEXT: addi a0, sp, 16 -; ZVFH32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; ZVFH32-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; ZVFH32-NEXT: vmv1r.v v6, v0 -; ZVFH32-NEXT: vslidedown.vi v25, v0, 8 -; ZVFH32-NEXT: vmv.v.v v7, v25 -; ZVFH32-NEXT: mv a0, a2 -; ZVFH32-NEXT: bltu a2, a3, .LBB43_2 -; ZVFH32-NEXT: # %bb.1: -; ZVFH32-NEXT: li a0, 64 -; ZVFH32-NEXT: .LBB43_2: -; ZVFH32-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFH32-NEXT: addi a0, sp, 16 -; ZVFH32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFH32-NEXT: 
vmv1r.v v0, v6 -; ZVFH32-NEXT: vmfeq.vv v6, v8, v24, v0.t -; ZVFH32-NEXT: addi a0, a2, -64 -; ZVFH32-NEXT: sltu a1, a2, a0 -; ZVFH32-NEXT: addi a1, a1, -1 -; ZVFH32-NEXT: and a0, a1, a0 -; ZVFH32-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFH32-NEXT: csrr a0, vlenb -; ZVFH32-NEXT: slli a0, a0, 3 -; ZVFH32-NEXT: add a0, sp, a0 -; ZVFH32-NEXT: addi a0, a0, 16 -; ZVFH32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; ZVFH32-NEXT: vmv1r.v v25, v7 -; ZVFH32-NEXT: vmv1r.v v0, v7 -; ZVFH32-NEXT: vmfeq.vv v25, v16, v8, v0.t -; ZVFH32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; ZVFH32-NEXT: vslideup.vi v6, v25, 8 -; ZVFH32-NEXT: vmv.v.v v0, v6 -; ZVFH32-NEXT: csrr a0, vlenb -; ZVFH32-NEXT: slli a0, a0, 4 -; ZVFH32-NEXT: add sp, sp, a0 -; ZVFH32-NEXT: addi sp, sp, 16 -; ZVFH32-NEXT: ret -; -; ZVFH64-LABEL: fcmp_oeq_vv_v128f16: -; ZVFH64: # %bb.0: -; ZVFH64-NEXT: addi sp, sp, -16 -; ZVFH64-NEXT: .cfi_def_cfa_offset 16 -; ZVFH64-NEXT: csrr a1, vlenb -; ZVFH64-NEXT: li a3, 24 -; ZVFH64-NEXT: mul a1, a1, a3 -; ZVFH64-NEXT: sub sp, sp, a1 -; ZVFH64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb -; ZVFH64-NEXT: addi a1, a0, 128 -; ZVFH64-NEXT: li a3, 64 -; ZVFH64-NEXT: vsetvli zero, a3, e16, m8, ta, ma -; ZVFH64-NEXT: vle16.v v24, (a1) -; ZVFH64-NEXT: csrr a1, vlenb -; ZVFH64-NEXT: slli a1, a1, 4 -; ZVFH64-NEXT: add a1, sp, a1 -; ZVFH64-NEXT: addi a1, a1, 16 -; ZVFH64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; ZVFH64-NEXT: vle16.v v24, (a0) -; ZVFH64-NEXT: csrr a0, vlenb -; ZVFH64-NEXT: slli a0, a0, 3 -; ZVFH64-NEXT: add a0, sp, a0 -; ZVFH64-NEXT: addi a0, a0, 16 -; ZVFH64-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; ZVFH64-NEXT: addi a0, sp, 16 -; ZVFH64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; ZVFH64-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; ZVFH64-NEXT: mv a0, a2 -; ZVFH64-NEXT: vslidedown.vi v17, v0, 8 -; ZVFH64-NEXT: bltu a2, a3, .LBB43_2 -; ZVFH64-NEXT: # 
%bb.1: -; ZVFH64-NEXT: li a0, 64 -; ZVFH64-NEXT: .LBB43_2: -; ZVFH64-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFH64-NEXT: csrr a0, vlenb -; ZVFH64-NEXT: slli a0, a0, 3 -; ZVFH64-NEXT: add a0, sp, a0 -; ZVFH64-NEXT: addi a0, a0, 16 -; ZVFH64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFH64-NEXT: vmfeq.vv v16, v8, v24, v0.t -; ZVFH64-NEXT: addi a0, a2, -64 -; ZVFH64-NEXT: sltu a1, a2, a0 -; ZVFH64-NEXT: addi a1, a1, -1 -; ZVFH64-NEXT: and a0, a1, a0 -; ZVFH64-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFH64-NEXT: csrr a0, vlenb -; ZVFH64-NEXT: slli a0, a0, 4 -; ZVFH64-NEXT: add a0, sp, a0 -; ZVFH64-NEXT: addi a0, a0, 16 -; ZVFH64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; ZVFH64-NEXT: addi a0, sp, 16 -; ZVFH64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFH64-NEXT: vmv1r.v v0, v17 -; ZVFH64-NEXT: vmfeq.vv v17, v24, v8, v0.t -; ZVFH64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; ZVFH64-NEXT: vslideup.vi v16, v17, 8 -; ZVFH64-NEXT: vmv.v.v v0, v16 -; ZVFH64-NEXT: csrr a0, vlenb -; ZVFH64-NEXT: li a1, 24 -; ZVFH64-NEXT: mul a0, a0, a1 -; ZVFH64-NEXT: add sp, sp, a0 -; ZVFH64-NEXT: addi sp, sp, 16 -; ZVFH64-NEXT: ret +; ZVFH-LABEL: fcmp_oeq_vv_v128f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: addi sp, sp, -16 +; ZVFH-NEXT: .cfi_def_cfa_offset 16 +; ZVFH-NEXT: csrr a1, vlenb +; ZVFH-NEXT: slli a1, a1, 3 +; ZVFH-NEXT: sub sp, sp, a1 +; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; ZVFH-NEXT: addi a1, a0, 128 +; ZVFH-NEXT: li a3, 64 +; ZVFH-NEXT: vsetvli zero, a3, e16, m8, ta, ma +; ZVFH-NEXT: vle16.v v24, (a1) +; ZVFH-NEXT: addi a1, sp, 16 +; ZVFH-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; ZVFH-NEXT: vle16.v v24, (a0) +; ZVFH-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; ZVFH-NEXT: vslidedown.vi v7, v0, 8 +; ZVFH-NEXT: mv a0, a2 +; ZVFH-NEXT: bltu a2, a3, .LBB43_2 +; ZVFH-NEXT: # %bb.1: +; ZVFH-NEXT: li a0, 64 +; ZVFH-NEXT: .LBB43_2: +; ZVFH-NEXT: 
vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vmfeq.vv v24, v8, v24, v0.t +; ZVFH-NEXT: addi a0, a2, -64 +; ZVFH-NEXT: sltu a1, a2, a0 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: and a0, a1, a0 +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: addi a0, sp, 16 +; ZVFH-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFH-NEXT: vmv1r.v v0, v7 +; ZVFH-NEXT: vmfeq.vv v8, v16, v8, v0.t +; ZVFH-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; ZVFH-NEXT: vslideup.vi v24, v8, 8 +; ZVFH-NEXT: vmv.v.v v0, v24 +; ZVFH-NEXT: csrr a0, vlenb +; ZVFH-NEXT: slli a0, a0, 3 +; ZVFH-NEXT: add sp, sp, a0 +; ZVFH-NEXT: addi sp, sp, 16 +; ZVFH-NEXT: ret ; ; ZVFHMIN32-LABEL: fcmp_oeq_vv_v128f16: ; ZVFHMIN32: # %bb.0: @@ -2367,8 +2270,7 @@ define <7 x i1> @fcmp_oeq_vv_v7f64(<7 x double> %va, <7 x double> %vb, <7 x i1> ; CHECK-LABEL: fcmp_oeq_vv_v7f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <7 x i1> @llvm.vp.fcmp.v7f64(<7 x double> %va, <7 x double> %vb, metadata !"oeq", <7 x i1> %m, i32 %evl) ret <7 x i1> %v @@ -2380,8 +2282,7 @@ define <8 x i1> @fcmp_oeq_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> ; CHECK-LABEL: fcmp_oeq_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"oeq", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2391,8 +2292,7 @@ define <8 x i1> @fcmp_oeq_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 ; CHECK-LABEL: fcmp_oeq_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfeq.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = 
insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2404,8 +2304,7 @@ define <8 x i1> @fcmp_oeq_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m ; CHECK-LABEL: fcmp_oeq_vf_swap_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfeq.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2417,8 +2316,7 @@ define <8 x i1> @fcmp_ogt_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> ; CHECK-LABEL: fcmp_ogt_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ogt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2428,8 +2326,7 @@ define <8 x i1> @fcmp_ogt_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 ; CHECK-LABEL: fcmp_ogt_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2441,8 +2338,7 @@ define <8 x i1> @fcmp_ogt_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m ; CHECK-LABEL: fcmp_ogt_vf_swap_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, 
i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2454,8 +2350,7 @@ define <8 x i1> @fcmp_oge_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> ; CHECK-LABEL: fcmp_oge_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"oge", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2465,8 +2360,7 @@ define <8 x i1> @fcmp_oge_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 ; CHECK-LABEL: fcmp_oge_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2478,8 +2372,7 @@ define <8 x i1> @fcmp_oge_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m ; CHECK-LABEL: fcmp_oge_vf_swap_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2491,8 +2384,7 @@ define <8 x i1> @fcmp_olt_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> ; CHECK-LABEL: fcmp_olt_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"olt", <8 x i1> %m, i32 
%evl) ret <8 x i1> %v @@ -2502,8 +2394,7 @@ define <8 x i1> @fcmp_olt_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 ; CHECK-LABEL: fcmp_olt_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2515,8 +2406,7 @@ define <8 x i1> @fcmp_olt_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m ; CHECK-LABEL: fcmp_olt_vf_swap_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2528,8 +2418,7 @@ define <8 x i1> @fcmp_ole_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> ; CHECK-LABEL: fcmp_ole_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmfle.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ole", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2539,8 +2428,7 @@ define <8 x i1> @fcmp_ole_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 ; CHECK-LABEL: fcmp_ole_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2552,8 +2440,7 @@ define 
<8 x i1> @fcmp_ole_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m ; CHECK-LABEL: fcmp_ole_vf_swap_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2565,9 +2452,9 @@ define <8 x i1> @fcmp_one_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> ; CHECK-LABEL: fcmp_one_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vv v4, v8, v12, v0.t +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"one", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2577,9 +2464,9 @@ define <8 x i1> @fcmp_one_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 ; CHECK-LABEL: fcmp_one_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2591,9 +2478,9 @@ define <8 x i1> @fcmp_one_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m ; CHECK-LABEL: fcmp_one_vf_swap_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, 
v13, v12 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2605,9 +2492,9 @@ define <8 x i1> @fcmp_ord_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> ; CHECK-LABEL: fcmp_ord_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v16, v12, v12, v0.t -; CHECK-NEXT: vmfeq.vv v12, v8, v8, v0.t -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vv v4, v12, v12, v0.t +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmand.mm v0, v0, v4 ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ord", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2619,9 +2506,9 @@ define <8 x i1> @fcmp_ord_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfeq.vf v16, v12, fa0, v0.t -; CHECK-NEXT: vmfeq.vv v12, v8, v8, v0.t -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vf v4, v12, fa0, v0.t +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmand.mm v0, v0, v4 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2635,9 +2522,9 @@ define <8 x i1> @fcmp_ord_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfeq.vf v16, v12, fa0, v0.t -; CHECK-NEXT: vmfeq.vv v12, v8, v8, v0.t -; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmfeq.vf v4, v12, fa0, v0.t +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: 
vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2649,9 +2536,9 @@ define <8 x i1> @fcmp_ueq_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> ; CHECK-LABEL: fcmp_ueq_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vv v4, v8, v12, v0.t +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ueq", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2661,9 +2548,9 @@ define <8 x i1> @fcmp_ueq_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 ; CHECK-LABEL: fcmp_ueq_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2675,9 +2562,9 @@ define <8 x i1> @fcmp_ueq_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m ; CHECK-LABEL: fcmp_ueq_vf_swap_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> 
poison, <8 x i32> zeroinitializer @@ -2689,8 +2576,8 @@ define <8 x i1> @fcmp_ugt_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> ; CHECK-LABEL: fcmp_ugt_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vv v0, v8, v12, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ugt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2700,8 +2587,8 @@ define <8 x i1> @fcmp_ugt_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 ; CHECK-LABEL: fcmp_ugt_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2713,8 +2600,8 @@ define <8 x i1> @fcmp_ugt_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m ; CHECK-LABEL: fcmp_ugt_vf_swap_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2726,8 +2613,8 @@ define <8 x i1> @fcmp_uge_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> ; CHECK-LABEL: fcmp_uge_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> 
%va, <8 x double> %vb, metadata !"uge", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2737,8 +2624,8 @@ define <8 x i1> @fcmp_uge_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 ; CHECK-LABEL: fcmp_uge_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2750,8 +2637,8 @@ define <8 x i1> @fcmp_uge_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m ; CHECK-LABEL: fcmp_uge_vf_swap_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2763,8 +2650,8 @@ define <8 x i1> @fcmp_ult_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> ; CHECK-LABEL: fcmp_ult_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ult", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2774,8 +2661,8 @@ define <8 x i1> @fcmp_ult_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 ; CHECK-LABEL: fcmp_ult_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret 
%elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2787,8 +2674,8 @@ define <8 x i1> @fcmp_ult_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m ; CHECK-LABEL: fcmp_ult_vf_swap_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2800,8 +2687,8 @@ define <8 x i1> @fcmp_ule_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> ; CHECK-LABEL: fcmp_ule_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ule", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2811,8 +2698,8 @@ define <8 x i1> @fcmp_ule_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 ; CHECK-LABEL: fcmp_ule_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2824,8 +2711,8 @@ define <8 x i1> @fcmp_ule_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m ; CHECK-LABEL: fcmp_ule_vf_swap_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: 
vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2837,8 +2724,7 @@ define <8 x i1> @fcmp_une_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> ; CHECK-LABEL: fcmp_une_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfne.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmfne.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"une", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2848,8 +2734,7 @@ define <8 x i1> @fcmp_une_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 ; CHECK-LABEL: fcmp_une_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfne.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmfne.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2861,8 +2746,7 @@ define <8 x i1> @fcmp_une_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m ; CHECK-LABEL: fcmp_une_vf_swap_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfne.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmfne.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2874,9 +2758,9 @@ define <8 x i1> @fcmp_uno_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> ; CHECK-LABEL: fcmp_uno_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfne.vv v16, v12, v12, v0.t -; CHECK-NEXT: vmfne.vv v12, v8, v8, v0.t -; CHECK-NEXT: vmor.mm 
v0, v12, v16 +; CHECK-NEXT: vmfne.vv v4, v12, v12, v0.t +; CHECK-NEXT: vmfne.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"uno", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2888,9 +2772,9 @@ define <8 x i1> @fcmp_uno_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfne.vf v16, v12, fa0, v0.t -; CHECK-NEXT: vmfne.vv v12, v8, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v12, v16 +; CHECK-NEXT: vmfne.vf v4, v12, fa0, v0.t +; CHECK-NEXT: vmfne.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2904,9 +2788,9 @@ define <8 x i1> @fcmp_uno_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfne.vf v16, v12, fa0, v0.t -; CHECK-NEXT: vmfne.vv v12, v8, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v16, v12 +; CHECK-NEXT: vmfne.vf v4, v12, fa0, v0.t +; CHECK-NEXT: vmfne.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2922,78 +2806,46 @@ define <32 x i1> @fcmp_oeq_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 x ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 26 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 
# sp + 16 + 26 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a3, a1, 4 -; CHECK-NEXT: add a1, a3, a1 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v24, v0, 2 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 3 -; CHECK-NEXT: add a0, a1, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: bltu a2, a1, .LBB87_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB87_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 3 -; CHECK-NEXT: add a0, a1, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v16, v8, v24, v0.t +; CHECK-NEXT: vmfeq.vv v24, v8, v24, v0.t ; CHECK-NEXT: addi a0, a2, -16 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 4 -; CHECK-NEXT: add a0, a1, a0 -; CHECK-NEXT: add a0, sp, a0 -; 
CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v17, v8, v24, v0.t +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v8, v16, v8, v0.t ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-NEXT: vslideup.vi v16, v17, 2 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vslideup.vi v24, v8, 2 +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 26 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call <32 x i1> @llvm.vp.fcmp.v32f64(<32 x double> %va, <32 x double> %vb, metadata !"oeq", <32 x i1> %m, i32 %evl) ret <32 x i1> %v } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; ZVFH32: {{.*}} +; ZVFH64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll index 97b5181..65d9dd0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll @@ -595,47 +595,36 @@ define <256 x i1> @icmp_eq_vv_v256i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vmv1r.v v6, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: addi a2, a0, 128 -; CHECK-NEXT: vle8.v v8, (a2) +; CHECK-NEXT: vle8.v v24, (a2) ; CHECK-NEXT: addi a2, a3, -128 ; CHECK-NEXT: sltu a4, a3, a2 ; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: vle8.v v24, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: and a2, a4, a2 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; CHECK-NEXT: vmseq.vv v7, v16, v8, v0.t +; CHECK-NEXT: vmseq.vv v16, v16, v24, v0.t ; CHECK-NEXT: bltu a3, a1, .LBB51_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 128 ; CHECK-NEXT: .LBB51_2: ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli 
a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmseq.vv v16, v8, v24, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmv1r.v v8, v7 +; CHECK-NEXT: vmseq.vv v0, v24, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -655,16 +644,15 @@ define <256 x i1> @icmp_eq_vx_v256i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 z ; CHECK-NEXT: addi a4, a4, -1 ; CHECK-NEXT: and a1, a4, a1 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vmseq.vx v24, v16, a0, v0.t +; CHECK-NEXT: vmseq.vx v16, v16, a0, v0.t ; CHECK-NEXT: bltu a2, a3, .LBB52_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: .LBB52_2: ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmv1r.v v8, v24 +; CHECK-NEXT: vmseq.vx v0, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer @@ -684,16 +672,15 @@ define <256 x i1> @icmp_eq_vx_swap_v256i8(<256 x i8> %va, i8 %b, <256 x i1> %m, ; CHECK-NEXT: addi a4, a4, -1 ; CHECK-NEXT: and a1, a4, a1 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vmseq.vx v24, v16, a0, v0.t +; CHECK-NEXT: vmseq.vx v16, v16, a0, v0.t ; CHECK-NEXT: bltu a2, a3, .LBB53_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: .LBB53_2: ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmv1r.v v8, v24 +; CHECK-NEXT: vmseq.vx v0, v8, 
a0, v0.t +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer @@ -707,8 +694,7 @@ define <8 x i1> @icmp_eq_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 ; CHECK-LABEL: icmp_eq_vv_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmseq.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmseq.vv v0, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"eq", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -718,8 +704,7 @@ define <8 x i1> @icmp_eq_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroex ; CHECK-LABEL: icmp_eq_vx_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmseq.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer @@ -731,8 +716,7 @@ define <8 x i1> @icmp_eq_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 z ; CHECK-LABEL: icmp_eq_vx_swap_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmseq.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer @@ -744,8 +728,7 @@ define <8 x i1> @icmp_eq_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: icmp_eq_vi_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmseq.vi v10, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmseq.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> 
%va, <8 x i32> splat (i32 4), metadata !"eq", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -755,8 +738,7 @@ define <8 x i1> @icmp_eq_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext % ; CHECK-LABEL: icmp_eq_vi_swap_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmseq.vi v10, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmseq.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"eq", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -766,8 +748,7 @@ define <8 x i1> @icmp_ne_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 ; CHECK-LABEL: icmp_ne_vv_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsne.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsne.vv v0, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"ne", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -777,8 +758,7 @@ define <8 x i1> @icmp_ne_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroex ; CHECK-LABEL: icmp_ne_vx_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmsne.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsne.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer @@ -790,8 +770,7 @@ define <8 x i1> @icmp_ne_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 z ; CHECK-LABEL: icmp_ne_vx_swap_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmsne.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsne.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer @@ -803,8 +782,7 @@ define <8 x 
i1> @icmp_ne_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: icmp_ne_vi_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsne.vi v10, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsne.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"ne", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -814,8 +792,7 @@ define <8 x i1> @icmp_ne_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext % ; CHECK-LABEL: icmp_ne_vi_swap_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsne.vi v10, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsne.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"ne", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -825,8 +802,7 @@ define <8 x i1> @icmp_ugt_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i3 ; CHECK-LABEL: icmp_ugt_vv_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsltu.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsltu.vv v0, v10, v8, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"ugt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -836,8 +812,7 @@ define <8 x i1> @icmp_ugt_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroe ; CHECK-LABEL: icmp_ugt_vx_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmsgtu.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsgtu.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer @@ -849,8 +824,7 @@ define <8 x i1> @icmp_ugt_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 ; CHECK-LABEL: icmp_ugt_vx_swap_v8i32: ; CHECK: # %bb.0: ; 
CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsltu.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer @@ -862,8 +836,7 @@ define <8 x i1> @icmp_ugt_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: icmp_ugt_vi_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsgtu.vi v10, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsgtu.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"ugt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -873,8 +846,7 @@ define <8 x i1> @icmp_ugt_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext ; CHECK-LABEL: icmp_ugt_vi_swap_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsleu.vi v10, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsleu.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"ugt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -884,8 +856,7 @@ define <8 x i1> @icmp_uge_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i3 ; CHECK-LABEL: icmp_uge_vv_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsleu.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsleu.vv v0, v10, v8, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"uge", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -895,10 +866,9 @@ define <8 x i1> @icmp_uge_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroe ; CHECK-LABEL: icmp_uge_vx_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmv.v.x v12, a0 +; CHECK-NEXT: vmv.v.x v10, a0 ; 
CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmsleu.vv v10, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsleu.vv v0, v10, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer @@ -910,8 +880,7 @@ define <8 x i1> @icmp_uge_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 ; CHECK-LABEL: icmp_uge_vx_swap_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmsleu.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsleu.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer @@ -923,8 +892,7 @@ define <8 x i1> @icmp_uge_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: icmp_uge_vi_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsgtu.vi v10, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsgtu.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"uge", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -934,8 +902,7 @@ define <8 x i1> @icmp_uge_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext ; CHECK-LABEL: icmp_uge_vi_swap_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsleu.vi v10, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsleu.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"uge", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -945,8 +912,7 @@ define <8 x i1> @icmp_ult_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i3 ; CHECK-LABEL: icmp_ult_vv_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsltu.vv v12, v8, v10, v0.t -; 
CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsltu.vv v0, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"ult", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -956,8 +922,7 @@ define <8 x i1> @icmp_ult_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroe ; CHECK-LABEL: icmp_ult_vx_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsltu.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer @@ -969,8 +934,7 @@ define <8 x i1> @icmp_ult_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 ; CHECK-LABEL: icmp_ult_vx_swap_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmsgtu.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsgtu.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer @@ -982,8 +946,7 @@ define <8 x i1> @icmp_ult_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: icmp_ult_vi_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsleu.vi v10, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsleu.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"ult", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -993,8 +956,7 @@ define <8 x i1> @icmp_ult_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext ; CHECK-LABEL: icmp_ult_vi_swap_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsgtu.vi v10, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsgtu.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> 
@llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"ult", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1004,8 +966,7 @@ define <8 x i1> @icmp_sgt_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i3 ; CHECK-LABEL: icmp_sgt_vv_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmslt.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmslt.vv v0, v10, v8, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"sgt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1015,8 +976,7 @@ define <8 x i1> @icmp_sgt_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroe ; CHECK-LABEL: icmp_sgt_vx_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmsgt.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsgt.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer @@ -1028,8 +988,7 @@ define <8 x i1> @icmp_sgt_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 ; CHECK-LABEL: icmp_sgt_vx_swap_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmslt.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer @@ -1041,8 +1000,7 @@ define <8 x i1> @icmp_sgt_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: icmp_sgt_vi_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsgt.vi v10, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsgt.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"sgt", <8 x i1> %m, i32 %evl) ret <8 x i1> 
%v @@ -1052,8 +1010,7 @@ define <8 x i1> @icmp_sgt_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext ; CHECK-LABEL: icmp_sgt_vi_swap_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsle.vi v10, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsle.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"sgt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1063,8 +1020,7 @@ define <8 x i1> @icmp_sge_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i3 ; CHECK-LABEL: icmp_sge_vv_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsle.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vv v0, v10, v8, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"sge", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1074,10 +1030,9 @@ define <8 x i1> @icmp_sge_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroe ; CHECK-LABEL: icmp_sge_vx_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmv.v.x v12, a0 +; CHECK-NEXT: vmv.v.x v10, a0 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmsle.vv v10, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsle.vv v0, v10, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer @@ -1089,8 +1044,7 @@ define <8 x i1> @icmp_sge_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 ; CHECK-LABEL: icmp_sge_vx_swap_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmsle.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsle.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> 
zeroinitializer @@ -1102,8 +1056,7 @@ define <8 x i1> @icmp_sge_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: icmp_sge_vi_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsgt.vi v10, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsgt.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"sge", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1113,8 +1066,7 @@ define <8 x i1> @icmp_sge_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext ; CHECK-LABEL: icmp_sge_vi_swap_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsle.vi v10, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsle.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"sge", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1124,8 +1076,7 @@ define <8 x i1> @icmp_slt_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i3 ; CHECK-LABEL: icmp_slt_vv_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmslt.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmslt.vv v0, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"slt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1135,8 +1086,7 @@ define <8 x i1> @icmp_slt_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroe ; CHECK-LABEL: icmp_slt_vx_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmslt.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer @@ -1148,8 +1098,7 @@ define <8 x i1> @icmp_slt_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 ; 
CHECK-LABEL: icmp_slt_vx_swap_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmsgt.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsgt.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer @@ -1161,8 +1110,7 @@ define <8 x i1> @icmp_slt_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: icmp_slt_vi_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsle.vi v10, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsle.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"slt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1172,8 +1120,7 @@ define <8 x i1> @icmp_slt_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext ; CHECK-LABEL: icmp_slt_vi_swap_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsgt.vi v10, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsgt.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"slt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1183,8 +1130,7 @@ define <8 x i1> @icmp_sle_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i3 ; CHECK-LABEL: icmp_sle_vv_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsle.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vv v0, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"sle", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1194,8 +1140,7 @@ define <8 x i1> @icmp_sle_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroe ; CHECK-LABEL: icmp_sle_vx_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: 
vmsle.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsle.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer @@ -1207,10 +1152,9 @@ define <8 x i1> @icmp_sle_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 ; CHECK-LABEL: icmp_sle_vx_swap_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmv.v.x v12, a0 +; CHECK-NEXT: vmv.v.x v10, a0 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmsle.vv v10, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsle.vv v0, v10, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer @@ -1222,8 +1166,7 @@ define <8 x i1> @icmp_sle_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: icmp_sle_vi_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsle.vi v10, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsle.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"sle", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1233,8 +1176,7 @@ define <8 x i1> @icmp_sle_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext ; CHECK-LABEL: icmp_sle_vi_swap_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsgt.vi v10, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsgt.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"sle", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1243,125 +1185,47 @@ define <8 x i1> @icmp_sle_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext declare <64 x i1> @llvm.vp.icmp.v64i32(<64 x i32>, <64 x i32>, metadata, <64 x i1>, i32) define <64 
x i1> @icmp_eq_vv_v64i32(<64 x i32> %va, <64 x i32> %vb, <64 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: icmp_eq_vv_v64i32: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; RV32-NEXT: addi a1, a0, 128 -; RV32-NEXT: li a3, 32 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vle32.v v24, (a1) -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vle32.v v24, (a0) -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; RV32-NEXT: vmv1r.v v6, v0 -; RV32-NEXT: vslidedown.vi v25, v0, 4 -; RV32-NEXT: vmv1r.v v7, v25 -; RV32-NEXT: mv a0, a2 -; RV32-NEXT: bltu a2, a3, .LBB99_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a0, 32 -; RV32-NEXT: .LBB99_2: -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmv1r.v v0, v6 -; RV32-NEXT: vmseq.vv v6, v8, v24, v0.t -; RV32-NEXT: addi a0, a2, -32 -; RV32-NEXT: sltu a1, a2, a0 -; RV32-NEXT: addi a1, a1, -1 -; RV32-NEXT: and a0, a1, a0 -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmv1r.v v25, v7 -; RV32-NEXT: vmv1r.v v0, v7 -; RV32-NEXT: vmseq.vv v25, v16, v8, v0.t -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vslideup.vi v6, v25, 4 -; RV32-NEXT: vmv1r.v v0, v6 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add sp, sp, a0 
-; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret -; -; RV64-LABEL: icmp_eq_vv_v64i32: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 24 -; RV64-NEXT: mul a1, a1, a3 -; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb -; RV64-NEXT: addi a1, a0, 128 -; RV64-NEXT: li a3, 32 -; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV64-NEXT: vle32.v v24, (a1) -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vle32.v v24, (a0) -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; RV64-NEXT: mv a0, a2 -; RV64-NEXT: vslidedown.vi v17, v0, 4 -; RV64-NEXT: bltu a2, a3, .LBB99_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a0, 32 -; RV64-NEXT: .LBB99_2: -; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vmseq.vv v16, v8, v24, v0.t -; RV64-NEXT: addi a0, a2, -32 -; RV64-NEXT: sltu a1, a2, a0 -; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: and a0, a1, a0 -; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vmv1r.v v0, v17 -; RV64-NEXT: vmseq.vv 
v17, v24, v8, v0.t -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vslideup.vi v16, v17, 4 -; RV64-NEXT: vmv1r.v v0, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: li a1, 24 -; RV64-NEXT: mul a0, a0, a1 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: addi sp, sp, 16 -; RV64-NEXT: ret +; CHECK-LABEL: icmp_eq_vv_v64i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vle32.v v24, (a1) +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vle32.v v24, (a0) +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v7, v0, 4 +; CHECK-NEXT: mv a0, a2 +; CHECK-NEXT: bltu a2, a3, .LBB99_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: .LBB99_2: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmseq.vv v24, v8, v24, v0.t +; CHECK-NEXT: addi a0, a2, -32 +; CHECK-NEXT: sltu a1, a2, a0 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmseq.vv v8, v16, v8, v0.t +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vi v24, v8, 4 +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret %v = call <64 x i1> @llvm.vp.icmp.v64i32(<64 x i32> %va, <64 x i32> %vb, metadata !"eq", <64 x i1> %m, i32 %evl) ret <64 x i1> %v } @@ -1371,24 +1235,24 @@ define <64 x i1> 
@icmp_eq_vx_v64i32(<64 x i32> %va, i32 %b, <64 x i1> %m, i32 ze ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: li a3, 32 -; CHECK-NEXT: vslidedown.vi v25, v0, 4 +; CHECK-NEXT: vslidedown.vi v24, v0, 4 ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: bltu a1, a3, .LBB100_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: .LBB100_2: ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vmseq.vx v24, v8, a0, v0.t +; CHECK-NEXT: vmseq.vx v8, v8, a0, v0.t ; CHECK-NEXT: addi a2, a1, -32 ; CHECK-NEXT: sltu a1, a1, a2 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a1, a1, a2 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vmseq.vx v8, v16, a0, v0.t -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vi v24, v8, 4 ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmseq.vx v0, v16, a0, v0.t +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vi v8, v0, 4 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret %elt.head = insertelement <64 x i32> poison, i32 %b, i32 0 %vb = shufflevector <64 x i32> %elt.head, <64 x i32> poison, <64 x i32> zeroinitializer @@ -1401,24 +1265,24 @@ define <64 x i1> @icmp_eq_vx_swap_v64i32(<64 x i32> %va, i32 %b, <64 x i1> %m, i ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: li a3, 32 -; CHECK-NEXT: vslidedown.vi v25, v0, 4 +; CHECK-NEXT: vslidedown.vi v24, v0, 4 ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: bltu a1, a3, .LBB101_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: .LBB101_2: ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vmseq.vx v24, v8, a0, v0.t +; CHECK-NEXT: vmseq.vx v8, v8, a0, v0.t ; CHECK-NEXT: addi a2, a1, -32 ; CHECK-NEXT: sltu a1, a1, a2 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a1, a1, a2 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vmseq.vx v8, v16, a0, v0.t -; CHECK-NEXT: vsetivli zero, 8, e8, 
mf2, ta, ma -; CHECK-NEXT: vslideup.vi v24, v8, 4 ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmseq.vx v0, v16, a0, v0.t +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vi v8, v0, 4 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret %elt.head = insertelement <64 x i32> poison, i32 %b, i32 0 %vb = shufflevector <64 x i32> %elt.head, <64 x i32> poison, <64 x i32> zeroinitializer @@ -1432,8 +1296,7 @@ define <8 x i1> @icmp_eq_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 ; CHECK-LABEL: icmp_eq_vv_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmseq.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmseq.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"eq", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1448,18 +1311,16 @@ define <8 x i1> @icmp_eq_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroex ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; RV32-NEXT: vmseq.vv v12, v8, v16, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmseq.vv v0, v8, v12, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_eq_vx_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmseq.vx v12, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmseq.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer @@ -1476,18 +1337,16 @@ define <8 x i1> @icmp_eq_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 z ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vlse64.v v12, 
(a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; RV32-NEXT: vmseq.vv v12, v16, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmseq.vv v0, v12, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_eq_vx_swap_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmseq.vx v12, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmseq.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer @@ -1499,8 +1358,7 @@ define <8 x i1> @icmp_eq_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: icmp_eq_vi_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmseq.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmseq.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"eq", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1510,8 +1368,7 @@ define <8 x i1> @icmp_eq_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext % ; CHECK-LABEL: icmp_eq_vi_swap_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmseq.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmseq.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"eq", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1521,8 +1378,7 @@ define <8 x i1> @icmp_ne_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 ; CHECK-LABEL: icmp_ne_vv_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsne.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsne.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"ne", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1537,18 
+1393,16 @@ define <8 x i1> @icmp_ne_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroex ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; RV32-NEXT: vmsne.vv v12, v8, v16, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmsne.vv v0, v8, v12, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_ne_vx_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmsne.vx v12, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmsne.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer @@ -1565,18 +1419,16 @@ define <8 x i1> @icmp_ne_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 z ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; RV32-NEXT: vmsne.vv v12, v16, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmsne.vv v0, v12, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_ne_vx_swap_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmsne.vx v12, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmsne.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer @@ -1588,8 +1440,7 @@ define <8 x i1> @icmp_ne_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: icmp_ne_vi_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsne.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: 
vmsne.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"ne", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1599,8 +1450,7 @@ define <8 x i1> @icmp_ne_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext % ; CHECK-LABEL: icmp_ne_vi_swap_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsne.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsne.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"ne", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1610,8 +1460,7 @@ define <8 x i1> @icmp_ugt_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i3 ; CHECK-LABEL: icmp_ugt_vv_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsltu.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsltu.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"ugt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1626,18 +1475,16 @@ define <8 x i1> @icmp_ugt_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; RV32-NEXT: vmsltu.vv v12, v16, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmsltu.vv v0, v12, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_ugt_vx_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmsgtu.vx v12, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmsgtu.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer @@ -1654,18 +1501,16 @@ 
define <8 x i1> @icmp_ugt_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; RV32-NEXT: vmsltu.vv v12, v8, v16, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmsltu.vv v0, v8, v12, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_ugt_vx_swap_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmsltu.vx v12, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmsltu.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer @@ -1677,8 +1522,7 @@ define <8 x i1> @icmp_ugt_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: icmp_ugt_vi_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsgtu.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsgtu.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"ugt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1688,8 +1532,7 @@ define <8 x i1> @icmp_ugt_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext ; CHECK-LABEL: icmp_ugt_vi_swap_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsleu.vi v12, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsleu.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"ugt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1699,8 +1542,7 @@ define <8 x i1> @icmp_uge_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i3 ; CHECK-LABEL: icmp_uge_vv_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; 
CHECK-NEXT: vmsleu.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsleu.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"uge", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1715,20 +1557,18 @@ define <8 x i1> @icmp_uge_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; RV32-NEXT: vmsleu.vv v12, v16, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmsleu.vv v0, v12, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_uge_vx_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vmv.v.x v16, a0 +; RV64-NEXT: vmv.v.x v12, a0 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmsleu.vv v12, v16, v8, v0.t -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmsleu.vv v0, v12, v8, v0.t ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer @@ -1745,18 +1585,16 @@ define <8 x i1> @icmp_uge_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; RV32-NEXT: vmsleu.vv v12, v8, v16, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmsleu.vv v0, v8, v12, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_uge_vx_swap_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmsleu.vx v12, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmsleu.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, 
i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer @@ -1768,8 +1606,7 @@ define <8 x i1> @icmp_uge_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: icmp_uge_vi_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsgtu.vi v12, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsgtu.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"uge", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1779,8 +1616,7 @@ define <8 x i1> @icmp_uge_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext ; CHECK-LABEL: icmp_uge_vi_swap_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsleu.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsleu.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"uge", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1790,8 +1626,7 @@ define <8 x i1> @icmp_ult_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i3 ; CHECK-LABEL: icmp_ult_vv_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsltu.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsltu.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"ult", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1806,18 +1641,16 @@ define <8 x i1> @icmp_ult_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; RV32-NEXT: vmsltu.vv v12, v8, v16, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmsltu.vv v0, v8, v12, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret 
; ; RV64-LABEL: icmp_ult_vx_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmsltu.vx v12, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmsltu.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer @@ -1834,18 +1667,16 @@ define <8 x i1> @icmp_ult_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; RV32-NEXT: vmsltu.vv v12, v16, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmsltu.vv v0, v12, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_ult_vx_swap_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmsgtu.vx v12, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmsgtu.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer @@ -1857,8 +1688,7 @@ define <8 x i1> @icmp_ult_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: icmp_ult_vi_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsleu.vi v12, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsleu.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"ult", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1868,8 +1698,7 @@ define <8 x i1> @icmp_ult_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext ; CHECK-LABEL: icmp_ult_vi_swap_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsgtu.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; 
CHECK-NEXT: vmsgtu.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"ult", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1879,8 +1708,7 @@ define <8 x i1> @icmp_sgt_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i3 ; CHECK-LABEL: icmp_sgt_vv_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmslt.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmslt.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"sgt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1895,18 +1723,16 @@ define <8 x i1> @icmp_sgt_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; RV32-NEXT: vmslt.vv v12, v16, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmslt.vv v0, v12, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_sgt_vx_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmsgt.vx v12, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmsgt.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer @@ -1923,18 +1749,16 @@ define <8 x i1> @icmp_sgt_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; RV32-NEXT: vmslt.vv v12, v8, v16, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmslt.vv v0, v8, v12, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: 
ret ; ; RV64-LABEL: icmp_sgt_vx_swap_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmslt.vx v12, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmslt.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer @@ -1946,8 +1770,7 @@ define <8 x i1> @icmp_sgt_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: icmp_sgt_vi_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsgt.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsgt.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"sgt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1957,8 +1780,7 @@ define <8 x i1> @icmp_sgt_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext ; CHECK-LABEL: icmp_sgt_vi_swap_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsle.vi v12, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"sgt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1968,8 +1790,7 @@ define <8 x i1> @icmp_sge_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i3 ; CHECK-LABEL: icmp_sge_vv_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsle.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsle.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"sge", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1984,20 +1805,18 @@ define <8 x i1> @icmp_sge_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; 
RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; RV32-NEXT: vmsle.vv v12, v16, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmsle.vv v0, v12, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_sge_vx_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vmv.v.x v16, a0 +; RV64-NEXT: vmv.v.x v12, a0 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmsle.vv v12, v16, v8, v0.t -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmsle.vv v0, v12, v8, v0.t ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer @@ -2014,18 +1833,16 @@ define <8 x i1> @icmp_sge_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; RV32-NEXT: vmsle.vv v12, v8, v16, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmsle.vv v0, v8, v12, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_sge_vx_swap_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmsle.vx v12, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmsle.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer @@ -2037,8 +1854,7 @@ define <8 x i1> @icmp_sge_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: icmp_sge_vi_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsgt.vi v12, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsgt.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 
x i64> %va, <8 x i64> splat (i64 4), metadata !"sge", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2048,8 +1864,7 @@ define <8 x i1> @icmp_sge_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext ; CHECK-LABEL: icmp_sge_vi_swap_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsle.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"sge", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2059,8 +1874,7 @@ define <8 x i1> @icmp_slt_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i3 ; CHECK-LABEL: icmp_slt_vv_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmslt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmslt.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"slt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2075,18 +1889,16 @@ define <8 x i1> @icmp_slt_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; RV32-NEXT: vmslt.vv v12, v8, v16, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmslt.vv v0, v8, v12, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_slt_vx_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmslt.vx v12, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmslt.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer @@ -2103,18 +1915,16 @@ define <8 x i1> @icmp_slt_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 ; 
RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; RV32-NEXT: vmslt.vv v12, v16, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmslt.vv v0, v12, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_slt_vx_swap_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmsgt.vx v12, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmsgt.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer @@ -2126,8 +1936,7 @@ define <8 x i1> @icmp_slt_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: icmp_slt_vi_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsle.vi v12, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"slt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2137,8 +1946,7 @@ define <8 x i1> @icmp_slt_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext ; CHECK-LABEL: icmp_slt_vi_swap_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsgt.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsgt.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"slt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2148,8 +1956,7 @@ define <8 x i1> @icmp_sle_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i3 ; CHECK-LABEL: icmp_sle_vv_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsle.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsle.vv 
v0, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"sle", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2164,18 +1971,16 @@ define <8 x i1> @icmp_sle_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; RV32-NEXT: vmsle.vv v12, v8, v16, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmsle.vv v0, v8, v12, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_sle_vx_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmsle.vx v12, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmsle.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer @@ -2192,20 +1997,18 @@ define <8 x i1> @icmp_sle_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; RV32-NEXT: vmsle.vv v12, v16, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmsle.vv v0, v12, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_sle_vx_swap_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vmv.v.x v16, a0 +; RV64-NEXT: vmv.v.x v12, a0 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmsle.vv v12, v16, v8, v0.t -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmsle.vv v0, v12, v8, v0.t ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer @@ 
-2217,8 +2020,7 @@ define <8 x i1> @icmp_sle_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: icmp_sle_vi_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsle.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"sle", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2228,8 +2030,7 @@ define <8 x i1> @icmp_sle_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext ; CHECK-LABEL: icmp_sle_vi_swap_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsgt.vi v12, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsgt.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"sle", <8 x i1> %m, i32 %evl) ret <8 x i1> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll index 657d523..f0fcc48 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,CHECK-NO-MISALIGN,RV32 ; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,CHECK-NO-MISALIGN,RV64 -; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+fast-unaligned-access -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV64,RV64-MISALIGN +; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+unaligned-vector-mem -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV64,RV64-MISALIGN ; RUN: llc -mtriple=riscv64 
-mattr=+f,+zfh,+zve64f,+zvl128b,+zvfh -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,CHECK-NO-MISALIGN,ZVE64F diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll index fffc4d6..36c36a1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll @@ -3,9 +3,9 @@ ; RUN: | FileCheck %s --check-prefixes=SLOW,RV32-SLOW ; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=SLOW,RV64-SLOW -; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+fast-unaligned-access -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+unaligned-vector-mem -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=FAST,RV32-FAST -; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+fast-unaligned-access -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+unaligned-vector-mem -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=FAST,RV64-FAST define <4 x i32> @load_v4i32_align1(ptr %ptr) { diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfclass-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfclass-vp.ll index 09b9e7c..a2fc114 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfclass-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfclass-vp.ll @@ -86,10 +86,9 @@ define <8 x i1> @isnan_v8f32(<8 x float> %x, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: isnan_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vfclass.v v10, v8, v0.t +; CHECK-NEXT: vfclass.v v8, v8, v0.t ; CHECK-NEXT: li a0, 512 -; CHECK-NEXT: vmseq.vx v8, v10, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmseq.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.vp.is.fpclass.v8f32(<8 x float> %x, i32 2, <8 x i1> %m, i32 %evl) ret <8 x i1> %1 @@ -111,10 +110,9 @@ define <16 x i1> @isnan_v16f32(<16 x 
float> %x, <16 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: isnan_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vfclass.v v12, v8, v0.t +; CHECK-NEXT: vfclass.v v8, v8, v0.t ; CHECK-NEXT: li a0, 256 -; CHECK-NEXT: vmseq.vx v8, v12, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmseq.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.vp.is.fpclass.v16f32(<16 x float> %x, i32 1, <16 x i1> %m, i32 %evl) ret <16 x i1> %1 @@ -162,10 +160,9 @@ define <4 x i1> @isposinf_v4f64(<4 x double> %x, <4 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: isposinf_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfclass.v v10, v8, v0.t +; CHECK-NEXT: vfclass.v v8, v8, v0.t ; CHECK-NEXT: li a0, 128 -; CHECK-NEXT: vmseq.vx v8, v10, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmseq.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.vp.is.fpclass.v4f64(<4 x double> %x, i32 512, <4 x i1> %m, i32 %evl) ; 0x200 = "+inf" ret <4 x i1> %1 @@ -187,9 +184,8 @@ define <8 x i1> @isneginf_v8f64(<8 x double> %x, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: isneginf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vfclass.v v12, v8, v0.t -; CHECK-NEXT: vmseq.vi v8, v12, 1, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vfclass.v v8, v8, v0.t +; CHECK-NEXT: vmseq.vi v0, v8, 1, v0.t ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.vp.is.fpclass.v8f64(<8 x double> %x, i32 4, <8 x i1> %m, i32 %evl) ; "-inf" ret <8 x i1> %1 @@ -212,9 +208,8 @@ define <16 x i1> @isfinite_v16f64(<16 x double> %x, <16 x i1> %m, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfclass.v v8, v8, v0.t ; CHECK-NEXT: li a0, 126 -; CHECK-NEXT: vand.vx v16, v8, a0, v0.t -; CHECK-NEXT: vmsne.vi v8, v16, 0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vmsne.vi v0, v8, 0, v0.t ; CHECK-NEXT: ret %1 = call <16 x 
i1> @llvm.vp.is.fpclass.v16f64(<16 x double> %x, i32 504, <16 x i1> %m, i32 %evl) ; 0x1f8 = "finite" ret <16 x i1> %1 @@ -239,9 +234,8 @@ define <16 x i1> @isposfinite_v16f64(<16 x double> %x, <16 x i1> %m, i32 zeroext ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfclass.v v8, v8, v0.t ; CHECK-NEXT: li a0, 112 -; CHECK-NEXT: vand.vx v16, v8, a0, v0.t -; CHECK-NEXT: vmsne.vi v8, v16, 0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vmsne.vi v0, v8, 0, v0.t ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.vp.is.fpclass.v16f64(<16 x double> %x, i32 448, <16 x i1> %m, i32 %evl) ; 0x1c0 = "+finite" ret <16 x i1> %1 @@ -265,9 +259,8 @@ define <16 x i1> @isnotfinite_v16f64(<16 x double> %x, <16 x i1> %m, i32 zeroext ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfclass.v v8, v8, v0.t ; CHECK-NEXT: li a0, 897 -; CHECK-NEXT: vand.vx v16, v8, a0, v0.t -; CHECK-NEXT: vmsne.vi v8, v16, 0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vmsne.vi v0, v8, 0, v0.t ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.vp.is.fpclass.v16f64(<16 x double> %x, i32 519, <16 x i1> %m, i32 %evl) ; 0x207 = "inf|nan" ret <16 x i1> %1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmp-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmp-constrained-sdnode.ll index 55e1a1d..ab83617 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmp-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmp-constrained-sdnode.ll @@ -2575,12 +2575,10 @@ define <16 x i1> @fcmp_ogt_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmp_ogt_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; 
CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -2591,12 +2589,10 @@ define <16 x i1> @fcmp_ogt_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2609,12 +2605,10 @@ define <16 x i1> @fcmp_ogt_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2626,12 +2620,10 @@ define <16 x i1> @fcmp_oge_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmp_oge_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, 
ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v10, v8, v0.t ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -2642,12 +2634,10 @@ define <16 x i1> @fcmp_oge_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2660,12 +2650,10 @@ define <16 x i1> @fcmp_oge_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, 
<16 x i32> zeroinitializer @@ -2677,12 +2665,10 @@ define <16 x i1> @fcmp_olt_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmp_olt_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -2693,12 +2679,10 @@ define <16 x i1> @fcmp_olt_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2711,12 +2695,10 @@ define <16 x i1> @fcmp_olt_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv 
v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2728,12 +2710,10 @@ define <16 x i1> @fcmp_ole_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmp_ole_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vv v13, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v10, v0.t ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -2744,12 +2724,10 @@ define <16 x i1> @fcmp_ole_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2762,12 +2740,10 @@ define <16 x i1> @fcmp_ole_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: 
vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2779,14 +2755,13 @@ define <16 x i1> @fcmp_one_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmp_one_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vv v2, v8, v10, v0.t +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -2797,14 +2772,13 @@ define <16 x i1> @fcmp_one_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; 
CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2817,14 +2791,13 @@ define <16 x i1> @fcmp_one_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2836,9 +2809,9 @@ define <16 x i1> @fcmp_ord_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmp_ord_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -2849,9 +2822,9 @@ define <16 x i1> @fcmp_ord_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; 
CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2864,9 +2837,9 @@ define <16 x i1> @fcmp_ord_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2878,14 +2851,13 @@ define <16 x i1> @fcmp_ueq_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmp_ueq_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vv v2, v8, v10, v0.t +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v2 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ 
-2896,14 +2868,13 @@ define <16 x i1> @fcmp_ueq_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2916,14 +2887,13 @@ define <16 x i1> @fcmp_ueq_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2935,12 +2905,11 @@ define <16 x i1> @fcmp_ugt_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmp_ugt_vv_v16f16: ; CHECK: # %bb.0: ; 
CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v10, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -2951,12 +2920,11 @@ define <16 x i1> @fcmp_ugt_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2969,12 +2937,11 @@ define <16 x i1> @fcmp_ugt_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; 
CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2986,12 +2953,11 @@ define <16 x i1> @fcmp_uge_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmp_uge_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -3002,12 +2968,11 @@ define <16 x i1> @fcmp_uge_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -3020,12 +2985,11 @@ define <16 x i1> @fcmp_uge_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; 
CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -3037,12 +3001,11 @@ define <16 x i1> @fcmp_ult_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmp_ult_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -3053,12 +3016,11 @@ define <16 x i1> @fcmp_ult_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> 
poison, <16 x i32> zeroinitializer @@ -3071,12 +3033,11 @@ define <16 x i1> @fcmp_ult_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -3088,12 +3049,11 @@ define <16 x i1> @fcmp_ule_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmp_ule_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -3104,12 +3064,11 @@ define <16 x i1> @fcmp_ule_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m 
v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -3122,12 +3081,11 @@ define <16 x i1> @fcmp_ule_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -3173,9 +3131,9 @@ define <16 x i1> @fcmp_uno_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmp_uno_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfne.vv v12, v10, v10 -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vmor.mm v0, v10, v12 +; CHECK-NEXT: vmfne.vv v0, v10, v10 +; CHECK-NEXT: vmfne.vv v2, v8, v8 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -3186,9 +3144,9 @@ define <16 x i1> @fcmp_uno_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfne.vf v12, v10, fa0 -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; 
CHECK-NEXT: vmor.mm v0, v10, v12 +; CHECK-NEXT: vmfne.vf v0, v10, fa0 +; CHECK-NEXT: vmfne.vv v2, v8, v8 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -3201,9 +3159,9 @@ define <16 x i1> @fcmp_uno_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfne.vf v12, v10, fa0 -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v10 +; CHECK-NEXT: vmfne.vf v0, v10, fa0 +; CHECK-NEXT: vmfne.vv v2, v8, v8 +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -3254,12 +3212,10 @@ define <32 x i1> @fcmp_ogt_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3271,12 +3227,10 @@ define <32 x i1> @fcmp_ogt_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: 
vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3290,12 +3244,10 @@ define <32 x i1> @fcmp_ogt_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3308,12 +3260,10 @@ define <32 x i1> @fcmp_oge_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfle.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3325,12 +3275,10 @@ define <32 x i1> @fcmp_oge_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: 
vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3344,12 +3292,10 @@ define <32 x i1> @fcmp_oge_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3362,12 +3308,10 @@ define <32 x i1> @fcmp_olt_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x 
half> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3379,12 +3323,10 @@ define <32 x i1> @fcmp_olt_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3398,12 +3340,10 @@ define <32 x i1> @fcmp_olt_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3416,12 +3356,10 @@ define <32 x i1> @fcmp_ole_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfle.vv v17, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, 
v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3433,12 +3371,10 @@ define <32 x i1> @fcmp_ole_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3452,12 +3388,10 @@ define <32 x i1> @fcmp_ole_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3470,14 +3404,13 @@ define <32 x i1> @fcmp_one_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: 
vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vv v4, v8, v12, v0.t +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3489,14 +3422,13 @@ define <32 x i1> @fcmp_one_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3510,14 +3442,13 @@ define <32 x i1> @fcmp_one_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; 
CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3530,9 +3461,9 @@ define <32 x i1> @fcmp_ord_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3544,9 +3475,9 @@ define <32 x i1> @fcmp_ord_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3560,9 +3491,9 @@ define <32 x i1> @fcmp_ord_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 
-; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3575,14 +3506,13 @@ define <32 x i1> @fcmp_ueq_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vv v4, v8, v12, v0.t +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3594,14 +3524,13 @@ define <32 x i1> @fcmp_ueq_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, 
v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3615,14 +3544,13 @@ define <32 x i1> @fcmp_ueq_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3635,12 +3563,11 @@ define <32 x i1> @fcmp_ugt_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfle.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v12, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3652,12 +3579,11 @@ define <32 x i1> @fcmp_ugt_vf_v32f16(<32 x half> %va, half %b) 
nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3671,12 +3597,11 @@ define <32 x i1> @fcmp_ugt_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3689,12 +3614,11 @@ define <32 x i1> @fcmp_uge_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v8, 
v12, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3706,12 +3630,11 @@ define <32 x i1> @fcmp_uge_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3725,12 +3648,11 @@ define <32 x i1> @fcmp_uge_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3743,12 +3665,11 @@ define <32 x i1> @fcmp_ult_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; 
CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfle.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3760,12 +3681,11 @@ define <32 x i1> @fcmp_ult_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3779,12 +3699,11 @@ define <32 x i1> @fcmp_ult_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = 
shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3797,12 +3716,11 @@ define <32 x i1> @fcmp_ule_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3814,12 +3732,11 @@ define <32 x i1> @fcmp_ule_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3833,12 +3750,11 @@ define <32 x i1> @fcmp_ule_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf 
v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3888,9 +3804,9 @@ define <32 x i1> @fcmp_uno_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfne.vv v16, v12, v12 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v16 +; CHECK-NEXT: vmfne.vv v0, v12, v12 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3902,9 +3818,9 @@ define <32 x i1> @fcmp_uno_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfne.vf v16, v12, fa0 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v16 +; CHECK-NEXT: vmfne.vf v0, v12, fa0 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3918,9 +3834,9 @@ define <32 x i1> @fcmp_uno_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfne.vf v16, v12, fa0 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v12 +; CHECK-NEXT: vmfne.vf v0, v12, fa0 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v0, v4 ; 
CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -5866,12 +5782,10 @@ define <8 x i1> @fcmp_ogt_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st ; CHECK-LABEL: fcmp_ogt_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -5882,12 +5796,10 @@ define <8 x i1> @fcmp_ogt_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5900,12 +5812,10 @@ define <8 x i1> @fcmp_ogt_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; 
CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5917,12 +5827,10 @@ define <8 x i1> @fcmp_oge_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st ; CHECK-LABEL: fcmp_oge_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v10, v8, v0.t ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -5933,12 +5841,10 @@ define <8 x i1> @fcmp_oge_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5951,12 +5857,10 @@ define <8 x i1> @fcmp_oge_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; 
CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5968,12 +5872,10 @@ define <8 x i1> @fcmp_olt_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st ; CHECK-LABEL: fcmp_olt_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -5984,12 +5886,10 @@ define <8 x i1> @fcmp_olt_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, 
i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6002,12 +5902,10 @@ define <8 x i1> @fcmp_olt_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6019,12 +5917,10 @@ define <8 x i1> @fcmp_ole_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st ; CHECK-LABEL: fcmp_ole_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vv v13, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v10, v0.t ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -6035,12 +5931,10 @@ define <8 x i1> @fcmp_ole_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; 
CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6053,12 +5947,10 @@ define <8 x i1> @fcmp_ole_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6070,14 +5962,13 @@ define <8 x i1> @fcmp_one_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st ; CHECK-LABEL: fcmp_one_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vv v2, v8, v10, v0.t +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -6088,14 
+5979,13 @@ define <8 x i1> @fcmp_one_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6108,14 +5998,13 @@ define <8 x i1> @fcmp_one_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6127,9 +6016,9 @@ define <8 x i1> @fcmp_ord_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st ; CHECK-LABEL: fcmp_ord_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli 
zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -6140,9 +6029,9 @@ define <8 x i1> @fcmp_ord_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6155,9 +6044,9 @@ define <8 x i1> @fcmp_ord_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6169,14 +6058,13 @@ define <8 x i1> @fcmp_ueq_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st ; CHECK-LABEL: fcmp_ueq_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t -; CHECK-NEXT: 
vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vv v2, v8, v10, v0.t +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v2 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -6187,14 +6075,13 @@ define <8 x i1> @fcmp_ueq_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6207,14 +6094,13 @@ define <8 x i1> @fcmp_ueq_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; 
CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6226,12 +6112,11 @@ define <8 x i1> @fcmp_ugt_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st ; CHECK-LABEL: fcmp_ugt_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v10, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -6242,12 +6127,11 @@ define <8 x i1> @fcmp_ugt_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6260,12 +6144,11 @@ define <8 x i1> @fcmp_ugt_fv_v8f32(<8 x float> 
%va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6277,12 +6160,11 @@ define <8 x i1> @fcmp_uge_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st ; CHECK-LABEL: fcmp_uge_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -6293,12 +6175,11 @@ define <8 x i1> @fcmp_uge_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 
+; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6311,12 +6192,11 @@ define <8 x i1> @fcmp_uge_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6328,12 +6208,11 @@ define <8 x i1> @fcmp_ult_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st ; CHECK-LABEL: fcmp_ult_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -6344,12 +6223,11 @@ define <8 x i1> @fcmp_ult_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: 
vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6362,12 +6240,11 @@ define <8 x i1> @fcmp_ult_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6379,12 +6256,11 @@ define <8 x i1> @fcmp_ule_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st ; CHECK-LABEL: fcmp_ule_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> 
%va, <8 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -6395,12 +6271,11 @@ define <8 x i1> @fcmp_ule_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6413,12 +6288,11 @@ define <8 x i1> @fcmp_ule_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6464,9 +6338,9 @@ define <8 x i1> @fcmp_uno_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st ; CHECK-LABEL: fcmp_uno_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfne.vv v12, v10, v10 -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vmor.mm v0, v10, v12 +; CHECK-NEXT: vmfne.vv v0, v10, v10 +; CHECK-NEXT: vmfne.vv v2, v8, v8 
+; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -6477,9 +6351,9 @@ define <8 x i1> @fcmp_uno_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfne.vf v12, v10, fa0 -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vmor.mm v0, v10, v12 +; CHECK-NEXT: vmfne.vf v0, v10, fa0 +; CHECK-NEXT: vmfne.vv v2, v8, v8 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6492,9 +6366,9 @@ define <8 x i1> @fcmp_uno_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfne.vf v12, v10, fa0 -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v10 +; CHECK-NEXT: vmfne.vf v0, v10, fa0 +; CHECK-NEXT: vmfne.vv v2, v8, v8 +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6541,12 +6415,10 @@ define <16 x i1> @fcmp_ogt_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin ; CHECK-LABEL: fcmp_ogt_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %1 = call <16 x i1> 
@llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -6557,12 +6429,10 @@ define <16 x i1> @fcmp_ogt_vf_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6575,12 +6445,10 @@ define <16 x i1> @fcmp_ogt_fv_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6592,12 +6460,10 @@ define <16 x i1> @fcmp_oge_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin ; CHECK-LABEL: fcmp_oge_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfle.vv 
v17, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -6608,12 +6474,10 @@ define <16 x i1> @fcmp_oge_vf_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6626,12 +6490,10 @@ define <16 x i1> @fcmp_oge_fv_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6643,12 +6505,10 @@ define <16 x i1> @fcmp_olt_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin ; CHECK-LABEL: 
fcmp_olt_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -6659,12 +6519,10 @@ define <16 x i1> @fcmp_olt_vf_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6677,12 +6535,10 @@ define <16 x i1> @fcmp_olt_fv_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x float> 
poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6694,12 +6550,10 @@ define <16 x i1> @fcmp_ole_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin ; CHECK-LABEL: fcmp_ole_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfle.vv v17, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -6710,12 +6564,10 @@ define <16 x i1> @fcmp_ole_vf_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6728,12 +6580,10 @@ define <16 x i1> @fcmp_ole_fv_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0, 
v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6745,14 +6595,13 @@ define <16 x i1> @fcmp_one_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin ; CHECK-LABEL: fcmp_one_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vv v4, v8, v12, v0.t +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -6763,14 +6612,13 @@ define <16 x i1> @fcmp_one_vf_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: 
vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6783,14 +6631,13 @@ define <16 x i1> @fcmp_one_fv_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6802,9 +6649,9 @@ define <16 x i1> @fcmp_ord_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin ; CHECK-LABEL: fcmp_ord_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -6815,9 +6662,9 @@ define <16 x i1> @fcmp_ord_vf_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, 
fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6830,9 +6677,9 @@ define <16 x i1> @fcmp_ord_fv_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6844,14 +6691,13 @@ define <16 x i1> @fcmp_ueq_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin ; CHECK-LABEL: fcmp_ueq_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vv v4, v8, v12, v0.t +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -6862,14 +6708,13 @@ define <16 x i1> @fcmp_ueq_vf_v16f32(<16 x float> %va, float %b) nounwind strict ; 
CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6882,14 +6727,13 @@ define <16 x i1> @fcmp_ueq_fv_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6901,12 +6745,11 @@ define <16 x i1> @fcmp_ugt_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin ; CHECK-LABEL: fcmp_ugt_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; 
CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfle.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v12, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -6917,12 +6760,11 @@ define <16 x i1> @fcmp_ugt_vf_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6935,12 +6777,11 @@ define <16 x i1> @fcmp_ugt_fv_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = 
shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6952,12 +6793,11 @@ define <16 x i1> @fcmp_uge_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin ; CHECK-LABEL: fcmp_uge_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v12, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -6968,12 +6808,11 @@ define <16 x i1> @fcmp_uge_vf_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6986,12 +6825,11 @@ define <16 x i1> @fcmp_uge_fv_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; 
CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -7003,12 +6841,11 @@ define <16 x i1> @fcmp_ult_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin ; CHECK-LABEL: fcmp_ult_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfle.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -7019,12 +6856,11 @@ define <16 x i1> @fcmp_ult_vf_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -7037,12 +6873,11 @@ 
define <16 x i1> @fcmp_ult_fv_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -7054,12 +6889,11 @@ define <16 x i1> @fcmp_ule_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin ; CHECK-LABEL: fcmp_ule_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -7070,12 +6904,11 @@ define <16 x i1> @fcmp_ule_vf_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; 
CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -7088,12 +6921,11 @@ define <16 x i1> @fcmp_ule_fv_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -7139,9 +6971,9 @@ define <16 x i1> @fcmp_uno_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin ; CHECK-LABEL: fcmp_uno_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfne.vv v16, v12, v12 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v16 +; CHECK-NEXT: vmfne.vv v0, v12, v12 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -7152,9 +6984,9 @@ define <16 x i1> @fcmp_uno_vf_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfne.vf v16, v12, fa0 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v16 +; 
CHECK-NEXT: vmfne.vf v0, v12, fa0 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -7167,9 +6999,9 @@ define <16 x i1> @fcmp_uno_fv_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfne.vf v16, v12, fa0 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v12 +; CHECK-NEXT: vmfne.vf v0, v12, fa0 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -8482,12 +8314,10 @@ define <4 x i1> @fcmp_ogt_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmp_ogt_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -8498,12 +8328,10 @@ define <4 x i1> @fcmp_ogt_vf_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t 
-; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8516,12 +8344,10 @@ define <4 x i1> @fcmp_ogt_fv_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8533,12 +8359,10 @@ define <4 x i1> @fcmp_oge_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmp_oge_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v10, v8, v0.t ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -8549,12 +8373,10 @@ define <4 x i1> @fcmp_oge_vf_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, 
m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8567,12 +8389,10 @@ define <4 x i1> @fcmp_oge_fv_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8584,12 +8404,10 @@ define <4 x i1> @fcmp_olt_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmp_olt_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, 
metadata !"olt", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -8600,12 +8418,10 @@ define <4 x i1> @fcmp_olt_vf_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8618,12 +8434,10 @@ define <4 x i1> @fcmp_olt_fv_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8635,12 +8449,10 @@ define <4 x i1> @fcmp_ole_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmp_ole_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vv v13, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 
+; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v10, v0.t ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -8651,12 +8463,10 @@ define <4 x i1> @fcmp_ole_vf_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8669,12 +8479,10 @@ define <4 x i1> @fcmp_ole_fv_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8686,14 +8494,13 @@ define <4 x i1> @fcmp_one_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmp_one_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv 
v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vv v2, v8, v10, v0.t +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -8704,14 +8511,13 @@ define <4 x i1> @fcmp_one_vf_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8724,14 +8530,13 @@ define <4 x i1> @fcmp_one_fv_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: 
vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8743,9 +8548,9 @@ define <4 x i1> @fcmp_ord_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmp_ord_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -8756,9 +8561,9 @@ define <4 x i1> @fcmp_ord_vf_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8771,9 +8576,9 @@ define <4 x i1> @fcmp_ord_fv_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; 
CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8785,14 +8590,13 @@ define <4 x i1> @fcmp_ueq_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmp_ueq_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vv v2, v8, v10, v0.t +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v2 ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -8803,14 +8607,13 @@ define <4 x i1> @fcmp_ueq_vf_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, 
fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8823,14 +8626,13 @@ define <4 x i1> @fcmp_ueq_fv_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8842,12 +8644,11 @@ define <4 x i1> @fcmp_ugt_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmp_ugt_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v10, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -8858,12 +8659,11 @@ define <4 x i1> @fcmp_ugt_vf_v4f64(<4 x double> %va, 
double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8876,12 +8676,11 @@ define <4 x i1> @fcmp_ugt_fv_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8893,12 +8692,11 @@ define <4 x i1> @fcmp_uge_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmp_uge_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: 
vmflt.vv v0, v8, v10, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -8909,12 +8707,11 @@ define <4 x i1> @fcmp_uge_vf_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8927,12 +8724,11 @@ define <4 x i1> @fcmp_uge_fv_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8944,12 +8740,11 @@ define <4 x i1> @fcmp_ult_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmp_ult_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; CHECK-NEXT: 
vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -8960,12 +8755,11 @@ define <4 x i1> @fcmp_ult_vf_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8978,12 +8772,11 @@ define <4 x i1> @fcmp_ult_fv_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double 
%b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8995,12 +8788,11 @@ define <4 x i1> @fcmp_ule_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmp_ule_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -9011,12 +8803,11 @@ define <4 x i1> @fcmp_ule_vf_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -9029,12 +8820,11 @@ define <4 x i1> @fcmp_ule_fv_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; 
CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -9080,9 +8870,9 @@ define <4 x i1> @fcmp_uno_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmp_uno_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfne.vv v12, v10, v10 -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vmor.mm v0, v10, v12 +; CHECK-NEXT: vmfne.vv v0, v10, v10 +; CHECK-NEXT: vmfne.vv v2, v8, v8 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -9093,9 +8883,9 @@ define <4 x i1> @fcmp_uno_vf_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfne.vf v12, v10, fa0 -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vmor.mm v0, v10, v12 +; CHECK-NEXT: vmfne.vf v0, v10, fa0 +; CHECK-NEXT: vmfne.vv v2, v8, v8 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -9108,9 +8898,9 @@ define <4 x i1> @fcmp_uno_fv_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfne.vf v12, v10, fa0 -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v10 +; CHECK-NEXT: vmfne.vf v0, v10, fa0 +; CHECK-NEXT: vmfne.vv v2, v8, v8 +; 
CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -9157,12 +8947,10 @@ define <8 x i1> @fcmp_ogt_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmp_ogt_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -9173,12 +8961,10 @@ define <8 x i1> @fcmp_ogt_vf_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9191,12 +8977,10 @@ define <8 x i1> @fcmp_ogt_fv_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, 
v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9208,12 +8992,10 @@ define <8 x i1> @fcmp_oge_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmp_oge_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfle.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -9224,12 +9006,10 @@ define <8 x i1> @fcmp_oge_vf_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9242,12 +9022,10 @@ define <8 x i1> @fcmp_oge_fv_v8f64(<8 x double> %va, 
double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9259,12 +9037,10 @@ define <8 x i1> @fcmp_olt_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmp_olt_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -9275,12 +9051,10 @@ define <8 x i1> @fcmp_olt_vf_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret 
%head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9293,12 +9067,10 @@ define <8 x i1> @fcmp_olt_fv_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9310,12 +9082,10 @@ define <8 x i1> @fcmp_ole_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmp_ole_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfle.vv v17, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -9326,12 +9096,10 @@ define <8 x i1> @fcmp_ole_vf_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; 
CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9344,12 +9112,10 @@ define <8 x i1> @fcmp_ole_fv_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9361,14 +9127,13 @@ define <8 x i1> @fcmp_one_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmp_one_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vv v4, v8, v12, v0.t +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, 
metadata !"one", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -9379,14 +9144,13 @@ define <8 x i1> @fcmp_one_vf_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9399,14 +9163,13 @@ define <8 x i1> @fcmp_one_fv_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9418,9 +9181,9 @@ define <8 x i1> @fcmp_ord_vv_v8f64(<8 x double> %va, <8 x 
double> %vb) nounwind ; CHECK-LABEL: fcmp_ord_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -9431,9 +9194,9 @@ define <8 x i1> @fcmp_ord_vf_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9446,9 +9209,9 @@ define <8 x i1> @fcmp_ord_fv_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9460,14 +9223,13 @@ define <8 x i1> @fcmp_ueq_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmp_ueq_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: 
vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vv v4, v8, v12, v0.t +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -9478,14 +9240,13 @@ define <8 x i1> @fcmp_ueq_vf_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9498,14 +9259,13 @@ define <8 x i1> @fcmp_ueq_fv_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; 
CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9517,12 +9277,11 @@ define <8 x i1> @fcmp_ugt_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmp_ugt_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfle.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v12, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -9533,12 +9292,11 @@ define <8 x i1> @fcmp_ugt_vf_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> 
%head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9551,12 +9309,11 @@ define <8 x i1> @fcmp_ugt_fv_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9568,12 +9325,11 @@ define <8 x i1> @fcmp_uge_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmp_uge_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v12, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -9584,12 +9340,11 @@ define <8 x i1> @fcmp_uge_vf_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; 
CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9602,12 +9357,11 @@ define <8 x i1> @fcmp_uge_fv_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9619,12 +9373,11 @@ define <8 x i1> @fcmp_ult_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmp_ult_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfle.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -9635,12 +9388,11 @@ define <8 x i1> @fcmp_ult_vf_v8f64(<8 x double> 
%va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9653,12 +9405,11 @@ define <8 x i1> @fcmp_ult_fv_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9670,12 +9421,11 @@ define <8 x i1> @fcmp_ule_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmp_ule_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; 
CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -9686,12 +9436,11 @@ define <8 x i1> @fcmp_ule_vf_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9704,12 +9453,11 @@ define <8 x i1> @fcmp_ule_fv_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9755,9 +9503,9 @@ define <8 x i1> @fcmp_uno_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmp_uno_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; 
CHECK-NEXT: vmfne.vv v16, v12, v12 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v16 +; CHECK-NEXT: vmfne.vv v0, v12, v12 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -9768,9 +9516,9 @@ define <8 x i1> @fcmp_uno_vf_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfne.vf v16, v12, fa0 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v16 +; CHECK-NEXT: vmfne.vf v0, v12, fa0 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9783,9 +9531,9 @@ define <8 x i1> @fcmp_uno_fv_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfne.vf v16, v12, fa0 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v12 +; CHECK-NEXT: vmfne.vf v0, v12, fa0 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmps-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmps-constrained-sdnode.ll index 83037ba..e377e37 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmps-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmps-constrained-sdnode.ll @@ -2137,9 +2137,9 @@ define <16 x i1> @fcmps_oeq_vv_v16f16(<16 x half> %va, <16 x half> 
%vb) nounwind ; CHECK-LABEL: fcmps_oeq_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vmfle.vv v13, v8, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmfle.vv v2, v8, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -2149,9 +2149,9 @@ define <16 x i1> @fcmps_oeq_vf_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK-LABEL: fcmps_oeq_vf_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v11, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v2, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2163,9 +2163,9 @@ define <16 x i1> @fcmps_oeq_fv_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK-LABEL: fcmps_oeq_fv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v11, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v2, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2313,9 +2313,9 @@ define <16 x i1> @fcmps_one_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmps_one_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v8, v10 -; CHECK-NEXT: vmflt.vv v13, v10, v8 -; CHECK-NEXT: 
vmor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmflt.vv v2, v10, v8 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -2325,9 +2325,9 @@ define <16 x i1> @fcmps_one_vf_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK-LABEL: fcmps_one_vf_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2339,9 +2339,9 @@ define <16 x i1> @fcmps_one_fv_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK-LABEL: fcmps_one_fv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2353,9 +2353,9 @@ define <16 x i1> @fcmps_ord_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmps_ord_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v10 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v10 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x 
half> %va, <16 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -2366,9 +2366,9 @@ define <16 x i1> @fcmps_ord_vf_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2381,9 +2381,9 @@ define <16 x i1> @fcmps_ord_fv_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2395,9 +2395,9 @@ define <16 x i1> @fcmps_ueq_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmps_ueq_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v8, v10 -; CHECK-NEXT: vmflt.vv v13, v10, v8 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmflt.vv v2, v10, v8 +; CHECK-NEXT: vmnor.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -2407,9 +2407,9 @@ define <16 x i1> @fcmps_ueq_vf_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK-LABEL: 
fcmps_ueq_vf_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2421,9 +2421,9 @@ define <16 x i1> @fcmps_ueq_fv_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK-LABEL: fcmps_ueq_fv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2435,8 +2435,8 @@ define <16 x i1> @fcmps_ugt_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmps_ugt_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v8, v10 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vv v0, v8, v10 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -2446,8 +2446,8 @@ define <16 x i1> @fcmps_ugt_vf_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK-LABEL: fcmps_ugt_vf_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 
0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2459,8 +2459,8 @@ define <16 x i1> @fcmps_ugt_fv_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK-LABEL: fcmps_ugt_fv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2472,8 +2472,8 @@ define <16 x i1> @fcmps_uge_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmps_uge_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v8, v10 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -2483,8 +2483,8 @@ define <16 x i1> @fcmps_uge_vf_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK-LABEL: fcmps_uge_vf_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2496,8 +2496,8 @@ define <16 x i1> @fcmps_uge_fv_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK-LABEL: fcmps_uge_fv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = 
insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2509,8 +2509,8 @@ define <16 x i1> @fcmps_ult_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmps_ult_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -2520,8 +2520,8 @@ define <16 x i1> @fcmps_ult_vf_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK-LABEL: fcmps_ult_vf_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2533,8 +2533,8 @@ define <16 x i1> @fcmps_ult_fv_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK-LABEL: fcmps_ult_fv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2546,8 +2546,8 @@ define <16 x i1> @fcmps_ule_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmps_ule_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v10, v8 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vv v0, v10, v8 +; CHECK-NEXT: 
vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -2557,8 +2557,8 @@ define <16 x i1> @fcmps_ule_vf_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK-LABEL: fcmps_ule_vf_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2570,8 +2570,8 @@ define <16 x i1> @fcmps_ule_fv_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK-LABEL: fcmps_ule_fv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2583,9 +2583,9 @@ define <16 x i1> @fcmps_une_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmps_une_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vmfle.vv v13, v8, v10 -; CHECK-NEXT: vmnand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmfle.vv v2, v8, v10 +; CHECK-NEXT: vmnand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -2595,9 +2595,9 @@ define <16 x i1> @fcmps_une_vf_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK-LABEL: fcmps_une_vf_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: 
vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v11, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v2, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2609,9 +2609,9 @@ define <16 x i1> @fcmps_une_fv_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK-LABEL: fcmps_une_fv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v11, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v2, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2623,10 +2623,10 @@ define <16 x i1> @fcmps_uno_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmps_uno_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v10 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmnot.m v8, v10 -; CHECK-NEXT: vmorn.mm v0, v8, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v10 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmnot.m v8, v2 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -2637,10 +2637,10 @@ define <16 x i1> @fcmps_uno_vf_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmnot.m v8, v10 -; 
CHECK-NEXT: vmorn.mm v0, v8, v12 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmnot.m v8, v2 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2653,10 +2653,10 @@ define <16 x i1> @fcmps_uno_fv_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmnot.m v10, v12 -; CHECK-NEXT: vmfle.vv v11, v8, v8 -; CHECK-NEXT: vmorn.mm v0, v10, v11 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmnot.m v10, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v8 +; CHECK-NEXT: vmorn.mm v0, v10, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2670,9 +2670,9 @@ define <32 x i1> @fcmps_oeq_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmfle.vv v17, v8, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmfle.vv v4, v8, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -2683,9 +2683,9 @@ define <32 x i1> @fcmps_oeq_vf_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v4, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; 
CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -2698,9 +2698,9 @@ define <32 x i1> @fcmps_oeq_fv_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v4, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -2861,9 +2861,9 @@ define <32 x i1> @fcmps_one_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmflt.vv v17, v12, v8 -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmflt.vv v4, v12, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -2874,9 +2874,9 @@ define <32 x i1> @fcmps_one_vf_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -2889,9 +2889,9 @@ define <32 x i1> @fcmps_one_fv_v32f16(<32 x half> %va, half %b) 
nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -2904,9 +2904,9 @@ define <32 x i1> @fcmps_ord_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v12 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v12 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -2918,9 +2918,9 @@ define <32 x i1> @fcmps_ord_vf_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -2934,9 +2934,9 @@ define <32 x i1> @fcmps_ord_fv_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 +; 
CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -2949,9 +2949,9 @@ define <32 x i1> @fcmps_ueq_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmflt.vv v17, v12, v8 -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmflt.vv v4, v12, v8 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -2962,9 +2962,9 @@ define <32 x i1> @fcmps_ueq_vf_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -2977,9 +2977,9 @@ define <32 x i1> @fcmps_ueq_fv_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x 
i32> zeroinitializer @@ -2992,8 +2992,8 @@ define <32 x i1> @fcmps_ugt_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v8, v12 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vv v0, v8, v12 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3004,8 +3004,8 @@ define <32 x i1> @fcmps_ugt_vf_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3018,8 +3018,8 @@ define <32 x i1> @fcmps_ugt_fv_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3032,8 +3032,8 @@ define <32 x i1> @fcmps_uge_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"uge", metadata 
!"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3044,8 +3044,8 @@ define <32 x i1> @fcmps_uge_vf_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3058,8 +3058,8 @@ define <32 x i1> @fcmps_uge_fv_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3072,8 +3072,8 @@ define <32 x i1> @fcmps_ult_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3084,8 +3084,8 @@ define <32 x i1> @fcmps_ult_vf_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> 
poison, <32 x i32> zeroinitializer @@ -3098,8 +3098,8 @@ define <32 x i1> @fcmps_ult_fv_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3112,8 +3112,8 @@ define <32 x i1> @fcmps_ule_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v12, v8 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vv v0, v12, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3124,8 +3124,8 @@ define <32 x i1> @fcmps_ule_vf_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3138,8 +3138,8 @@ define <32 x i1> @fcmps_ule_fv_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 
x i32> zeroinitializer @@ -3152,9 +3152,9 @@ define <32 x i1> @fcmps_une_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmfle.vv v17, v8, v12 -; CHECK-NEXT: vmnand.mm v0, v17, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmfle.vv v4, v8, v12 +; CHECK-NEXT: vmnand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3165,9 +3165,9 @@ define <32 x i1> @fcmps_une_vf_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v13, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v4, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3180,9 +3180,9 @@ define <32 x i1> @fcmps_une_fv_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v4, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3195,10 +3195,10 @@ define <32 x i1> @fcmps_uno_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vv 
v16, v12, v12 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmnot.m v8, v12 -; CHECK-NEXT: vmorn.mm v0, v8, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v12 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmnot.m v8, v4 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3210,10 +3210,10 @@ define <32 x i1> @fcmps_uno_vf_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmnot.m v8, v12 -; CHECK-NEXT: vmorn.mm v0, v8, v16 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmnot.m v8, v4 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3227,10 +3227,10 @@ define <32 x i1> @fcmps_uno_fv_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmfle.vf v8, v12, fa0 -; CHECK-NEXT: vmnot.m v8, v8 -; CHECK-NEXT: vmorn.mm v0, v8, v16 +; CHECK-NEXT: vmfle.vv v0, v8, v8 +; CHECK-NEXT: vmfle.vf v4, v12, fa0 +; CHECK-NEXT: vmnot.m v8, v4 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -4839,9 +4839,9 @@ define <8 x i1> @fcmps_oeq_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s ; CHECK-LABEL: fcmps_oeq_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: 
vmfle.vv v13, v8, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmfle.vv v2, v8, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -4851,9 +4851,9 @@ define <8 x i1> @fcmps_oeq_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-LABEL: fcmps_oeq_vf_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v11, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v2, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -4865,9 +4865,9 @@ define <8 x i1> @fcmps_oeq_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-LABEL: fcmps_oeq_fv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v11, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v2, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5015,9 +5015,9 @@ define <8 x i1> @fcmps_one_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s ; CHECK-LABEL: fcmps_one_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v8, v10 -; CHECK-NEXT: vmflt.vv v13, v10, v8 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmflt.vv v2, v10, v8 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> 
@llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -5027,9 +5027,9 @@ define <8 x i1> @fcmps_one_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-LABEL: fcmps_one_vf_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5041,9 +5041,9 @@ define <8 x i1> @fcmps_one_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-LABEL: fcmps_one_fv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5055,9 +5055,9 @@ define <8 x i1> @fcmps_ord_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s ; CHECK-LABEL: fcmps_ord_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v10 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v10 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -5068,9 +5068,9 @@ define <8 x i1> @fcmps_ord_vf_v8f32(<8 x float> %va, 
float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5083,9 +5083,9 @@ define <8 x i1> @fcmps_ord_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5097,9 +5097,9 @@ define <8 x i1> @fcmps_ueq_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s ; CHECK-LABEL: fcmps_ueq_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v8, v10 -; CHECK-NEXT: vmflt.vv v13, v10, v8 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmflt.vv v2, v10, v8 +; CHECK-NEXT: vmnor.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -5109,9 +5109,9 @@ define <8 x i1> @fcmps_ueq_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-LABEL: fcmps_ueq_vf_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0 -; CHECK-NEXT: vmnor.mm 
v0, v11, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5123,9 +5123,9 @@ define <8 x i1> @fcmps_ueq_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-LABEL: fcmps_ueq_fv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5137,8 +5137,8 @@ define <8 x i1> @fcmps_ugt_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s ; CHECK-LABEL: fcmps_ugt_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v8, v10 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vv v0, v8, v10 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -5148,8 +5148,8 @@ define <8 x i1> @fcmps_ugt_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-LABEL: fcmps_ugt_vf_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5161,8 +5161,8 @@ define <8 x i1> @fcmps_ugt_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; 
CHECK-LABEL: fcmps_ugt_fv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5174,8 +5174,8 @@ define <8 x i1> @fcmps_uge_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s ; CHECK-LABEL: fcmps_uge_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v8, v10 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -5185,8 +5185,8 @@ define <8 x i1> @fcmps_uge_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-LABEL: fcmps_uge_vf_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5198,8 +5198,8 @@ define <8 x i1> @fcmps_uge_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-LABEL: fcmps_uge_fv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5211,8 +5211,8 @@ define <8 x i1> @fcmps_ult_vv_v8f32(<8 x float> %va, 
<8 x float> %vb) nounwind s ; CHECK-LABEL: fcmps_ult_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -5222,8 +5222,8 @@ define <8 x i1> @fcmps_ult_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-LABEL: fcmps_ult_vf_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5235,8 +5235,8 @@ define <8 x i1> @fcmps_ult_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-LABEL: fcmps_ult_fv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5248,8 +5248,8 @@ define <8 x i1> @fcmps_ule_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s ; CHECK-LABEL: fcmps_ule_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v10, v8 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vv v0, v10, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -5259,8 +5259,8 @@ 
define <8 x i1> @fcmps_ule_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-LABEL: fcmps_ule_vf_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5272,8 +5272,8 @@ define <8 x i1> @fcmps_ule_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-LABEL: fcmps_ule_fv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5285,9 +5285,9 @@ define <8 x i1> @fcmps_une_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s ; CHECK-LABEL: fcmps_une_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vmfle.vv v13, v8, v10 -; CHECK-NEXT: vmnand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmfle.vv v2, v8, v10 +; CHECK-NEXT: vmnand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -5297,9 +5297,9 @@ define <8 x i1> @fcmps_une_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-LABEL: fcmps_une_vf_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v11, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v2, v8, fa0 +; CHECK-NEXT: 
vmnand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5311,9 +5311,9 @@ define <8 x i1> @fcmps_une_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-LABEL: fcmps_une_fv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v11, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v2, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5325,10 +5325,10 @@ define <8 x i1> @fcmps_uno_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s ; CHECK-LABEL: fcmps_uno_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v10 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmnot.m v8, v10 -; CHECK-NEXT: vmorn.mm v0, v8, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v10 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmnot.m v8, v2 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -5339,10 +5339,10 @@ define <8 x i1> @fcmps_uno_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmnot.m v8, v10 -; CHECK-NEXT: vmorn.mm v0, v8, v12 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmnot.m v8, v2 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 
%splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5355,10 +5355,10 @@ define <8 x i1> @fcmps_uno_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmnot.m v10, v12 -; CHECK-NEXT: vmfle.vv v11, v8, v8 -; CHECK-NEXT: vmorn.mm v0, v10, v11 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmnot.m v10, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v8 +; CHECK-NEXT: vmorn.mm v0, v10, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5371,9 +5371,9 @@ define <16 x i1> @fcmps_oeq_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi ; CHECK-LABEL: fcmps_oeq_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmfle.vv v17, v8, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmfle.vv v4, v8, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -5383,9 +5383,9 @@ define <16 x i1> @fcmps_oeq_vf_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK-LABEL: fcmps_oeq_vf_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v4, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5397,9 +5397,9 @@ define <16 x i1> 
@fcmps_oeq_fv_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK-LABEL: fcmps_oeq_fv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v4, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5547,9 +5547,9 @@ define <16 x i1> @fcmps_one_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi ; CHECK-LABEL: fcmps_one_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmflt.vv v17, v12, v8 -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmflt.vv v4, v12, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -5559,9 +5559,9 @@ define <16 x i1> @fcmps_one_vf_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK-LABEL: fcmps_one_vf_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5573,9 +5573,9 @@ define <16 x i1> @fcmps_one_fv_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK-LABEL: fcmps_one_fv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 
-; CHECK-NEXT: vmflt.vf v13, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5587,9 +5587,9 @@ define <16 x i1> @fcmps_ord_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi ; CHECK-LABEL: fcmps_ord_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v12 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v12 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -5600,9 +5600,9 @@ define <16 x i1> @fcmps_ord_vf_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5615,9 +5615,9 @@ define <16 x i1> @fcmps_ord_fv_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 ; CHECK-NEXT: ret %head = 
insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5629,9 +5629,9 @@ define <16 x i1> @fcmps_ueq_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi ; CHECK-LABEL: fcmps_ueq_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmflt.vv v17, v12, v8 -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmflt.vv v4, v12, v8 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -5641,9 +5641,9 @@ define <16 x i1> @fcmps_ueq_vf_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK-LABEL: fcmps_ueq_vf_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5655,9 +5655,9 @@ define <16 x i1> @fcmps_ueq_fv_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK-LABEL: fcmps_ueq_fv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5669,8 +5669,8 @@ define <16 x i1> 
@fcmps_ugt_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi ; CHECK-LABEL: fcmps_ugt_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v8, v12 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vv v0, v8, v12 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -5680,8 +5680,8 @@ define <16 x i1> @fcmps_ugt_vf_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK-LABEL: fcmps_ugt_vf_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5693,8 +5693,8 @@ define <16 x i1> @fcmps_ugt_fv_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK-LABEL: fcmps_ugt_fv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5706,8 +5706,8 @@ define <16 x i1> @fcmps_uge_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi ; CHECK-LABEL: fcmps_uge_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"uge", metadata 
!"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -5717,8 +5717,8 @@ define <16 x i1> @fcmps_uge_vf_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK-LABEL: fcmps_uge_vf_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5730,8 +5730,8 @@ define <16 x i1> @fcmps_uge_fv_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK-LABEL: fcmps_uge_fv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5743,8 +5743,8 @@ define <16 x i1> @fcmps_ult_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi ; CHECK-LABEL: fcmps_ult_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -5754,8 +5754,8 @@ define <16 x i1> @fcmps_ult_vf_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK-LABEL: fcmps_ult_vf_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float 
%b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5767,8 +5767,8 @@ define <16 x i1> @fcmps_ult_fv_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK-LABEL: fcmps_ult_fv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5780,8 +5780,8 @@ define <16 x i1> @fcmps_ule_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi ; CHECK-LABEL: fcmps_ule_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v12, v8 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vv v0, v12, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -5791,8 +5791,8 @@ define <16 x i1> @fcmps_ule_vf_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK-LABEL: fcmps_ule_vf_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5804,8 +5804,8 @@ define <16 x i1> @fcmps_ule_fv_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK-LABEL: fcmps_ule_fv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; 
CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5817,9 +5817,9 @@ define <16 x i1> @fcmps_une_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi ; CHECK-LABEL: fcmps_une_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmfle.vv v17, v8, v12 -; CHECK-NEXT: vmnand.mm v0, v17, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmfle.vv v4, v8, v12 +; CHECK-NEXT: vmnand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -5829,9 +5829,9 @@ define <16 x i1> @fcmps_une_vf_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK-LABEL: fcmps_une_vf_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v13, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v4, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5843,9 +5843,9 @@ define <16 x i1> @fcmps_une_fv_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK-LABEL: fcmps_une_fv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v4, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5857,10 +5857,10 @@ 
define <16 x i1> @fcmps_uno_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi ; CHECK-LABEL: fcmps_uno_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v12 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmnot.m v8, v12 -; CHECK-NEXT: vmorn.mm v0, v8, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v12 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmnot.m v8, v4 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -5871,10 +5871,10 @@ define <16 x i1> @fcmps_uno_vf_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmnot.m v8, v12 -; CHECK-NEXT: vmorn.mm v0, v8, v16 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmnot.m v8, v4 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5887,10 +5887,10 @@ define <16 x i1> @fcmps_uno_fv_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmnot.m v12, v16 -; CHECK-NEXT: vmfle.vv v13, v8, v8 -; CHECK-NEXT: vmorn.mm v0, v12, v13 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmnot.m v12, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v8 +; CHECK-NEXT: vmorn.mm v0, v12, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6967,9 +6967,9 @@ define <4 x i1> 
@fcmps_oeq_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmps_oeq_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vmfle.vv v13, v8, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmfle.vv v2, v8, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -6979,9 +6979,9 @@ define <4 x i1> @fcmps_oeq_vf_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_oeq_vf_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v11, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v2, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -6993,9 +6993,9 @@ define <4 x i1> @fcmps_oeq_fv_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_oeq_fv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v11, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v2, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7143,9 +7143,9 @@ define <4 x i1> @fcmps_one_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmps_one_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v8, v10 -; 
CHECK-NEXT: vmflt.vv v13, v10, v8 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmflt.vv v2, v10, v8 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -7155,9 +7155,9 @@ define <4 x i1> @fcmps_one_vf_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_one_vf_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7169,9 +7169,9 @@ define <4 x i1> @fcmps_one_fv_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_one_fv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7183,9 +7183,9 @@ define <4 x i1> @fcmps_ord_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmps_ord_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v10 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v10 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <4 x i1> 
@llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -7196,9 +7196,9 @@ define <4 x i1> @fcmps_ord_vf_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7211,9 +7211,9 @@ define <4 x i1> @fcmps_ord_fv_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7225,9 +7225,9 @@ define <4 x i1> @fcmps_ueq_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmps_ueq_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v8, v10 -; CHECK-NEXT: vmflt.vv v13, v10, v8 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmflt.vv v2, v10, v8 +; CHECK-NEXT: vmnor.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -7237,9 +7237,9 @@ define <4 x i1> @fcmps_ueq_vf_v4f64(<4 x 
double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_ueq_vf_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7251,9 +7251,9 @@ define <4 x i1> @fcmps_ueq_fv_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_ueq_fv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7265,8 +7265,8 @@ define <4 x i1> @fcmps_ugt_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmps_ugt_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v8, v10 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vv v0, v8, v10 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -7276,8 +7276,8 @@ define <4 x i1> @fcmps_ugt_vf_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_ugt_vf_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret 
%head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7289,8 +7289,8 @@ define <4 x i1> @fcmps_ugt_fv_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_ugt_fv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7302,8 +7302,8 @@ define <4 x i1> @fcmps_uge_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmps_uge_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v8, v10 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -7313,8 +7313,8 @@ define <4 x i1> @fcmps_uge_vf_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_uge_vf_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7326,8 +7326,8 @@ define <4 x i1> @fcmps_uge_fv_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_uge_fv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; 
CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7339,8 +7339,8 @@ define <4 x i1> @fcmps_ult_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmps_ult_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -7350,8 +7350,8 @@ define <4 x i1> @fcmps_ult_vf_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_ult_vf_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7363,8 +7363,8 @@ define <4 x i1> @fcmps_ult_fv_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_ult_fv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7376,8 +7376,8 @@ define <4 x i1> @fcmps_ule_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmps_ule_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v10, v8 -; CHECK-NEXT: vmnot.m v0, 
v12 +; CHECK-NEXT: vmflt.vv v0, v10, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -7387,8 +7387,8 @@ define <4 x i1> @fcmps_ule_vf_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_ule_vf_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7400,8 +7400,8 @@ define <4 x i1> @fcmps_ule_fv_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_ule_fv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7413,9 +7413,9 @@ define <4 x i1> @fcmps_une_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmps_une_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vmfle.vv v13, v8, v10 -; CHECK-NEXT: vmnand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmfle.vv v2, v8, v10 +; CHECK-NEXT: vmnand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -7425,9 +7425,9 @@ define <4 x i1> @fcmps_une_vf_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK-LABEL: 
fcmps_une_vf_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v11, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v2, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7439,9 +7439,9 @@ define <4 x i1> @fcmps_une_fv_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_une_fv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v11, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v2, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7453,10 +7453,10 @@ define <4 x i1> @fcmps_uno_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmps_uno_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v10 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmnot.m v8, v10 -; CHECK-NEXT: vmorn.mm v0, v8, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v10 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmnot.m v8, v2 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -7467,10 +7467,10 @@ define <4 x i1> @fcmps_uno_vf_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: 
vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmnot.m v8, v10 -; CHECK-NEXT: vmorn.mm v0, v8, v12 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmnot.m v8, v2 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7483,10 +7483,10 @@ define <4 x i1> @fcmps_uno_fv_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmnot.m v10, v12 -; CHECK-NEXT: vmfle.vv v11, v8, v8 -; CHECK-NEXT: vmorn.mm v0, v10, v11 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmnot.m v10, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v8 +; CHECK-NEXT: vmorn.mm v0, v10, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7499,9 +7499,9 @@ define <8 x i1> @fcmps_oeq_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmps_oeq_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmfle.vv v17, v8, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmfle.vv v4, v8, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -7511,9 +7511,9 @@ define <8 x i1> @fcmps_oeq_vf_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_oeq_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmfge.vf v0, 
v8, fa0 +; CHECK-NEXT: vmfle.vf v4, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -7525,9 +7525,9 @@ define <8 x i1> @fcmps_oeq_fv_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_oeq_fv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v4, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -7675,9 +7675,9 @@ define <8 x i1> @fcmps_one_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmps_one_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmflt.vv v17, v12, v8 -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmflt.vv v4, v12, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -7687,9 +7687,9 @@ define <8 x i1> @fcmps_one_vf_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_one_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> 
poison, <8 x i32> zeroinitializer @@ -7701,9 +7701,9 @@ define <8 x i1> @fcmps_one_fv_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_one_fv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -7715,9 +7715,9 @@ define <8 x i1> @fcmps_ord_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmps_ord_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v12 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v12 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -7728,9 +7728,9 @@ define <8 x i1> @fcmps_ord_vf_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -7743,9 +7743,9 @@ define <8 x i1> @fcmps_ord_fv_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; 
CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -7757,9 +7757,9 @@ define <8 x i1> @fcmps_ueq_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmps_ueq_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmflt.vv v17, v12, v8 -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmflt.vv v4, v12, v8 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -7769,9 +7769,9 @@ define <8 x i1> @fcmps_ueq_vf_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_ueq_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -7783,9 +7783,9 @@ define <8 x i1> @fcmps_ueq_fv_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_ueq_fv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v4, v8, 
fa0 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -7797,8 +7797,8 @@ define <8 x i1> @fcmps_ugt_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmps_ugt_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v8, v12 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vv v0, v8, v12 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -7808,8 +7808,8 @@ define <8 x i1> @fcmps_ugt_vf_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_ugt_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -7821,8 +7821,8 @@ define <8 x i1> @fcmps_ugt_fv_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_ugt_fv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -7834,8 +7834,8 @@ define <8 x i1> @fcmps_uge_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmps_uge_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: 
vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -7845,8 +7845,8 @@ define <8 x i1> @fcmps_uge_vf_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_uge_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -7858,8 +7858,8 @@ define <8 x i1> @fcmps_uge_fv_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_uge_fv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -7871,8 +7871,8 @@ define <8 x i1> @fcmps_ult_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmps_ult_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -7882,8 +7882,8 @@ define <8 x i1> @fcmps_ult_vf_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_ult_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, 
m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -7895,8 +7895,8 @@ define <8 x i1> @fcmps_ult_fv_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_ult_fv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -7908,8 +7908,8 @@ define <8 x i1> @fcmps_ule_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmps_ule_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v12, v8 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vv v0, v12, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -7919,8 +7919,8 @@ define <8 x i1> @fcmps_ule_vf_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_ule_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -7932,8 +7932,8 @@ define <8 x i1> @fcmps_ule_fv_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_ule_fv_v8f64: ; CHECK: # 
%bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -7945,9 +7945,9 @@ define <8 x i1> @fcmps_une_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmps_une_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmfle.vv v17, v8, v12 -; CHECK-NEXT: vmnand.mm v0, v17, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmfle.vv v4, v8, v12 +; CHECK-NEXT: vmnand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -7957,9 +7957,9 @@ define <8 x i1> @fcmps_une_vf_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_une_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v13, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v4, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -7971,9 +7971,9 @@ define <8 x i1> @fcmps_une_fv_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_une_fv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v4, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v4, v0 ; CHECK-NEXT: 
ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -7985,10 +7985,10 @@ define <8 x i1> @fcmps_uno_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmps_uno_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v12 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmnot.m v8, v12 -; CHECK-NEXT: vmorn.mm v0, v8, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v12 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmnot.m v8, v4 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -7999,10 +7999,10 @@ define <8 x i1> @fcmps_uno_vf_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmnot.m v8, v12 -; CHECK-NEXT: vmorn.mm v0, v8, v16 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmnot.m v8, v4 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -8015,10 +8015,10 @@ define <8 x i1> @fcmps_uno_fv_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmnot.m v12, v16 -; CHECK-NEXT: vmfle.vv v13, v8, v8 -; CHECK-NEXT: vmorn.mm v0, v12, v13 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmnot.m v12, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v8 +; CHECK-NEXT: vmorn.mm v0, v12, v0 ; CHECK-NEXT: ret %head = insertelement 
<8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll index e65decf..67570f8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll @@ -291,16 +291,16 @@ define <32 x i8> @vpgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> % ; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vsetvli zero, a2, e8, m1, ta, ma ; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t -; RV64-NEXT: addi a2, a1, -16 -; RV64-NEXT: sltu a1, a1, a2 -; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: addi a2, a1, -16 +; RV64-NEXT: sltu a1, a1, a2 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: li a0, 32 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll index 466448a..e3ab3b6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll @@ -26,8 +26,8 @@ define <4 x i1> @vpmerge_vv_v4i1(<4 x i1> %va, <4 x i1> %vb, <4 x i1> %m, i32 ze ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vid.v v10 -; RV64-NEXT: vmsltu.vx v12, v10, a0 -; RV64-NEXT: vmand.mm v9, v9, v12 +; RV64-NEXT: vmsltu.vx v2, v10, a0 +; RV64-NEXT: vmand.mm v9, v9, v2 ; RV64-NEXT: vmandn.mm v8, v8, v9 ; RV64-NEXT: vmand.mm v9, v0, v9 ; RV64-NEXT: vmor.mm v0, v9, v8 @@ -48,8 +48,8 @@ define <4 x i1> @vpmerge_vv_v4i1(<4 x i1> %va, <4 x i1> 
%vb, <4 x i1> %m, i32 ze ; RV64ZVFHMIN: # %bb.0: ; RV64ZVFHMIN-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64ZVFHMIN-NEXT: vid.v v10 -; RV64ZVFHMIN-NEXT: vmsltu.vx v12, v10, a0 -; RV64ZVFHMIN-NEXT: vmand.mm v9, v9, v12 +; RV64ZVFHMIN-NEXT: vmsltu.vx v2, v10, a0 +; RV64ZVFHMIN-NEXT: vmand.mm v9, v9, v2 ; RV64ZVFHMIN-NEXT: vmandn.mm v8, v8, v9 ; RV64ZVFHMIN-NEXT: vmand.mm v9, v0, v9 ; RV64ZVFHMIN-NEXT: vmor.mm v0, v9, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll index fb04d53..cb502de 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll @@ -447,15 +447,29 @@ define <32 x i64> @select_v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c, i32 define <32 x i64> @select_evl_v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c) { ; CHECK-LABEL: select_evl_v32i64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle64.v v16, (a0) ; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 -; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 ; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, ma -; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: 
ret %v = call <32 x i64> @llvm.vp.select.v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c, i32 17) ret <32 x i64> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll index 777e005..5979814 100644 --- a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll @@ -140,11 +140,9 @@ define <vscale x 8 x half> @vp_floor_nxv8f16(<vscale x 8 x half> %va, <vscale x ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -184,11 +182,9 @@ define <vscale x 16 x half> @vp_floor_nxv16f16(<vscale x 16 x half> %va, <vscale ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -228,11 +224,9 @@ define <vscale x 32 x half> @vp_floor_nxv32f16(<vscale x 32 x half> %va, <vscale ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: 
vfcvt.f.x.v v16, v16, v0.t @@ -356,11 +350,9 @@ define <vscale x 4 x float> @vp_floor_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -400,11 +392,9 @@ define <vscale x 8 x float> @vp_floor_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -444,11 +434,9 @@ define <vscale x 16 x float> @vp_floor_nxv16f32(<vscale x 16 x float> %va, <vsca ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -530,11 +518,9 @@ define <vscale x 2 x double> @vp_floor_nxv2f64(<vscale x 2 x double> %va, <vscal ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; 
CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -574,11 +560,9 @@ define <vscale x 4 x double> @vp_floor_nxv4f64(<vscale x 4 x double> %va, <vscal ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -618,11 +602,9 @@ define <vscale x 7 x double> @vp_floor_nxv7f64(<vscale x 7 x double> %va, <vscal ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -662,11 +644,9 @@ define <vscale x 8 x double> @vp_floor_nxv8f64(<vscale x 8 x double> %va, <vscal ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -716,11 +696,9 @@ define <vscale x 16 x double> @vp_floor_nxv16f64(<vscale x 16 x double> %va, 
<vs ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v5, v0 -; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a2, 2 -; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a2 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -734,11 +712,9 @@ define <vscale x 16 x double> @vp_floor_nxv16f64(<vscale x 16 x double> %va, <vs ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v6, v7 -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll index 82d1123..24ce8a4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll @@ -201,9 +201,8 @@ define <vscale x 32 x half> @vfmax_nxv32f16_vv(<vscale x 32 x half> %a, <vscale ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmfeq.vv v3, v24, v24 ; ZVFHMIN-NEXT: vmerge.vvm v16, v8, v24, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v3 +; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24 ; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v8, v0 ; ZVFHMIN-NEXT: vfmax.vv v16, v8, v16 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll index b78b866..d92bf09 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll @@ 
-13,32 +13,33 @@ declare <vscale x 1 x half> @llvm.vp.maximum.nxv1f16(<vscale x 1 x half>, <vscal define <vscale x 1 x half> @vfmax_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vmv1r.v v11, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v11 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 -; ZVFH-NEXT: vfmax.vv v8, v8, v11, v0.t +; ZVFH-NEXT: vmv1r.v v0, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_nxv1f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v12, v10, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v11, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v8, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -52,11 +53,10 @@ define <vscale x 1 x half> @vfmax_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, < ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, 
a0, e16, mf4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_nxv1f16_unmasked: @@ -66,12 +66,11 @@ define <vscale x 1 x half> @vfmax_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, < ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -85,32 +84,33 @@ declare <vscale x 2 x half> @llvm.vp.maximum.nxv2f16(<vscale x 2 x half>, <vscal define <vscale x 2 x half> @vfmax_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vmv1r.v v11, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v11 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 -; ZVFH-NEXT: vfmax.vv v8, v8, v11, v0.t +; ZVFH-NEXT: vmv1r.v v0, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; 
ZVFHMIN-LABEL: vfmax_vv_nxv2f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v12, v10, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v11, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v8, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -124,11 +124,10 @@ define <vscale x 2 x half> @vfmax_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, < ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_nxv2f16_unmasked: @@ -138,12 +137,11 @@ define <vscale x 2 x half> @vfmax_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, < ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 -; 
ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0 -; ZVFHMIN-NEXT: vmv.v.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -157,35 +155,34 @@ declare <vscale x 4 x half> @llvm.vp.maximum.nxv4f16(<vscale x 4 x half>, <vscal define <vscale x 4 x half> @vfmax_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vmv1r.v v11, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v11 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 -; ZVFH-NEXT: vfmax.vv v8, v8, v11, v0.t +; ZVFH-NEXT: vmv1r.v v0, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_nxv4f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v14, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v16, v12, v14, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 -; ZVFHMIN-NEXT: vmfeq.vv v8, v14, v14, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v14, v12, v0 -; ZVFHMIN-NEXT: 
vmv1r.v v0, v10 -; ZVFHMIN-NEXT: vfmax.vv v10, v8, v16, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v14 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v14 +; ZVFHMIN-NEXT: vfmax.vv v10, v10, v8, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret @@ -198,11 +195,10 @@ define <vscale x 4 x half> @vfmax_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, < ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv.v.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_nxv4f16_unmasked: @@ -214,11 +210,10 @@ define <vscale x 4 x half> @vfmax_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, < ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12 -; ZVFHMIN-NEXT: vmerge.vvm v14, v10, v12, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v10, v0 -; ZVFHMIN-NEXT: vfmax.vv v10, v8, v14 +; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFHMIN-NEXT: vfmax.vv v10, v10, v8 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret @@ -231,37 +226,34 @@ declare <vscale x 8 x half> @llvm.vp.maximum.nxv8f16(<vscale x 8 x half>, <vscal define <vscale x 8 x half> @vfmax_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_nxv8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v 
v12, v0 +; ZVFH-NEXT: vmv1r.v v14, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfeq.vv v13, v8, v8, v0.t -; ZVFH-NEXT: vmv1r.v v0, v13 -; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 -; ZVFH-NEXT: vmfeq.vv v13, v10, v10, v0.t -; ZVFH-NEXT: vmv1r.v v0, v13 +; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t +; ZVFH-NEXT: vmerge.vvm v12, v8, v10, v0 +; ZVFH-NEXT: vmv1r.v v0, v14 +; ZVFH-NEXT: vmfeq.vv v0, v10, v10, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 -; ZVFH-NEXT: vfmax.vv v8, v8, v14, v0.t +; ZVFH-NEXT: vmv1r.v v0, v14 +; ZVFH-NEXT: vfmax.vv v8, v8, v12, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_nxv8f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v12, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t +; ZVFHMIN-NEXT: vmv1r.v v20, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v24, v16, v20, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vmfeq.vv v8, v20, v20, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v20, v16, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vfmax.vv v12, v8, v24, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v20 +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v20 +; ZVFHMIN-NEXT: vfmax.vv v12, v12, v8, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret @@ -274,11 +266,10 @@ define <vscale x 8 x half> @vfmax_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, < ; ZVFH: # 
%bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v12, v10, v10 -; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vmerge.vvm v12, v8, v10, v0 +; ZVFH-NEXT: vmfeq.vv v0, v10, v10 ; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v14 +; ZVFH-NEXT: vfmax.vv v8, v8, v12 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_nxv8f16_unmasked: @@ -290,11 +281,10 @@ define <vscale x 8 x half> @vfmax_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, < ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v20, v12, v16, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v12, v0 -; ZVFHMIN-NEXT: vfmax.vv v12, v8, v20 +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 +; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vfmax.vv v12, v12, v8 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret @@ -307,52 +297,36 @@ declare <vscale x 16 x half> @llvm.vp.maximum.nxv16f16(<vscale x 16 x half>, <vs define <vscale x 16 x half> @vfmax_vv_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_nxv16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v16, v0 +; ZVFH-NEXT: vmv1r.v v20, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFH-NEXT: vmfeq.vv v17, v8, v8, v0.t -; ZVFH-NEXT: vmv1r.v v0, v17 -; ZVFH-NEXT: vmerge.vvm v20, v8, v12, v0 -; ZVFH-NEXT: vmv1r.v v0, v16 -; ZVFH-NEXT: vmfeq.vv v17, v12, v12, v0.t -; ZVFH-NEXT: vmv1r.v v0, v17 +; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t +; ZVFH-NEXT: vmerge.vvm v16, v8, v12, v0 +; ZVFH-NEXT: vmv1r.v v0, v20 +; ZVFH-NEXT: vmfeq.vv v0, v12, v12, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v12, v8, v0 -; ZVFH-NEXT: vmv1r.v 
v0, v16 -; ZVFH-NEXT: vfmax.vv v8, v8, v20, v0.t +; ZVFH-NEXT: vmv1r.v v0, v20 +; ZVFH-NEXT: vfmax.vv v8, v8, v16, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_nxv16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: addi sp, sp, -16 -; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v24, v24, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t +; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v16, v0 -; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v24, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfmax.vv v16, v8, v16, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v16, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v7 +; ZVFHMIN-NEXT: vfmax.vv v16, v16, v8, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add sp, sp, a0 -; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret %v = call <vscale x 16 x half> @llvm.vp.maximum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 
16 x i1> %m, i32 %evl) ret <vscale x 16 x half> %v @@ -363,21 +337,14 @@ define <vscale x 16 x half> @vfmax_vv_nxv16f16_unmasked(<vscale x 16 x half> %va ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v16, v12, v12 -; ZVFH-NEXT: vmerge.vvm v20, v8, v12, v0 -; ZVFH-NEXT: vmv1r.v v0, v16 +; ZVFH-NEXT: vmerge.vvm v16, v8, v12, v0 +; ZVFH-NEXT: vmfeq.vv v0, v12, v12 ; ZVFH-NEXT: vmerge.vvm v8, v12, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v20 +; ZVFH-NEXT: vfmax.vv v8, v8, v16 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_nxv16f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: addi sp, sp, -16 -; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma @@ -385,20 +352,12 @@ define <vscale x 16 x half> @vfmax_vv_nxv16f16_unmasked(<vscale x 16 x half> %va ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v7, v24, v24 ; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v24, v0 -; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v16, v0 -; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfmax.vv v16, v8, v16 +; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24 +; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v16, v0 +; ZVFHMIN-NEXT: vfmax.vv v16, v16, v8 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add sp, sp, a0 -; ZVFHMIN-NEXT: 
addi sp, sp, 16 ; ZVFHMIN-NEXT: ret %v = call <vscale x 16 x half> @llvm.vp.maximum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl) ret <vscale x 16 x half> %v @@ -409,30 +368,15 @@ declare <vscale x 32 x half> @llvm.vp.maximum.nxv32f16(<vscale x 32 x half>, <vs define <vscale x 32 x half> @vfmax_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_nxv32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: addi sp, sp, -16 -; ZVFH-NEXT: .cfi_def_cfa_offset 16 -; ZVFH-NEXT: csrr a1, vlenb -; ZVFH-NEXT: slli a1, a1, 3 -; ZVFH-NEXT: sub sp, sp, a1 -; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; ZVFH-NEXT: vmv1r.v v7, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFH-NEXT: vmfeq.vv v25, v8, v8, v0.t -; ZVFH-NEXT: vmv1r.v v0, v25 +; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t ; ZVFH-NEXT: vmerge.vvm v24, v8, v16, v0 -; ZVFH-NEXT: addi a0, sp, 16 -; ZVFH-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; ZVFH-NEXT: vmv1r.v v0, v7 -; ZVFH-NEXT: vmfeq.vv v25, v16, v16, v0.t -; ZVFH-NEXT: vmv1r.v v0, v25 +; ZVFH-NEXT: vmfeq.vv v0, v16, v16, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v16, v8, v0 ; ZVFH-NEXT: vmv1r.v v0, v7 -; ZVFH-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFH-NEXT: vfmax.vv v8, v8, v16, v0.t -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 3 -; ZVFH-NEXT: add sp, sp, a0 -; ZVFH-NEXT: addi sp, sp, 16 +; ZVFH-NEXT: vfmax.vv v8, v8, v24, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_nxv32f16: @@ -440,17 +384,10 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16(<vscale x 32 x half> %va, <vscale ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 34 +; ZVFHMIN-NEXT: li a2, 26 ; ZVFHMIN-NEXT: mul a1, a1, a2 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 
0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 25 -; ZVFHMIN-NEXT: mul a1, a1, a2 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmv8r.v v16, v8 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -459,135 +396,99 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16(<vscale x 32 x half> %va, <vscale ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: li a5, 24 -; ZVFHMIN-NEXT: mul a4, a4, a5 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2 -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs1r.v v0, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 4 +; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v12, v24, v24, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v24, v24, v0.t +; ZVFHMIN-NEXT: vmv1r.v v5, v8 +; ZVFHMIN-NEXT: vmv1r.v v6, v0 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a4, 25 -; ZVFHMIN-NEXT: mul a2, a2, a4 +; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: 
vl8r.v v0, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4 +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v16, v0 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmfeq.vv v12, v16, v16, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v24, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfmax.vv v16, v16, v24, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v5 +; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v6 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v24, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v6 +; ZVFHMIN-NEXT: vfmax.vv v16, v8, v16, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB10_2: ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: 
vl8r.v v24, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: li a2, 24 ; ZVFHMIN-NEXT: mul a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmfeq.vv v8, v24, v24, v0.t -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 4 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: addi a1, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 25 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v0 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmv8r.v v8, v16 +; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v24, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmerge.vvm v24, v24, v16, v0 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 25 +; ZVFHMIN-NEXT: li a1, 24 ; ZVFHMIN-NEXT: mul a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24, v0.t +; 
ZVFHMIN-NEXT: vmerge.vvm v16, v24, v8, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 24 -; ZVFHMIN-NEXT: mul a0, a0, a1 +; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmv1r.v v0, v9 -; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v24, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v9 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 25 +; ZVFHMIN-NEXT: li a1, 24 ; ZVFHMIN-NEXT: mul a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfmax.vv v16, v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 34 +; ZVFHMIN-NEXT: li a1, 26 ; ZVFHMIN-NEXT: mul a0, a0, a1 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 @@ -601,9 +502,8 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16_unmasked(<vscale x 32 x half> %va ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v7, v16, v16 ; ZVFH-NEXT: vmerge.vvm v24, v8, v16, v0 -; ZVFH-NEXT: vmv1r.v v0, v7 +; ZVFH-NEXT: vmfeq.vv v0, v16, v16 ; ZVFH-NEXT: vmerge.vvm v8, v16, v8, v0 ; ZVFH-NEXT: vfmax.vv v8, v8, v24 ; ZVFH-NEXT: ret @@ -613,9 +513,10 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16_unmasked(<vscale x 32 x half> %va ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: 
.cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 5 +; ZVFHMIN-NEXT: li a2, 24 +; ZVFHMIN-NEXT: mul a1, a1, a2 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -626,82 +527,80 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16_unmasked(<vscale x 32 x half> %va ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma ; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v7, v24, a2 +; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 4 +; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmfeq.vv v12, v24, v24, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v24, v24, v0.t +; ZVFHMIN-NEXT: vmv1r.v v6, v8 +; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vmv4r.v v8, v16 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a4, 24 -; ZVFHMIN-NEXT: mul a2, a2, a4 +; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20 ; ZVFHMIN-NEXT: vsetvli 
zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v24, v0 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmv1r.v v0, v6 +; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v8, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmfeq.vv v12, v24, v24, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v16, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v24, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfmax.vv v16, v16, v8, v0.t +; ZVFHMIN-NEXT: vfmax.vv v16, v8, v16, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v16 +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB11_2: ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 +; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; 
ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 +; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v3, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v24, v8, v16, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v3 -; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmv8r.v v8, v16 +; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v24, v0 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24 +; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v8, v0 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfmax.vv v16, v16, v24 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v0, v16 -; ZVFHMIN-NEXT: vmv8r.v v8, v0 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 5 +; ZVFHMIN-NEXT: li a1, 24 +; ZVFHMIN-NEXT: mul a0, a0, a1 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret @@ -714,15 +613,15 @@ declare <vscale x 1 x float> @llvm.vp.maximum.nxv1f32(<vscale x 1 x float>, <vsc define <vscale x 1 x float> @vfmax_vv_nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: 
vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfmax.vv v8, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <vscale x 1 x float> @llvm.vp.maximum.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, <vscale x 1 x i1> %m, i32 %evl) ret <vscale x 1 x float> %v @@ -733,11 +632,10 @@ define <vscale x 1 x float> @vfmax_vv_nxv1f32_unmasked(<vscale x 1 x float> %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <vscale x 1 x float> @llvm.vp.maximum.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl) ret <vscale x 1 x float> %v @@ -748,15 +646,15 @@ declare <vscale x 2 x float> @llvm.vp.maximum.nxv2f32(<vscale x 2 x float>, <vsc define <vscale x 2 x float> @vfmax_vv_nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; 
CHECK-NEXT: vfmax.vv v8, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <vscale x 2 x float> @llvm.vp.maximum.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, <vscale x 2 x i1> %m, i32 %evl) ret <vscale x 2 x float> %v @@ -767,11 +665,10 @@ define <vscale x 2 x float> @vfmax_vv_nxv2f32_unmasked(<vscale x 2 x float> %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <vscale x 2 x float> @llvm.vp.maximum.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl) ret <vscale x 2 x float> %v @@ -782,17 +679,15 @@ declare <vscale x 4 x float> @llvm.vp.maximum.nxv4f32(<vscale x 4 x float>, <vsc define <vscale x 4 x float> @vfmax_vv_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v13, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfeq.vv v13, v10, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfeq.vv v0, v10, v10, v0.t ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfmax.vv v8, v8, v14, v0.t +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vfmax.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret %v = call 
<vscale x 4 x float> @llvm.vp.maximum.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, <vscale x 4 x i1> %m, i32 %evl) ret <vscale x 4 x float> %v @@ -803,11 +698,10 @@ define <vscale x 4 x float> @vfmax_vv_nxv4f32_unmasked(<vscale x 4 x float> %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v14 +; CHECK-NEXT: vfmax.vv v8, v8, v12 ; CHECK-NEXT: ret %v = call <vscale x 4 x float> @llvm.vp.maximum.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl) ret <vscale x 4 x float> %v @@ -818,17 +712,15 @@ declare <vscale x 8 x float> @llvm.vp.maximum.nxv8f32(<vscale x 8 x float>, <vsc define <vscale x 8 x float> @vfmax_vv_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v17, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmfeq.vv v17, v12, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfeq.vv v0, v12, v12, v0.t ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfmax.vv v8, v8, v20, v0.t +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x float> @llvm.vp.maximum.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, <vscale x 8 x i1> %m, i32 %evl) ret 
<vscale x 8 x float> %v @@ -839,11 +731,10 @@ define <vscale x 8 x float> @vfmax_vv_nxv8f32_unmasked(<vscale x 8 x float> %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v20 +; CHECK-NEXT: vfmax.vv v8, v8, v16 ; CHECK-NEXT: ret %v = call <vscale x 8 x float> @llvm.vp.maximum.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl) ret <vscale x 8 x float> %v @@ -854,15 +745,15 @@ declare <vscale x 1 x double> @llvm.vp.maximum.nxv1f64(<vscale x 1 x double>, <v define <vscale x 1 x double> @vfmax_vv_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfmax.vv v8, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <vscale x 1 x double> @llvm.vp.maximum.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x i1> %m, i32 %evl) ret <vscale x 1 x double> %v @@ -873,11 +764,10 @@ define <vscale x 1 x double> @vfmax_vv_nxv1f64_unmasked(<vscale x 1 x double> %v ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: 
vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <vscale x 1 x double> @llvm.vp.maximum.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl) ret <vscale x 1 x double> %v @@ -888,17 +778,15 @@ declare <vscale x 2 x double> @llvm.vp.maximum.nxv2f64(<vscale x 2 x double>, <v define <vscale x 2 x double> @vfmax_vv_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v13, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfeq.vv v13, v10, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfeq.vv v0, v10, v10, v0.t ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfmax.vv v8, v8, v14, v0.t +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vfmax.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <vscale x 2 x double> @llvm.vp.maximum.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, <vscale x 2 x i1> %m, i32 %evl) ret <vscale x 2 x double> %v @@ -909,11 +797,10 @@ define <vscale x 2 x double> @vfmax_vv_nxv2f64_unmasked(<vscale x 2 x double> %v ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmfeq.vv v0, 
v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v14 +; CHECK-NEXT: vfmax.vv v8, v8, v12 ; CHECK-NEXT: ret %v = call <vscale x 2 x double> @llvm.vp.maximum.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl) ret <vscale x 2 x double> %v @@ -924,17 +811,15 @@ declare <vscale x 4 x double> @llvm.vp.maximum.nxv4f64(<vscale x 4 x double>, <v define <vscale x 4 x double> @vfmax_vv_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v17, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmfeq.vv v17, v12, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfeq.vv v0, v12, v12, v0.t ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfmax.vv v8, v8, v20, v0.t +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <vscale x 4 x double> @llvm.vp.maximum.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, <vscale x 4 x i1> %m, i32 %evl) ret <vscale x 4 x double> %v @@ -945,11 +830,10 @@ define <vscale x 4 x double> @vfmax_vv_nxv4f64_unmasked(<vscale x 4 x double> %v ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v20 +; CHECK-NEXT: vfmax.vv v8, v8, v16 ; 
CHECK-NEXT: ret %v = call <vscale x 4 x double> @llvm.vp.maximum.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl) ret <vscale x 4 x double> %v @@ -960,30 +844,15 @@ declare <vscale x 8 x double> @llvm.vp.maximum.nxv8f64(<vscale x 8 x double>, <v define <vscale x 8 x double> @vfmax_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v16, v16, v0.t ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vfmax.vv v8, v8, v24, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x double> @llvm.vp.maximum.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x double> %v @@ -994,9 +863,8 @@ define <vscale x 8 x double> @vfmax_vv_nxv8f64_unmasked(<vscale x 8 x double> %v ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; 
CHECK-NEXT: vmfeq.vv v7, v16, v16 ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v24 ; CHECK-NEXT: ret @@ -1012,79 +880,43 @@ define <vscale x 16 x double> @vfmax_vv_nxv16f64(<vscale x 16 x double> %va, <vs ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 42 +; CHECK-NEXT: li a3, 24 ; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a3, a1, 3 ; CHECK-NEXT: add a3, a0, a3 ; CHECK-NEXT: vl8re64.v v24, (a3) -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: slli a3, a3, 4 -; CHECK-NEXT: add a3, sp, a3 -; CHECK-NEXT: addi a3, a3, 16 -; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: srli a3, a1, 3 ; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: slli a4, a4, 5 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill -; CHECK-NEXT: vslidedown.vx v24, v0, a3 +; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: vslidedown.vx v6, v0, a3 ; CHECK-NEXT: sub a3, a2, a1 ; CHECK-NEXT: sltu a4, a2, a3 ; CHECK-NEXT: addi a4, a4, -1 ; CHECK-NEXT: and a3, a4, a3 -; CHECK-NEXT: vl8re64.v v0, (a0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a4, a0, 5 -; CHECK-NEXT: add a0, a4, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v 
v0, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v7, v24 -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vmfeq.vv v26, v16, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v26 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 +; CHECK-NEXT: vl8re64.v v8, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmfeq.vv v17, v24, v24, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a0, a0, a3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vmfeq.vv v0, v16, v16, v0.t +; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 +; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vmfeq.vv v0, v24, v24, v0.t +; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 +; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vfmax.vv v8, v16, v8, v0.t ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: bltu 
a2, a1, .LBB28_2 ; CHECK-NEXT: # %bb.1: @@ -1092,49 +924,27 @@ define <vscale x 16 x double> @vfmax_vv_nxv16f64(<vscale x 16 x double> %va, <vs ; CHECK-NEXT: .LBB28_2: ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v16, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 5 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t ; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vfmax.vv v8, v8, v24, v0.t +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; 
CHECK-NEXT: li a1, 42 +; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 @@ -1174,9 +984,8 @@ define <vscale x 16 x double> @vfmax_vv_nxv16f64_unmasked(<vscale x 16 x double> ; CHECK-NEXT: and a0, a3, a0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmfeq.vv v7, v24, v24 ; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v24, v24 ; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 ; CHECK-NEXT: vfmax.vv v8, v16, v8 ; CHECK-NEXT: addi a0, sp, 16 @@ -1197,9 +1006,8 @@ define <vscale x 16 x double> @vfmax_vv_nxv16f64_unmasked(<vscale x 16 x double> ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v7, v8, v8 ; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 ; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v24 ; CHECK-NEXT: addi a0, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll index 69ad7b4..198d3a9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll @@ -201,9 +201,8 @@ define <vscale x 32 x half> @vfmin_nxv32f16_vv(<vscale x 32 x half> %a, <vscale ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmfeq.vv v3, v24, v24 ; ZVFHMIN-NEXT: vmerge.vvm v16, v8, v24, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v3 +; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24 ; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v8, v0 ; ZVFHMIN-NEXT: vfmin.vv v16, v8, v16 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll index 69c7615..828cc57 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll @@ -13,32 +13,33 @@ declare <vscale x 1 x half> @llvm.vp.minimum.nxv1f16(<vscale x 1 x half>, <vscal define <vscale x 1 x half> @vfmin_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vmv1r.v v11, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v11 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 -; ZVFH-NEXT: vfmin.vv v8, v8, v11, v0.t +; ZVFH-NEXT: vmv1r.v v0, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv1f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v12, v10, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v11, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v8, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -52,11 +53,10 @@ define <vscale x 1 x half> 
@vfmin_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, < ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv1f16_unmasked: @@ -66,12 +66,11 @@ define <vscale x 1 x half> @vfmin_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, < ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -85,32 +84,33 @@ declare <vscale x 2 x half> @llvm.vp.minimum.nxv2f16(<vscale x 2 x half>, <vscal define <vscale x 2 x half> @vfmin_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vmv1r.v v11, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v11 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 -; ZVFH-NEXT: vfmin.vv v8, v8, 
v11, v0.t +; ZVFH-NEXT: vmv1r.v v0, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv2f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v12, v10, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v11, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v8, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -124,11 +124,10 @@ define <vscale x 2 x half> @vfmin_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, < ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv2f16_unmasked: @@ -138,12 +137,11 @@ define <vscale x 2 x half> @vfmin_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, < ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 +; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0 -; ZVFHMIN-NEXT: vmv.v.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -157,35 +155,34 @@ declare <vscale x 4 x half> @llvm.vp.minimum.nxv4f16(<vscale x 4 x half>, <vscal define <vscale x 4 x half> @vfmin_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vmv1r.v v11, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v11 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 -; ZVFH-NEXT: vfmin.vv v8, v8, v11, v0.t +; ZVFH-NEXT: vmv1r.v v0, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv4f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v14, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v16, v12, v14, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 -; ZVFHMIN-NEXT: vmfeq.vv v8, 
v14, v14, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v14, v12, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 -; ZVFHMIN-NEXT: vfmin.vv v10, v8, v16, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v14 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v14 +; ZVFHMIN-NEXT: vfmin.vv v10, v10, v8, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret @@ -198,11 +195,10 @@ define <vscale x 4 x half> @vfmin_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, < ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv.v.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv4f16_unmasked: @@ -214,11 +210,10 @@ define <vscale x 4 x half> @vfmin_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, < ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12 -; ZVFHMIN-NEXT: vmerge.vvm v14, v10, v12, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v10, v0 -; ZVFHMIN-NEXT: vfmin.vv v10, v8, v14 +; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFHMIN-NEXT: vfmin.vv v10, v10, v8 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret @@ -231,37 +226,34 @@ declare <vscale x 8 x half> @llvm.vp.minimum.nxv8f16(<vscale x 8 x half>, <vscal define <vscale x 8 x half> @vfmin_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale 
x 8 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_nxv8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v12, v0 +; ZVFH-NEXT: vmv1r.v v14, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfeq.vv v13, v8, v8, v0.t -; ZVFH-NEXT: vmv1r.v v0, v13 -; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 -; ZVFH-NEXT: vmfeq.vv v13, v10, v10, v0.t -; ZVFH-NEXT: vmv1r.v v0, v13 +; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t +; ZVFH-NEXT: vmerge.vvm v12, v8, v10, v0 +; ZVFH-NEXT: vmv1r.v v0, v14 +; ZVFH-NEXT: vmfeq.vv v0, v10, v10, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 -; ZVFH-NEXT: vfmin.vv v8, v8, v14, v0.t +; ZVFH-NEXT: vmv1r.v v0, v14 +; ZVFH-NEXT: vfmin.vv v8, v8, v12, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv8f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v12, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t +; ZVFHMIN-NEXT: vmv1r.v v20, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v24, v16, v20, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vmfeq.vv v8, v20, v20, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v20, v16, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vfmin.vv v12, v8, v24, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v20 +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v20 +; ZVFHMIN-NEXT: vfmin.vv v12, v12, v8, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret @@ 
-274,11 +266,10 @@ define <vscale x 8 x half> @vfmin_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, < ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v12, v10, v10 -; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vmerge.vvm v12, v8, v10, v0 +; ZVFH-NEXT: vmfeq.vv v0, v10, v10 ; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v14 +; ZVFH-NEXT: vfmin.vv v8, v8, v12 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv8f16_unmasked: @@ -290,11 +281,10 @@ define <vscale x 8 x half> @vfmin_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, < ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v20, v12, v16, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v12, v0 -; ZVFHMIN-NEXT: vfmin.vv v12, v8, v20 +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 +; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vfmin.vv v12, v12, v8 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret @@ -307,52 +297,36 @@ declare <vscale x 16 x half> @llvm.vp.minimum.nxv16f16(<vscale x 16 x half>, <vs define <vscale x 16 x half> @vfmin_vv_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_nxv16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v16, v0 +; ZVFH-NEXT: vmv1r.v v20, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFH-NEXT: vmfeq.vv v17, v8, v8, v0.t -; ZVFH-NEXT: vmv1r.v v0, v17 -; ZVFH-NEXT: vmerge.vvm v20, v8, v12, v0 -; ZVFH-NEXT: vmv1r.v v0, v16 -; ZVFH-NEXT: vmfeq.vv v17, v12, v12, v0.t -; ZVFH-NEXT: vmv1r.v v0, v17 +; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t +; ZVFH-NEXT: vmerge.vvm v16, v8, v12, v0 +; ZVFH-NEXT: vmv1r.v 
v0, v20 +; ZVFH-NEXT: vmfeq.vv v0, v12, v12, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v12, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v16 -; ZVFH-NEXT: vfmin.vv v8, v8, v20, v0.t +; ZVFH-NEXT: vmv1r.v v0, v20 +; ZVFH-NEXT: vfmin.vv v8, v8, v16, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: addi sp, sp, -16 -; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v24, v24, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t +; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v16, v0 -; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v24, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfmin.vv v16, v8, v16, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v16, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v7 +; ZVFHMIN-NEXT: vfmin.vv v16, v16, v8, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add sp, sp, a0 -; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret %v = 
call <vscale x 16 x half> @llvm.vp.minimum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 %evl) ret <vscale x 16 x half> %v @@ -363,21 +337,14 @@ define <vscale x 16 x half> @vfmin_vv_nxv16f16_unmasked(<vscale x 16 x half> %va ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v16, v12, v12 -; ZVFH-NEXT: vmerge.vvm v20, v8, v12, v0 -; ZVFH-NEXT: vmv1r.v v0, v16 +; ZVFH-NEXT: vmerge.vvm v16, v8, v12, v0 +; ZVFH-NEXT: vmfeq.vv v0, v12, v12 ; ZVFH-NEXT: vmerge.vvm v8, v12, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v20 +; ZVFH-NEXT: vfmin.vv v8, v8, v16 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv16f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: addi sp, sp, -16 -; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma @@ -385,20 +352,12 @@ define <vscale x 16 x half> @vfmin_vv_nxv16f16_unmasked(<vscale x 16 x half> %va ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v7, v24, v24 ; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v24, v0 -; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v16, v0 -; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfmin.vv v16, v8, v16 +; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24 +; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v16, v0 +; ZVFHMIN-NEXT: vfmin.vv v16, v16, v8 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, 
v16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add sp, sp, a0 -; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret %v = call <vscale x 16 x half> @llvm.vp.minimum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl) ret <vscale x 16 x half> %v @@ -409,30 +368,15 @@ declare <vscale x 32 x half> @llvm.vp.minimum.nxv32f16(<vscale x 32 x half>, <vs define <vscale x 32 x half> @vfmin_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_nxv32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: addi sp, sp, -16 -; ZVFH-NEXT: .cfi_def_cfa_offset 16 -; ZVFH-NEXT: csrr a1, vlenb -; ZVFH-NEXT: slli a1, a1, 3 -; ZVFH-NEXT: sub sp, sp, a1 -; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; ZVFH-NEXT: vmv1r.v v7, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFH-NEXT: vmfeq.vv v25, v8, v8, v0.t -; ZVFH-NEXT: vmv1r.v v0, v25 +; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t ; ZVFH-NEXT: vmerge.vvm v24, v8, v16, v0 -; ZVFH-NEXT: addi a0, sp, 16 -; ZVFH-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; ZVFH-NEXT: vmv1r.v v0, v7 -; ZVFH-NEXT: vmfeq.vv v25, v16, v16, v0.t -; ZVFH-NEXT: vmv1r.v v0, v25 +; ZVFH-NEXT: vmfeq.vv v0, v16, v16, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v16, v8, v0 ; ZVFH-NEXT: vmv1r.v v0, v7 -; ZVFH-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFH-NEXT: vfmin.vv v8, v8, v16, v0.t -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 3 -; ZVFH-NEXT: add sp, sp, a0 -; ZVFH-NEXT: addi sp, sp, 16 +; ZVFH-NEXT: vfmin.vv v8, v8, v24, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv32f16: @@ -440,17 +384,10 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16(<vscale x 32 x half> %va, <vscale ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 34 +; 
ZVFHMIN-NEXT: li a2, 26 ; ZVFHMIN-NEXT: mul a1, a1, a2 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 25 -; ZVFHMIN-NEXT: mul a1, a1, a2 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmv8r.v v16, v8 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -459,135 +396,99 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16(<vscale x 32 x half> %va, <vscale ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: li a5, 24 -; ZVFHMIN-NEXT: mul a4, a4, a5 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2 -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs1r.v v0, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 4 +; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v12, v24, v24, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v24, v24, v0.t +; ZVFHMIN-NEXT: vmv1r.v v5, v8 +; ZVFHMIN-NEXT: vmv1r.v v6, v0 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a4, 25 -; ZVFHMIN-NEXT: mul a2, a2, 
a4 +; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v0, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4 +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v16, v0 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmfeq.vv v12, v16, v16, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v24, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfmin.vv v16, v16, v24, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v5 +; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v6 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v24, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v6 +; ZVFHMIN-NEXT: vfmin.vv v16, v8, v16, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB10_2: ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; 
ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: li a2, 24 ; ZVFHMIN-NEXT: mul a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmfeq.vv v8, v24, v24, v0.t -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 4 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: addi a1, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 25 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v0 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmv8r.v v8, v16 +; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v24, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmerge.vvm v24, v24, v16, v0 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 25 +; ZVFHMIN-NEXT: li a1, 24 ; ZVFHMIN-NEXT: mul a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded 
Spill +; ZVFHMIN-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v8, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 24 -; ZVFHMIN-NEXT: mul a0, a0, a1 +; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmv1r.v v0, v9 -; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v24, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v9 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 25 +; ZVFHMIN-NEXT: li a1, 24 ; ZVFHMIN-NEXT: mul a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfmin.vv v16, v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 34 +; ZVFHMIN-NEXT: li a1, 26 ; ZVFHMIN-NEXT: mul a0, a0, a1 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 @@ -601,9 +502,8 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16_unmasked(<vscale x 32 x half> %va ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v7, v16, v16 ; ZVFH-NEXT: vmerge.vvm v24, v8, v16, v0 -; ZVFH-NEXT: vmv1r.v v0, v7 +; ZVFH-NEXT: vmfeq.vv v0, v16, v16 ; ZVFH-NEXT: vmerge.vvm v8, v16, v8, v0 ; ZVFH-NEXT: vfmin.vv v8, v8, v24 ; ZVFH-NEXT: ret @@ -613,9 +513,10 @@ define <vscale x 32 x 
half> @vfmin_vv_nxv32f16_unmasked(<vscale x 32 x half> %va ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 5 +; ZVFHMIN-NEXT: li a2, 24 +; ZVFHMIN-NEXT: mul a1, a1, a2 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -626,82 +527,80 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16_unmasked(<vscale x 32 x half> %va ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma ; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v7, v24, a2 +; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 4 +; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmfeq.vv v12, v24, v24, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v24, v24, v0.t +; ZVFHMIN-NEXT: vmv1r.v v6, v8 +; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vmv4r.v v8, v16 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a4, 24 -; ZVFHMIN-NEXT: mul a2, a2, a4 +; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 +; ZVFHMIN-NEXT: 
vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v24, v0 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmv1r.v v0, v6 +; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v8, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmfeq.vv v12, v24, v24, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v16, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v24, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfmin.vv v16, v16, v8, v0.t +; ZVFHMIN-NEXT: vfmin.vv v16, v8, v16, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v16 +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB11_2: ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 +; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, 
ma ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 +; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v3, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v24, v8, v16, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v3 -; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmv8r.v v8, v16 +; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v24, v0 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24 +; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v8, v0 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfmin.vv v16, v16, v24 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v0, v16 -; ZVFHMIN-NEXT: vmv8r.v v8, v0 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 5 +; ZVFHMIN-NEXT: li a1, 24 +; ZVFHMIN-NEXT: mul a0, a0, a1 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret @@ -714,15 +613,15 @@ declare <vscale x 1 x float> @llvm.vp.minimum.nxv1f32(<vscale x 1 x float>, <vsc define <vscale x 1 x float> @vfmin_vv_nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, 
v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfmin.vv v8, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <vscale x 1 x float> @llvm.vp.minimum.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, <vscale x 1 x i1> %m, i32 %evl) ret <vscale x 1 x float> %v @@ -733,11 +632,10 @@ define <vscale x 1 x float> @vfmin_vv_nxv1f32_unmasked(<vscale x 1 x float> %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <vscale x 1 x float> @llvm.vp.minimum.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl) ret <vscale x 1 x float> %v @@ -748,15 +646,15 @@ declare <vscale x 2 x float> @llvm.vp.minimum.nxv2f32(<vscale x 2 x float>, <vsc define <vscale x 2 x float> @vfmin_vv_nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmfeq.vv v0, 
v9, v9, v0.t ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfmin.vv v8, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <vscale x 2 x float> @llvm.vp.minimum.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, <vscale x 2 x i1> %m, i32 %evl) ret <vscale x 2 x float> %v @@ -767,11 +665,10 @@ define <vscale x 2 x float> @vfmin_vv_nxv2f32_unmasked(<vscale x 2 x float> %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <vscale x 2 x float> @llvm.vp.minimum.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl) ret <vscale x 2 x float> %v @@ -782,17 +679,15 @@ declare <vscale x 4 x float> @llvm.vp.minimum.nxv4f32(<vscale x 4 x float>, <vsc define <vscale x 4 x float> @vfmin_vv_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v13, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfeq.vv v13, v10, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfeq.vv v0, v10, v10, v0.t ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfmin.vv v8, v8, v14, v0.t +; CHECK-NEXT: 
vmv1r.v v0, v14 +; CHECK-NEXT: vfmin.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <vscale x 4 x float> @llvm.vp.minimum.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, <vscale x 4 x i1> %m, i32 %evl) ret <vscale x 4 x float> %v @@ -803,11 +698,10 @@ define <vscale x 4 x float> @vfmin_vv_nxv4f32_unmasked(<vscale x 4 x float> %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v14 +; CHECK-NEXT: vfmin.vv v8, v8, v12 ; CHECK-NEXT: ret %v = call <vscale x 4 x float> @llvm.vp.minimum.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl) ret <vscale x 4 x float> %v @@ -818,17 +712,15 @@ declare <vscale x 8 x float> @llvm.vp.minimum.nxv8f32(<vscale x 8 x float>, <vsc define <vscale x 8 x float> @vfmin_vv_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v17, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmfeq.vv v17, v12, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfeq.vv v0, v12, v12, v0.t ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfmin.vv v8, v8, v20, v0.t +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vfmin.vv v8, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x float> 
@llvm.vp.minimum.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x float> %v @@ -839,11 +731,10 @@ define <vscale x 8 x float> @vfmin_vv_nxv8f32_unmasked(<vscale x 8 x float> %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v20 +; CHECK-NEXT: vfmin.vv v8, v8, v16 ; CHECK-NEXT: ret %v = call <vscale x 8 x float> @llvm.vp.minimum.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl) ret <vscale x 8 x float> %v @@ -854,15 +745,15 @@ declare <vscale x 1 x double> @llvm.vp.minimum.nxv1f64(<vscale x 1 x double>, <v define <vscale x 1 x double> @vfmin_vv_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfmin.vv v8, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <vscale x 1 x double> @llvm.vp.minimum.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x i1> %m, i32 %evl) ret <vscale x 1 x double> %v @@ -873,11 +764,10 @@ define <vscale x 1 x double> @vfmin_vv_nxv1f64_unmasked(<vscale x 1 x double> %v ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli 
zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <vscale x 1 x double> @llvm.vp.minimum.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl) ret <vscale x 1 x double> %v @@ -888,17 +778,15 @@ declare <vscale x 2 x double> @llvm.vp.minimum.nxv2f64(<vscale x 2 x double>, <v define <vscale x 2 x double> @vfmin_vv_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v13, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfeq.vv v13, v10, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfeq.vv v0, v10, v10, v0.t ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfmin.vv v8, v8, v14, v0.t +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vfmin.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <vscale x 2 x double> @llvm.vp.minimum.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, <vscale x 2 x i1> %m, i32 %evl) ret <vscale x 2 x double> %v @@ -909,11 +797,10 @@ define <vscale x 2 x double> @vfmin_vv_nxv2f64_unmasked(<vscale x 2 x double> %v ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmerge.vvm v14, 
v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v14 +; CHECK-NEXT: vfmin.vv v8, v8, v12 ; CHECK-NEXT: ret %v = call <vscale x 2 x double> @llvm.vp.minimum.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl) ret <vscale x 2 x double> %v @@ -924,17 +811,15 @@ declare <vscale x 4 x double> @llvm.vp.minimum.nxv4f64(<vscale x 4 x double>, <v define <vscale x 4 x double> @vfmin_vv_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v17, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmfeq.vv v17, v12, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfeq.vv v0, v12, v12, v0.t ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfmin.vv v8, v8, v20, v0.t +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vfmin.vv v8, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <vscale x 4 x double> @llvm.vp.minimum.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, <vscale x 4 x i1> %m, i32 %evl) ret <vscale x 4 x double> %v @@ -945,11 +830,10 @@ define <vscale x 4 x double> @vfmin_vv_nxv4f64_unmasked(<vscale x 4 x double> %v ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; 
CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v20 +; CHECK-NEXT: vfmin.vv v8, v8, v16 ; CHECK-NEXT: ret %v = call <vscale x 4 x double> @llvm.vp.minimum.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl) ret <vscale x 4 x double> %v @@ -960,30 +844,15 @@ declare <vscale x 8 x double> @llvm.vp.minimum.nxv8f64(<vscale x 8 x double>, <v define <vscale x 8 x double> @vfmin_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v16, v16, v0.t ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v8, v8, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vfmin.vv v8, v8, v24, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x double> @llvm.vp.minimum.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x double> %v @@ -994,9 +863,8 @@ define <vscale x 8 x double> @vfmin_vv_nxv8f64_unmasked(<vscale x 8 x 
double> %v ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v7, v16, v16 ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v24 ; CHECK-NEXT: ret @@ -1012,79 +880,43 @@ define <vscale x 16 x double> @vfmin_vv_nxv16f64(<vscale x 16 x double> %va, <vs ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 42 +; CHECK-NEXT: li a3, 24 ; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a3, a1, 3 ; CHECK-NEXT: add a3, a0, a3 ; CHECK-NEXT: vl8re64.v v24, (a3) -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: slli a3, a3, 4 -; CHECK-NEXT: add a3, sp, a3 -; CHECK-NEXT: addi a3, a3, 16 -; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: srli a3, a1, 3 ; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: slli a4, a4, 5 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill -; CHECK-NEXT: vslidedown.vx v24, v0, a3 +; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: vslidedown.vx v6, v0, a3 ; CHECK-NEXT: sub a3, a2, a1 ; CHECK-NEXT: sltu a4, a2, a3 ; CHECK-NEXT: addi a4, a4, -1 ; CHECK-NEXT: and a3, a4, a3 -; CHECK-NEXT: vl8re64.v v0, (a0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a4, a0, 5 
-; CHECK-NEXT: add a0, a4, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v7, v24 -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vmfeq.vv v26, v16, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v26 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 +; CHECK-NEXT: vl8re64.v v8, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmfeq.vv v17, v24, v24, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a0, a0, a3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vmfeq.vv v0, v16, v16, v0.t +; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 +; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vmfeq.vv v0, v24, v24, v0.t +; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 +; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vfmin.vv v8, v16, v8, v0.t ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v8, v8, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, 
a0 -; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a2, a1, .LBB28_2 ; CHECK-NEXT: # %bb.1: @@ -1092,49 +924,27 @@ define <vscale x 16 x double> @vfmin_vv_nxv16f64(<vscale x 16 x double> %va, <vs ; CHECK-NEXT: .LBB28_2: ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v16, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 5 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t ; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v8, v8, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vfmin.vv v8, v8, v24, v0.t +; CHECK-NEXT: addi 
a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 42 +; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 @@ -1174,9 +984,8 @@ define <vscale x 16 x double> @vfmin_vv_nxv16f64_unmasked(<vscale x 16 x double> ; CHECK-NEXT: and a0, a3, a0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmfeq.vv v7, v24, v24 ; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v24, v24 ; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 ; CHECK-NEXT: vfmin.vv v8, v16, v8 ; CHECK-NEXT: addi a0, sp, 16 @@ -1197,9 +1006,8 @@ define <vscale x 16 x double> @vfmin_vv_nxv16f64_unmasked(<vscale x 16 x double> ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v7, v8, v8 ; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 ; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v24 ; CHECK-NEXT: addi a0, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/memcpy-inline.ll b/llvm/test/CodeGen/RISCV/rvv/memcpy-inline.ll index 485f94ee2..53598c6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/memcpy-inline.ll +++ b/llvm/test/CodeGen/RISCV/rvv/memcpy-inline.ll @@ -3,9 +3,9 @@ ; RUN: | FileCheck %s --check-prefixes=RV32-BOTH,RV32 ; RUN: llc < %s -mtriple=riscv64 -mattr=+v \ ; RUN: | FileCheck %s --check-prefixes=RV64-BOTH,RV64 -; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+fast-unaligned-access \ +; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+unaligned-scalar-mem,+unaligned-vector-mem \ ; RUN: | FileCheck %s --check-prefixes=RV32-BOTH,RV32-FAST -; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+fast-unaligned-access \ +; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+unaligned-scalar-mem,+unaligned-vector-mem \ ; RUN: | FileCheck %s 
--check-prefixes=RV64-BOTH,RV64-FAST ; ---------------------------------------------------------------------- diff --git a/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll b/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll index 0e7e914..accc185 100644 --- a/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll +++ b/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll @@ -3,9 +3,9 @@ ; RUN: | FileCheck %s --check-prefixes=RV32-BOTH,RV32 ; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v \ ; RUN: | FileCheck %s --check-prefixes=RV64-BOTH,RV64 -; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+v,+fast-unaligned-access \ +; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+v,+unaligned-scalar-mem,+unaligned-vector-mem \ ; RUN: | FileCheck %s --check-prefixes=RV32-BOTH,RV32-FAST -; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+fast-unaligned-access \ +; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+unaligned-scalar-mem,+unaligned-vector-mem \ ; RUN: | FileCheck %s --check-prefixes=RV64-BOTH,RV64-FAST %struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 } diff --git a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll index 0e09f59..1e38700 100644 --- a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll @@ -1757,19 +1757,33 @@ define void @mscatter_baseidx_nxv16i8_nxv16f64(<vscale x 8 x double> %val0, <vsc ; ; RV64-LABEL: mscatter_baseidx_nxv16i8_nxv16f64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: sub sp, sp, a2 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; RV64-NEXT: vl2r.v v6, (a1) +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v24, v7 +; RV64-NEXT: vsll.vi v16, v24, 3 ; RV64-NEXT: vsext.vf8
v24, v6 ; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t -; RV64-NEXT: vsext.vf8 v8, v7 -; RV64-NEXT: vsll.vi v8, v8, 3 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: srli a1, a1, 3 ; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vx v0, v0, a1 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i8> %idxs %v0 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0) @@ -1797,19 +1811,33 @@ define void @mscatter_baseidx_nxv16i16_nxv16f64(<vscale x 8 x double> %val0, <vs ; ; RV64-LABEL: mscatter_baseidx_nxv16i16_nxv16f64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: sub sp, sp, a2 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; RV64-NEXT: vl4re16.v v4, (a1) +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf4 v24, v6 +; RV64-NEXT: vsll.vi v16, v24, 3 ; RV64-NEXT: vsext.vf4 v24, v4 ; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t -; RV64-NEXT: vsext.vf4 v8, v6 -; RV64-NEXT: vsll.vi v8, v8, 3 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: srli a1, a1, 3 ; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vx v0, v0, a1 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t +; RV64-NEXT: addi a1, sp, 16 +; 
RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i16> %idxs %v0 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0) diff --git a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll index fadb4a1..05d6bc6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll @@ -201,11 +201,9 @@ define <vscale x 4 x half> @vp_nearbyint_nxv4f16(<vscale x 4 x half> %va, <vscal ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: fsflags a0 @@ -266,11 +264,9 @@ define <vscale x 8 x half> @vp_nearbyint_nxv8f16(<vscale x 8 x half> %va, <vscal ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; ZVFH-NEXT: vmv1r.v v13, v0 -; ZVFH-NEXT: vmflt.vf v13, v10, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v10, fa5, v0.t ; ZVFH-NEXT: frflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmv1r.v v0, v13 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t ; ZVFH-NEXT: fsflags a0 @@ -287,11 +283,9 @@ define <vscale x 8 x half> @vp_nearbyint_nxv8f16(<vscale x 8 x half> %va, <vscal ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, 
e32, m4, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v17, v0 -; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v17 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: fsflags a0 @@ -352,11 +346,9 @@ define <vscale x 16 x half> @vp_nearbyint_nxv16f16(<vscale x 16 x half> %va, <vs ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; ZVFH-NEXT: vmv1r.v v17, v0 -; ZVFH-NEXT: vmflt.vf v17, v12, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v12, fa5, v0.t ; ZVFH-NEXT: frflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFH-NEXT: vmv1r.v v0, v17 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t ; ZVFH-NEXT: fsflags a0 @@ -373,11 +365,9 @@ define <vscale x 16 x half> @vp_nearbyint_nxv16f16(<vscale x 16 x half> %va, <vs ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v25, v0 -; ZVFHMIN-NEXT: vmflt.vf v25, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v25 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: fsflags a0 @@ -438,11 +428,9 @@ define <vscale x 32 x half> @vp_nearbyint_nxv32f16(<vscale x 32 x half> %va, <vs ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; ZVFH-NEXT: vmv1r.v v25, v0 -; ZVFH-NEXT: vmflt.vf v25, v16, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v16, fa5, v0.t ; ZVFH-NEXT: frflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma -; ZVFH-NEXT: vmv1r.v v0, v25 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, 
v0.t ; ZVFH-NEXT: fsflags a0 @@ -469,11 +457,9 @@ define <vscale x 32 x half> @vp_nearbyint_nxv32f16(<vscale x 32 x half> %va, <vs ; ZVFHMIN-NEXT: lui a2, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v24, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5, v0.t ; ZVFHMIN-NEXT: frflags a2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t ; ZVFHMIN-NEXT: fsflags a2 @@ -490,11 +476,9 @@ define <vscale x 32 x half> @vp_nearbyint_nxv32f16(<vscale x 32 x half> %va, <vs ; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v9, v7 -; ZVFHMIN-NEXT: vmflt.vf v9, v24, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5, v0.t ; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v9 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t ; ZVFHMIN-NEXT: fsflags a0 @@ -543,11 +527,9 @@ define <vscale x 32 x half> @vp_nearbyint_nxv32f16_unmasked(<vscale x 32 x half> ; ZVFHMIN-NEXT: lui a2, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v24, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5, v0.t ; ZVFHMIN-NEXT: frflags a2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t ; ZVFHMIN-NEXT: fsflags a2 @@ -670,11 +652,9 @@ define <vscale x 4 x float> @vp_nearbyint_nxv4f32(<vscale x 4 x float> %va, <vsc ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf 
v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: fsflags a0 @@ -714,11 +694,9 @@ define <vscale x 8 x float> @vp_nearbyint_nxv8f32(<vscale x 8 x float> %va, <vsc ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: fsflags a0 @@ -758,11 +736,9 @@ define <vscale x 16 x float> @vp_nearbyint_nxv16f32(<vscale x 16 x float> %va, < ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: fsflags a0 @@ -844,11 +820,9 @@ define <vscale x 2 x double> @vp_nearbyint_nxv2f64(<vscale x 2 x double> %va, <v ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: fsflags a0 @@ -888,11 +862,9 @@ define <vscale x 4 x double> 
@vp_nearbyint_nxv4f64(<vscale x 4 x double> %va, <v ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: fsflags a0 @@ -932,11 +904,9 @@ define <vscale x 7 x double> @vp_nearbyint_nxv7f64(<vscale x 7 x double> %va, <v ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: fsflags a0 @@ -976,11 +946,9 @@ define <vscale x 8 x double> @vp_nearbyint_nxv8f64(<vscale x 8 x double> %va, <v ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: fsflags a0 @@ -1030,11 +998,9 @@ define <vscale x 16 x double> @vp_nearbyint_nxv16f64(<vscale x 16 x double> %va, ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v5, v0 -; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, 
v0.t ; CHECK-NEXT: frflags a2 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: fsflags a2 @@ -1048,11 +1014,9 @@ define <vscale x 16 x double> @vp_nearbyint_nxv16f64(<vscale x 16 x double> %va, ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v6, v7 -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: fsflags a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/pr88799.ll b/llvm/test/CodeGen/RISCV/rvv/pr88799.ll new file mode 100644 index 0000000..7212a78 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/pr88799.ll @@ -0,0 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc < %s -mtriple=riscv64-unknown-linux-gnu -mattr=+v | FileCheck %s + +define i32 @main() vscale_range(2,2) { +; CHECK-LABEL: main: +; CHECK: # %bb.0: # %vector.body +; CHECK-NEXT: lui a0, 1040368 +; CHECK-NEXT: addiw a0, a0, -144 +; CHECK-NEXT: vl2re16.v v8, (a0) +; CHECK-NEXT: vs2r.v v8, (zero) +; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: ret +vector.body: + %0 = load <16 x i16>, ptr getelementptr ([3 x [23 x [23 x i16]]], ptr null, i64 -10593, i64 1, i64 22, i64 0), align 16 + store <16 x i16> %0, ptr null, align 2 + %wide.load = load <vscale x 8 x i16>, ptr getelementptr ([3 x [23 x [23 x i16]]], ptr null, i64 -10593, i64 1, i64 22, i64 0), align 16 + store <vscale x 8 x i16> %wide.load, ptr null, align 2 + ret i32 0 +} diff --git a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll index 796e6dd..9ba3da9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll @@ -183,10 +183,8 @@ define <vscale x 4 x half> @vp_rint_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu @@ -242,10 +240,8 @@ define <vscale x 8 x half> @vp_rint_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; ZVFH-NEXT: vmv1r.v v13, v0 -; ZVFH-NEXT: vmflt.vf v13, v10, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v10, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmv1r.v v0, v13 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu @@ -261,10 +257,8 @@ define <vscale x 8 x half> @vp_rint_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v17, v0 -; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v17 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu @@ -320,10 +314,8 @@ define <vscale x 16 x half> @vp_rint_nxv16f16(<vscale x 16 x half> %va, <vscale ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; ZVFH-NEXT: vmv1r.v v17, v0 -; ZVFH-NEXT: vmflt.vf 
v17, v12, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v12, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFH-NEXT: vmv1r.v v0, v17 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu @@ -339,10 +331,8 @@ define <vscale x 16 x half> @vp_rint_nxv16f16(<vscale x 16 x half> %va, <vscale ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v25, v0 -; ZVFHMIN-NEXT: vmflt.vf v25, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v25 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu @@ -398,10 +388,8 @@ define <vscale x 32 x half> @vp_rint_nxv32f16(<vscale x 32 x half> %va, <vscale ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; ZVFH-NEXT: vmv1r.v v25, v0 -; ZVFH-NEXT: vmflt.vf v25, v16, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v16, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma -; ZVFH-NEXT: vmv1r.v v0, v25 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu @@ -427,10 +415,8 @@ define <vscale x 32 x half> @vp_rint_nxv32f16(<vscale x 32 x half> %va, <vscale ; ZVFHMIN-NEXT: lui a2, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v24, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu @@ -446,10 +432,8 @@ define <vscale x 32 x half> 
@vp_rint_nxv32f16(<vscale x 32 x half> %va, <vscale ; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v9, v7 -; ZVFHMIN-NEXT: vmflt.vf v9, v24, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v9 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu @@ -495,10 +479,8 @@ define <vscale x 32 x half> @vp_rint_nxv32f16_unmasked(<vscale x 32 x half> %va, ; ZVFHMIN-NEXT: lui a2, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v24, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu @@ -610,10 +592,8 @@ define <vscale x 4 x float> @vp_rint_nxv4f32(<vscale x 4 x float> %va, <vscale x ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu @@ -650,10 +630,8 @@ define <vscale x 8 x float> @vp_rint_nxv8f32(<vscale x 8 x float> %va, <vscale x ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, 
ma -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu @@ -690,10 +668,8 @@ define <vscale x 16 x float> @vp_rint_nxv16f32(<vscale x 16 x float> %va, <vscal ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu @@ -768,10 +744,8 @@ define <vscale x 2 x double> @vp_rint_nxv2f64(<vscale x 2 x double> %va, <vscale ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu @@ -808,10 +782,8 @@ define <vscale x 4 x double> @vp_rint_nxv4f64(<vscale x 4 x double> %va, <vscale ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu @@ -848,10 +820,8 @@ define <vscale x 7 x double> @vp_rint_nxv7f64(<vscale x 7 x double> %va, <vscale ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; 
CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu @@ -888,10 +858,8 @@ define <vscale x 8 x double> @vp_rint_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu @@ -938,10 +906,8 @@ define <vscale x 16 x double> @vp_rint_nxv16f64(<vscale x 16 x double> %va, <vsc ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v5, v0 -; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu @@ -954,10 +920,8 @@ define <vscale x 16 x double> @vp_rint_nxv16f64(<vscale x 16 x double> %va, <vsc ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v6, v7 -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; 
CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll index 43fd54c..b3fe6bb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll @@ -201,11 +201,9 @@ define <vscale x 4 x half> @vp_round_nxv4f16(<vscale x 4 x half> %va, <vscale x ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 4 -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -266,11 +264,9 @@ define <vscale x 8 x half> @vp_round_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; ZVFH-NEXT: vmv1r.v v13, v0 -; ZVFH-NEXT: vmflt.vf v13, v10, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v10, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: fsrmi a0, 4 -; ZVFH-NEXT: vmv1r.v v0, v13 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -287,11 +283,9 @@ define <vscale x 8 x half> @vp_round_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v17, v0 -; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 4 -; ZVFHMIN-NEXT: vmv1r.v v0, v17 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -352,11 +346,9 @@ 
define <vscale x 16 x half> @vp_round_nxv16f16(<vscale x 16 x half> %va, <vscale ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; ZVFH-NEXT: vmv1r.v v17, v0 -; ZVFH-NEXT: vmflt.vf v17, v12, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v12, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFH-NEXT: fsrmi a0, 4 -; ZVFH-NEXT: vmv1r.v v0, v17 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -373,11 +365,9 @@ define <vscale x 16 x half> @vp_round_nxv16f16(<vscale x 16 x half> %va, <vscale ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v25, v0 -; ZVFHMIN-NEXT: vmflt.vf v25, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 4 -; ZVFHMIN-NEXT: vmv1r.v v0, v25 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -438,11 +428,9 @@ define <vscale x 32 x half> @vp_round_nxv32f16(<vscale x 32 x half> %va, <vscale ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; ZVFH-NEXT: vmv1r.v v25, v0 -; ZVFH-NEXT: vmflt.vf v25, v16, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v16, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; ZVFH-NEXT: fsrmi a0, 4 -; ZVFH-NEXT: vmv1r.v v0, v25 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -469,11 +457,9 @@ define <vscale x 32 x half> @vp_round_nxv32f16(<vscale x 32 x half> %va, <vscale ; ZVFHMIN-NEXT: lui a2, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v24, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5, v0.t ; ZVFHMIN-NEXT: 
vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: fsrmi a2, 4 -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a2 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -490,11 +476,9 @@ define <vscale x 32 x half> @vp_round_nxv32f16(<vscale x 32 x half> %va, <vscale ; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v9, v7 -; ZVFHMIN-NEXT: vmflt.vf v9, v24, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 4 -; ZVFHMIN-NEXT: vmv1r.v v0, v9 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -543,11 +527,9 @@ define <vscale x 32 x half> @vp_round_nxv32f16_unmasked(<vscale x 32 x half> %va ; ZVFHMIN-NEXT: lui a2, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v24, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: fsrmi a2, 4 -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a2 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -670,11 +652,9 @@ define <vscale x 4 x float> @vp_round_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -714,11 +694,9 @@ define <vscale x 8 x float> @vp_round_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-NEXT: lui a0, 
307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -758,11 +736,9 @@ define <vscale x 16 x float> @vp_round_nxv16f32(<vscale x 16 x float> %va, <vsca ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -844,11 +820,9 @@ define <vscale x 2 x double> @vp_round_nxv2f64(<vscale x 2 x double> %va, <vscal ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -888,11 +862,9 @@ define <vscale x 4 x double> @vp_round_nxv4f64(<vscale x 4 x double> %va, <vscal ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: 
vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -932,11 +904,9 @@ define <vscale x 7 x double> @vp_round_nxv7f64(<vscale x 7 x double> %va, <vscal ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -976,11 +946,9 @@ define <vscale x 8 x double> @vp_round_nxv8f64(<vscale x 8 x double> %va, <vscal ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -1030,11 +998,9 @@ define <vscale x 16 x double> @vp_round_nxv16f64(<vscale x 16 x double> %va, <vs ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v5, v0 -; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a2, 4 -; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a2 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -1048,11 +1014,9 @@ define <vscale x 16 x double> @vp_round_nxv16f64(<vscale x 16 x double> %va, <vs ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu 
-; CHECK-NEXT: vmv1r.v v6, v7 -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll index 8214159..4dba53dd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll @@ -201,11 +201,9 @@ define <vscale x 4 x half> @vp_roundeven_nxv4f16(<vscale x 4 x half> %va, <vscal ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 0 -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -266,11 +264,9 @@ define <vscale x 8 x half> @vp_roundeven_nxv8f16(<vscale x 8 x half> %va, <vscal ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; ZVFH-NEXT: vmv1r.v v13, v0 -; ZVFH-NEXT: vmflt.vf v13, v10, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v10, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: fsrmi a0, 0 -; ZVFH-NEXT: vmv1r.v v0, v13 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -287,11 +283,9 @@ define <vscale x 8 x half> @vp_roundeven_nxv8f16(<vscale x 8 x half> %va, <vscal ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v17, v0 -; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, 
fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 0 -; ZVFHMIN-NEXT: vmv1r.v v0, v17 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -352,11 +346,9 @@ define <vscale x 16 x half> @vp_roundeven_nxv16f16(<vscale x 16 x half> %va, <vs ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; ZVFH-NEXT: vmv1r.v v17, v0 -; ZVFH-NEXT: vmflt.vf v17, v12, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v12, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFH-NEXT: fsrmi a0, 0 -; ZVFH-NEXT: vmv1r.v v0, v17 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -373,11 +365,9 @@ define <vscale x 16 x half> @vp_roundeven_nxv16f16(<vscale x 16 x half> %va, <vs ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v25, v0 -; ZVFHMIN-NEXT: vmflt.vf v25, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 0 -; ZVFHMIN-NEXT: vmv1r.v v0, v25 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -438,11 +428,9 @@ define <vscale x 32 x half> @vp_roundeven_nxv32f16(<vscale x 32 x half> %va, <vs ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; ZVFH-NEXT: vmv1r.v v25, v0 -; ZVFH-NEXT: vmflt.vf v25, v16, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v16, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; ZVFH-NEXT: fsrmi a0, 0 -; ZVFH-NEXT: vmv1r.v v0, v25 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -469,11 +457,9 @@ define <vscale x 32 x half> @vp_roundeven_nxv32f16(<vscale x 32 x half> %va, <vs ; 
ZVFHMIN-NEXT: lui a2, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v24, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: fsrmi a2, 0 -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a2 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -490,11 +476,9 @@ define <vscale x 32 x half> @vp_roundeven_nxv32f16(<vscale x 32 x half> %va, <vs ; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v9, v7 -; ZVFHMIN-NEXT: vmflt.vf v9, v24, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 0 -; ZVFHMIN-NEXT: vmv1r.v v0, v9 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -543,11 +527,9 @@ define <vscale x 32 x half> @vp_roundeven_nxv32f16_unmasked(<vscale x 32 x half> ; ZVFHMIN-NEXT: lui a2, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v24, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: fsrmi a2, 0 -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a2 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -670,11 +652,9 @@ define <vscale x 4 x float> @vp_roundeven_nxv4f32(<vscale x 4 x float> %va, <vsc ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 0 -; 
CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -714,11 +694,9 @@ define <vscale x 8 x float> @vp_roundeven_nxv8f32(<vscale x 8 x float> %va, <vsc ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -758,11 +736,9 @@ define <vscale x 16 x float> @vp_roundeven_nxv16f32(<vscale x 16 x float> %va, < ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -844,11 +820,9 @@ define <vscale x 2 x double> @vp_roundeven_nxv2f64(<vscale x 2 x double> %va, <v ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -888,11 +862,9 @@ define <vscale x 4 x double> @vp_roundeven_nxv4f64(<vscale x 4 x double> %va, <v ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, 
m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -932,11 +904,9 @@ define <vscale x 7 x double> @vp_roundeven_nxv7f64(<vscale x 7 x double> %va, <v ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -976,11 +946,9 @@ define <vscale x 8 x double> @vp_roundeven_nxv8f64(<vscale x 8 x double> %va, <v ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -1030,11 +998,9 @@ define <vscale x 16 x double> @vp_roundeven_nxv16f64(<vscale x 16 x double> %va, ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v5, v0 -; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a2, 0 -; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a2 ; 
CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -1048,11 +1014,9 @@ define <vscale x 16 x double> @vp_roundeven_nxv16f64(<vscale x 16 x double> %va, ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v6, v7 -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll index e8ee307..109149f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll @@ -201,11 +201,9 @@ define <vscale x 4 x half> @vp_roundtozero_nxv4f16(<vscale x 4 x half> %va, <vsc ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 1 -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -266,11 +264,9 @@ define <vscale x 8 x half> @vp_roundtozero_nxv8f16(<vscale x 8 x half> %va, <vsc ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; ZVFH-NEXT: vmv1r.v v13, v0 -; ZVFH-NEXT: vmflt.vf v13, v10, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v10, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: fsrmi a0, 1 -; ZVFH-NEXT: vmv1r.v v0, v13 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -287,11 +283,9 @@ define <vscale x 8 x half> 
@vp_roundtozero_nxv8f16(<vscale x 8 x half> %va, <vsc ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v17, v0 -; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 1 -; ZVFHMIN-NEXT: vmv1r.v v0, v17 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -352,11 +346,9 @@ define <vscale x 16 x half> @vp_roundtozero_nxv16f16(<vscale x 16 x half> %va, < ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; ZVFH-NEXT: vmv1r.v v17, v0 -; ZVFH-NEXT: vmflt.vf v17, v12, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v12, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFH-NEXT: fsrmi a0, 1 -; ZVFH-NEXT: vmv1r.v v0, v17 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -373,11 +365,9 @@ define <vscale x 16 x half> @vp_roundtozero_nxv16f16(<vscale x 16 x half> %va, < ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v25, v0 -; ZVFHMIN-NEXT: vmflt.vf v25, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 1 -; ZVFHMIN-NEXT: vmv1r.v v0, v25 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -438,11 +428,9 @@ define <vscale x 32 x half> @vp_roundtozero_nxv32f16(<vscale x 32 x half> %va, < ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; ZVFH-NEXT: vmv1r.v v25, v0 -; ZVFH-NEXT: vmflt.vf v25, v16, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v16, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, 
zero, e16, m8, ta, ma ; ZVFH-NEXT: fsrmi a0, 1 -; ZVFH-NEXT: vmv1r.v v0, v25 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -469,11 +457,9 @@ define <vscale x 32 x half> @vp_roundtozero_nxv32f16(<vscale x 32 x half> %va, < ; ZVFHMIN-NEXT: lui a2, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v24, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: fsrmi a2, 1 -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a2 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -490,11 +476,9 @@ define <vscale x 32 x half> @vp_roundtozero_nxv32f16(<vscale x 32 x half> %va, < ; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v9, v7 -; ZVFHMIN-NEXT: vmflt.vf v9, v24, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 1 -; ZVFHMIN-NEXT: vmv1r.v v0, v9 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -543,11 +527,9 @@ define <vscale x 32 x half> @vp_roundtozero_nxv32f16_unmasked(<vscale x 32 x hal ; ZVFHMIN-NEXT: lui a2, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v24, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: fsrmi a2, 1 -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a2 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -670,11 +652,9 @@ define <vscale x 4 x float> @vp_roundtozero_nxv4f32(<vscale x 4 x float> %va, <v ; CHECK-NEXT: lui a0, 
307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -714,11 +694,9 @@ define <vscale x 8 x float> @vp_roundtozero_nxv8f32(<vscale x 8 x float> %va, <v ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -758,11 +736,9 @@ define <vscale x 16 x float> @vp_roundtozero_nxv16f32(<vscale x 16 x float> %va, ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -844,11 +820,9 @@ define <vscale x 2 x double> @vp_roundtozero_nxv2f64(<vscale x 2 x double> %va, ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; 
CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -888,11 +862,9 @@ define <vscale x 4 x double> @vp_roundtozero_nxv4f64(<vscale x 4 x double> %va, ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -932,11 +904,9 @@ define <vscale x 7 x double> @vp_roundtozero_nxv7f64(<vscale x 7 x double> %va, ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -976,11 +946,9 @@ define <vscale x 8 x double> @vp_roundtozero_nxv8f64(<vscale x 8 x double> %va, ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -1030,11 +998,9 @@ define <vscale x 16 x double> @vp_roundtozero_nxv16f64(<vscale x 16 x double> %v ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; 
CHECK-NEXT: vmv1r.v v5, v0 -; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a2, 1 -; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a2 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -1048,11 +1014,9 @@ define <vscale x 16 x double> @vp_roundtozero_nxv16f64(<vscale x 16 x double> %v ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v6, v7 -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/select-int.ll b/llvm/test/CodeGen/RISCV/rvv/select-int.ll index 10b77e5..df6d752 100644 --- a/llvm/test/CodeGen/RISCV/rvv/select-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/select-int.ll @@ -133,9 +133,9 @@ define <vscale x 16 x i1> @select_nxv16i1(i1 zeroext %c, <vscale x 16 x i1> %a, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vmsne.vi v9, v10, 0 -; CHECK-NEXT: vmandn.mm v8, v8, v9 -; CHECK-NEXT: vmand.mm v9, v0, v9 +; CHECK-NEXT: vmsne.vi v2, v10, 0 +; CHECK-NEXT: vmandn.mm v8, v8, v2 +; CHECK-NEXT: vmand.mm v9, v0, v2 ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret %v = select i1 %c, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b @@ -149,9 +149,9 @@ define <vscale x 16 x i1> @selectcc_nxv16i1(i1 signext %a, i1 signext %b, <vscal ; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vmsne.vi v9, v10, 0 -; CHECK-NEXT: vmandn.mm v8, v8, v9 -; CHECK-NEXT: vmand.mm v9, v0, v9 +; CHECK-NEXT: vmsne.vi v2, v10, 0 +; CHECK-NEXT: vmandn.mm v8, v8, v2 
+; CHECK-NEXT: vmand.mm v9, v0, v2 ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret %cmp = icmp ne i1 %a, %b @@ -164,9 +164,9 @@ define <vscale x 32 x i1> @select_nxv32i1(i1 zeroext %c, <vscale x 32 x i1> %a, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vmsne.vi v9, v12, 0 -; CHECK-NEXT: vmandn.mm v8, v8, v9 -; CHECK-NEXT: vmand.mm v9, v0, v9 +; CHECK-NEXT: vmsne.vi v4, v12, 0 +; CHECK-NEXT: vmandn.mm v8, v8, v4 +; CHECK-NEXT: vmand.mm v9, v0, v4 ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret %v = select i1 %c, <vscale x 32 x i1> %a, <vscale x 32 x i1> %b @@ -180,9 +180,9 @@ define <vscale x 32 x i1> @selectcc_nxv32i1(i1 signext %a, i1 signext %b, <vscal ; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vmsne.vi v9, v12, 0 -; CHECK-NEXT: vmandn.mm v8, v8, v9 -; CHECK-NEXT: vmand.mm v9, v0, v9 +; CHECK-NEXT: vmsne.vi v4, v12, 0 +; CHECK-NEXT: vmandn.mm v8, v8, v4 +; CHECK-NEXT: vmand.mm v9, v0, v4 ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret %cmp = icmp ne i1 %a, %b @@ -195,9 +195,9 @@ define <vscale x 64 x i1> @select_nxv64i1(i1 zeroext %c, <vscale x 64 x i1> %a, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma ; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vmsne.vi v9, v16, 0 -; CHECK-NEXT: vmandn.mm v8, v8, v9 -; CHECK-NEXT: vmand.mm v9, v0, v9 +; CHECK-NEXT: vmsne.vi v16, v16, 0 +; CHECK-NEXT: vmandn.mm v8, v8, v16 +; CHECK-NEXT: vmand.mm v9, v0, v16 ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret %v = select i1 %c, <vscale x 64 x i1> %a, <vscale x 64 x i1> %b @@ -211,9 +211,9 @@ define <vscale x 64 x i1> @selectcc_nxv64i1(i1 signext %a, i1 signext %b, <vscal ; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma ; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vmsne.vi v9, v16, 0 -; CHECK-NEXT: vmandn.mm v8, v8, v9 -; CHECK-NEXT: vmand.mm v9, v0, v9 +; CHECK-NEXT: vmsne.vi v16, v16, 
0 +; CHECK-NEXT: vmandn.mm v8, v8, v16 +; CHECK-NEXT: vmand.mm v9, v0, v16 ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret %cmp = icmp ne i1 %a, %b diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll index cddd371..ee939d4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll @@ -1083,8 +1083,7 @@ define <vscale x 3 x i1> @fcmp_oeq_vv_nxv3f16(<vscale x 3 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <vscale x 3 x i1> @llvm.vp.fcmp.nxv3f16(<vscale x 3 x half> %va, <vscale x 3 x half> %vb, metadata !"oeq", <vscale x 3 x i1> %m, i32 %evl) ret <vscale x 3 x i1> %v @@ -1096,8 +1095,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_oeq_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfeq.vv v12, v8, v10, v0.t -; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vmfeq.vv v0, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_oeq_vv_nxv8f16: @@ -1106,8 +1104,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v12, v0.t ; ZVFHMIN-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"oeq", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1117,8 +1114,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFH-LABEL: fcmp_oeq_vf_nxv8f16: ; ZVFH: # %bb.0: ; 
ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfeq.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmfeq.vf v0, v8, fa0, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_oeq_vf_nxv8f16: @@ -1131,8 +1127,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v16, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v16, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1144,8 +1139,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFH-LABEL: fcmp_oeq_vf_swap_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfeq.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmfeq.vf v0, v8, fa0, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_oeq_vf_swap_nxv8f16: @@ -1158,8 +1152,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1171,8 +1164,7 @@ define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_ogt_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vv v12, v10, v8, v0.t -; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vmflt.vv 
v0, v10, v8, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ogt_vv_nxv8f16: @@ -1181,8 +1173,7 @@ define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12, v0.t ; ZVFHMIN-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ogt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1192,8 +1183,7 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFH-LABEL: fcmp_ogt_vf_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ogt_vf_nxv8f16: @@ -1206,8 +1196,7 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1219,8 +1208,7 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFH-LABEL: fcmp_ogt_vf_swap_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmflt.vf v0, v8, fa0, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ogt_vf_swap_nxv8f16: @@ -1233,8 +1221,7 @@ define <vscale x 8 x i1> 
@fcmp_ogt_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v16, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v16, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1246,8 +1233,7 @@ define <vscale x 8 x i1> @fcmp_oge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_oge_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfle.vv v12, v10, v8, v0.t -; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vmfle.vv v0, v10, v8, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_oge_vv_nxv8f16: @@ -1256,8 +1242,7 @@ define <vscale x 8 x i1> @fcmp_oge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v16, v12, v0.t ; ZVFHMIN-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"oge", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1267,8 +1252,7 @@ define <vscale x 8 x i1> @fcmp_oge_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFH-LABEL: fcmp_oge_vf_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfge.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmfge.vf v0, v8, fa0, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_oge_vf_nxv8f16: @@ -1281,8 +1265,7 @@ define <vscale x 8 x i1> @fcmp_oge_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, 
a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v16, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1294,8 +1277,7 @@ define <vscale x 8 x i1> @fcmp_oge_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFH-LABEL: fcmp_oge_vf_swap_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfle.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmfle.vf v0, v8, fa0, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_oge_vf_swap_nxv8f16: @@ -1308,8 +1290,7 @@ define <vscale x 8 x i1> @fcmp_oge_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v12, v16, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v12, v16, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1321,8 +1302,7 @@ define <vscale x 8 x i1> @fcmp_olt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_olt_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vv v12, v8, v10, v0.t -; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vmflt.vv v0, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_olt_vv_nxv8f16: @@ -1331,8 +1311,7 @@ define <vscale x 8 x i1> @fcmp_olt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12, 
v0.t ; ZVFHMIN-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"olt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1342,8 +1321,7 @@ define <vscale x 8 x i1> @fcmp_olt_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFH-LABEL: fcmp_olt_vf_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmflt.vf v0, v8, fa0, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_olt_vf_nxv8f16: @@ -1356,8 +1334,7 @@ define <vscale x 8 x i1> @fcmp_olt_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v16, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v16, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1369,8 +1346,7 @@ define <vscale x 8 x i1> @fcmp_olt_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFH-LABEL: fcmp_olt_vf_swap_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_olt_vf_swap_nxv8f16: @@ -1383,8 +1359,7 @@ define <vscale x 8 x i1> @fcmp_olt_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, 
<vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1396,8 +1371,7 @@ define <vscale x 8 x i1> @fcmp_ole_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_ole_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfle.vv v12, v8, v10, v0.t -; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vmfle.vv v0, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ole_vv_nxv8f16: @@ -1406,8 +1380,7 @@ define <vscale x 8 x i1> @fcmp_ole_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v16, v12, v0.t ; ZVFHMIN-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ole", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1417,8 +1390,7 @@ define <vscale x 8 x i1> @fcmp_ole_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFH-LABEL: fcmp_ole_vf_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfle.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmfle.vf v0, v8, fa0, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ole_vf_nxv8f16: @@ -1431,8 +1403,7 @@ define <vscale x 8 x i1> @fcmp_ole_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v12, v16, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v12, v16, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1444,8 +1415,7 @@ define <vscale x 8 x i1> @fcmp_ole_vf_swap_nxv8f16(<vscale x 8 x half> %va, 
half ; ZVFH-LABEL: fcmp_ole_vf_swap_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfge.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmfge.vf v0, v8, fa0, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ole_vf_swap_nxv8f16: @@ -1458,8 +1428,7 @@ define <vscale x 8 x i1> @fcmp_ole_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v16, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1471,9 +1440,9 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_one_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vv v12, v8, v10, v0.t -; ZVFH-NEXT: vmflt.vv v13, v10, v8, v0.t -; ZVFH-NEXT: vmor.mm v0, v13, v12 +; ZVFH-NEXT: vmflt.vv v2, v8, v10, v0.t +; ZVFH-NEXT: vmflt.vv v0, v10, v8, v0.t +; ZVFH-NEXT: vmor.mm v0, v0, v2 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_one_vv_nxv8f16: @@ -1482,9 +1451,9 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmflt.vv v9, v12, v16, v0.t -; ZVFHMIN-NEXT: vmor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmflt.vv v4, v16, v12, v0.t +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v16, v0.t +; ZVFHMIN-NEXT: vmor.mm v0, v0, v4 ; ZVFHMIN-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"one", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x 
i1> %v @@ -1494,9 +1463,9 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFH-LABEL: fcmp_one_vf_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; ZVFH-NEXT: vmor.mm v0, v11, v10 +; ZVFH-NEXT: vmflt.vf v2, v8, fa0, v0.t +; ZVFH-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; ZVFH-NEXT: vmor.mm v0, v0, v2 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_one_vf_nxv8f16: @@ -1509,9 +1478,9 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v16, v0.t -; ZVFHMIN-NEXT: vmflt.vv v9, v16, v12, v0.t -; ZVFHMIN-NEXT: vmor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmflt.vv v4, v12, v16, v0.t +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12, v0.t +; ZVFHMIN-NEXT: vmor.mm v0, v0, v4 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1523,9 +1492,9 @@ define <vscale x 8 x i1> @fcmp_one_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFH-LABEL: fcmp_one_vf_swap_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmflt.vf v11, v8, fa0, v0.t -; ZVFH-NEXT: vmor.mm v0, v11, v10 +; ZVFH-NEXT: vmfgt.vf v2, v8, fa0, v0.t +; ZVFH-NEXT: vmflt.vf v0, v8, fa0, v0.t +; ZVFH-NEXT: vmor.mm v0, v0, v2 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_one_vf_swap_nxv8f16: @@ -1538,9 +1507,9 @@ define <vscale x 8 x i1> @fcmp_one_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmflt.vv v9, v12, v16, 
v0.t -; ZVFHMIN-NEXT: vmor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmflt.vv v4, v16, v12, v0.t +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v16, v0.t +; ZVFHMIN-NEXT: vmor.mm v0, v0, v4 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1552,9 +1521,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_ord_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfeq.vv v12, v10, v10, v0.t -; ZVFH-NEXT: vmfeq.vv v10, v8, v8, v0.t -; ZVFH-NEXT: vmand.mm v0, v10, v12 +; ZVFH-NEXT: vmfeq.vv v2, v10, v10, v0.t +; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t +; ZVFH-NEXT: vmand.mm v0, v0, v2 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ord_vv_nxv8f16: @@ -1562,12 +1531,12 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v10, v12, v12, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v4, v12, v12, v0.t ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t -; ZVFHMIN-NEXT: vmand.mm v0, v8, v10 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12, v0.t +; ZVFHMIN-NEXT: vmand.mm v0, v0, v4 ; ZVFHMIN-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ord", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1579,9 +1548,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFH-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfmv.v.f v10, fa0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfeq.vf v12, v10, fa0, v0.t -; ZVFH-NEXT: vmfeq.vv v10, v8, 
v8, v0.t -; ZVFH-NEXT: vmand.mm v0, v10, v12 +; ZVFH-NEXT: vmfeq.vf v2, v10, fa0, v0.t +; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t +; ZVFH-NEXT: vmand.mm v0, v0, v2 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ord_vf_nxv8f16: @@ -1593,12 +1562,12 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v4, v12, v12, v0.t ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v9, v12, v12, v0.t -; ZVFHMIN-NEXT: vmand.mm v0, v8, v9 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12, v0.t +; ZVFHMIN-NEXT: vmand.mm v0, v4, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1612,9 +1581,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFH-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfmv.v.f v10, fa0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfeq.vf v12, v10, fa0, v0.t -; ZVFH-NEXT: vmfeq.vv v10, v8, v8, v0.t -; ZVFH-NEXT: vmand.mm v0, v12, v10 +; ZVFH-NEXT: vmfeq.vf v2, v10, fa0, v0.t +; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t +; ZVFH-NEXT: vmand.mm v0, v2, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ord_vf_swap_nxv8f16: @@ -1626,12 +1595,12 @@ define <vscale x 8 x i1> @fcmp_ord_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v4, v12, v12, v0.t ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli 
zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v9, v12, v12, v0.t -; ZVFHMIN-NEXT: vmand.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12, v0.t +; ZVFHMIN-NEXT: vmand.mm v0, v0, v4 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1643,9 +1612,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_ueq_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vv v12, v8, v10, v0.t -; ZVFH-NEXT: vmflt.vv v13, v10, v8, v0.t -; ZVFH-NEXT: vmnor.mm v0, v13, v12 +; ZVFH-NEXT: vmflt.vv v2, v8, v10, v0.t +; ZVFH-NEXT: vmflt.vv v0, v10, v8, v0.t +; ZVFH-NEXT: vmnor.mm v0, v0, v2 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ueq_vv_nxv8f16: @@ -1654,9 +1623,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmflt.vv v9, v12, v16, v0.t -; ZVFHMIN-NEXT: vmnor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmflt.vv v4, v16, v12, v0.t +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v16, v0.t +; ZVFHMIN-NEXT: vmnor.mm v0, v0, v4 ; ZVFHMIN-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ueq", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1666,9 +1635,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFH-LABEL: fcmp_ueq_vf_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; ZVFH-NEXT: vmnor.mm v0, v11, v10 +; ZVFH-NEXT: vmflt.vf v2, v8, fa0, v0.t +; ZVFH-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; ZVFH-NEXT: vmnor.mm v0, v0, 
v2 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ueq_vf_nxv8f16: @@ -1681,9 +1650,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v16, v0.t -; ZVFHMIN-NEXT: vmflt.vv v9, v16, v12, v0.t -; ZVFHMIN-NEXT: vmnor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmflt.vv v4, v12, v16, v0.t +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12, v0.t +; ZVFHMIN-NEXT: vmnor.mm v0, v0, v4 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1695,9 +1664,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFH-LABEL: fcmp_ueq_vf_swap_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmflt.vf v11, v8, fa0, v0.t -; ZVFH-NEXT: vmnor.mm v0, v11, v10 +; ZVFH-NEXT: vmfgt.vf v2, v8, fa0, v0.t +; ZVFH-NEXT: vmflt.vf v0, v8, fa0, v0.t +; ZVFH-NEXT: vmnor.mm v0, v0, v2 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ueq_vf_swap_nxv8f16: @@ -1710,9 +1679,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmflt.vv v9, v12, v16, v0.t -; ZVFHMIN-NEXT: vmnor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmflt.vv v4, v16, v12, v0.t +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v16, v0.t +; ZVFHMIN-NEXT: vmnor.mm v0, v0, v4 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1724,8 +1693,8 @@ define <vscale x 8 x i1> 
@fcmp_ugt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_ugt_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfle.vv v12, v8, v10, v0.t -; ZVFH-NEXT: vmnot.m v0, v12 +; ZVFH-NEXT: vmfle.vv v0, v8, v10, v0.t +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ugt_vv_nxv8f16: @@ -1734,8 +1703,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v16, v12, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1745,8 +1714,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFH-LABEL: fcmp_ugt_vf_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfle.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmnot.m v0, v10 +; ZVFH-NEXT: vmfle.vf v0, v8, fa0, v0.t +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ugt_vf_nxv8f16: @@ -1759,8 +1728,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v12, v16, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v12, v16, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1772,8 +1741,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_swap_nxv8f16(<vscale x 8 
x half> %va, half ; ZVFH-LABEL: fcmp_ugt_vf_swap_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfge.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmnot.m v0, v10 +; ZVFH-NEXT: vmfge.vf v0, v8, fa0, v0.t +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ugt_vf_swap_nxv8f16: @@ -1786,8 +1755,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v16, v12, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1799,8 +1768,8 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_uge_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vv v12, v8, v10, v0.t -; ZVFH-NEXT: vmnot.m v0, v12 +; ZVFH-NEXT: vmflt.vv v0, v8, v10, v0.t +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_uge_vv_nxv8f16: @@ -1809,8 +1778,8 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"uge", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1820,8 +1789,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; 
ZVFH-LABEL: fcmp_uge_vf_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmnot.m v0, v10 +; ZVFH-NEXT: vmflt.vf v0, v8, fa0, v0.t +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_uge_vf_nxv8f16: @@ -1834,8 +1803,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v16, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v16, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1847,8 +1816,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFH-LABEL: fcmp_uge_vf_swap_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmnot.m v0, v10 +; ZVFH-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_uge_vf_swap_nxv8f16: @@ -1861,8 +1830,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1874,8 +1843,8 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: 
fcmp_ult_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfle.vv v12, v10, v8, v0.t -; ZVFH-NEXT: vmnot.m v0, v12 +; ZVFH-NEXT: vmfle.vv v0, v10, v8, v0.t +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ult_vv_nxv8f16: @@ -1884,8 +1853,8 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v16, v12, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ult", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1895,8 +1864,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFH-LABEL: fcmp_ult_vf_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfge.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmnot.m v0, v10 +; ZVFH-NEXT: vmfge.vf v0, v8, fa0, v0.t +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ult_vf_nxv8f16: @@ -1909,8 +1878,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v16, v12, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1922,8 +1891,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFH-LABEL: fcmp_ult_vf_swap_nxv8f16: ; ZVFH: # 
%bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfle.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmnot.m v0, v10 +; ZVFH-NEXT: vmfle.vf v0, v8, fa0, v0.t +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ult_vf_swap_nxv8f16: @@ -1936,8 +1905,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v12, v16, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v12, v16, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1949,8 +1918,8 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_ule_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vv v12, v10, v8, v0.t -; ZVFH-NEXT: vmnot.m v0, v12 +; ZVFH-NEXT: vmflt.vv v0, v10, v8, v0.t +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ule_vv_nxv8f16: @@ -1959,8 +1928,8 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ule", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1970,8 +1939,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFH-LABEL: fcmp_ule_vf_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli 
zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmnot.m v0, v10 +; ZVFH-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ule_vf_nxv8f16: @@ -1984,8 +1953,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1997,8 +1966,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFH-LABEL: fcmp_ule_vf_swap_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmnot.m v0, v10 +; ZVFH-NEXT: vmflt.vf v0, v8, fa0, v0.t +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ule_vf_swap_nxv8f16: @@ -2011,8 +1980,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v16, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v16, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2024,8 +1993,7 @@ define <vscale x 8 x i1> @fcmp_une_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_une_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, 
ta, ma -; ZVFH-NEXT: vmfne.vv v12, v8, v10, v0.t -; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vmfne.vv v0, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_une_vv_nxv8f16: @@ -2034,8 +2002,7 @@ define <vscale x 8 x i1> @fcmp_une_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfne.vv v0, v16, v12, v0.t ; ZVFHMIN-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"une", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -2045,8 +2012,7 @@ define <vscale x 8 x i1> @fcmp_une_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFH-LABEL: fcmp_une_vf_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfne.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmfne.vf v0, v8, fa0, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_une_vf_nxv8f16: @@ -2059,8 +2025,7 @@ define <vscale x 8 x i1> @fcmp_une_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v8, v12, v16, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfne.vv v0, v12, v16, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2072,8 +2037,7 @@ define <vscale x 8 x i1> @fcmp_une_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFH-LABEL: fcmp_une_vf_swap_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfne.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmfne.vf v0, v8, fa0, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: 
fcmp_une_vf_swap_nxv8f16: @@ -2086,8 +2050,7 @@ define <vscale x 8 x i1> @fcmp_une_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfne.vv v0, v16, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2099,9 +2062,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_uno_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfne.vv v12, v10, v10, v0.t -; ZVFH-NEXT: vmfne.vv v10, v8, v8, v0.t -; ZVFH-NEXT: vmor.mm v0, v10, v12 +; ZVFH-NEXT: vmfne.vv v2, v10, v10, v0.t +; ZVFH-NEXT: vmfne.vv v0, v8, v8, v0.t +; ZVFH-NEXT: vmor.mm v0, v0, v2 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_uno_vv_nxv8f16: @@ -2109,12 +2072,12 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v10, v12, v12, v0.t +; ZVFHMIN-NEXT: vmfne.vv v4, v12, v12, v0.t ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v8, v12, v12, v0.t -; ZVFHMIN-NEXT: vmor.mm v0, v8, v10 +; ZVFHMIN-NEXT: vmfne.vv v0, v12, v12, v0.t +; ZVFHMIN-NEXT: vmor.mm v0, v0, v4 ; ZVFHMIN-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"uno", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -2126,9 +2089,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f16(<vscale x 8 x half> %va, half 
%b, ; ZVFH-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfmv.v.f v10, fa0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfne.vf v12, v10, fa0, v0.t -; ZVFH-NEXT: vmfne.vv v10, v8, v8, v0.t -; ZVFH-NEXT: vmor.mm v0, v10, v12 +; ZVFH-NEXT: vmfne.vf v2, v10, fa0, v0.t +; ZVFH-NEXT: vmfne.vv v0, v8, v8, v0.t +; ZVFH-NEXT: vmor.mm v0, v0, v2 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_uno_vf_nxv8f16: @@ -2140,12 +2103,12 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v8, v12, v12, v0.t +; ZVFHMIN-NEXT: vmfne.vv v4, v12, v12, v0.t ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v9, v12, v12, v0.t -; ZVFHMIN-NEXT: vmor.mm v0, v8, v9 +; ZVFHMIN-NEXT: vmfne.vv v0, v12, v12, v0.t +; ZVFHMIN-NEXT: vmor.mm v0, v4, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2159,9 +2122,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFH-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfmv.v.f v10, fa0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfne.vf v12, v10, fa0, v0.t -; ZVFH-NEXT: vmfne.vv v10, v8, v8, v0.t -; ZVFH-NEXT: vmor.mm v0, v12, v10 +; ZVFH-NEXT: vmfne.vf v2, v10, fa0, v0.t +; ZVFH-NEXT: vmfne.vv v0, v8, v8, v0.t +; ZVFH-NEXT: vmor.mm v0, v2, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_uno_vf_swap_nxv8f16: @@ -2173,12 +2136,12 @@ define <vscale x 8 x i1> @fcmp_uno_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; 
ZVFHMIN-NEXT: vmfne.vv v8, v12, v12, v0.t +; ZVFHMIN-NEXT: vmfne.vv v4, v12, v12, v0.t ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v9, v12, v12, v0.t -; ZVFHMIN-NEXT: vmor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmfne.vv v0, v12, v12, v0.t +; ZVFHMIN-NEXT: vmor.mm v0, v0, v4 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2199,35 +2162,34 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal ; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; ZVFH-NEXT: addi a1, sp, 16 ; ZVFH-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; ZVFH-NEXT: csrr a1, vlenb -; ZVFH-NEXT: vsetvli a3, zero, e8, m1, ta, ma -; ZVFH-NEXT: slli a3, a1, 3 -; ZVFH-NEXT: add a3, a0, a3 -; ZVFH-NEXT: vl8re16.v v8, (a3) -; ZVFH-NEXT: slli a3, a1, 2 +; ZVFH-NEXT: csrr a3, vlenb +; ZVFH-NEXT: srli a1, a3, 1 +; ZVFH-NEXT: vsetvli a4, zero, e8, m1, ta, ma +; ZVFH-NEXT: vmv1r.v v7, v0 +; ZVFH-NEXT: vslidedown.vx v0, v0, a1 +; ZVFH-NEXT: slli a4, a3, 3 +; ZVFH-NEXT: add a4, a0, a4 +; ZVFH-NEXT: vl8re16.v v24, (a4) +; ZVFH-NEXT: slli a3, a3, 2 ; ZVFH-NEXT: sub a4, a2, a3 ; ZVFH-NEXT: sltu a5, a2, a4 ; ZVFH-NEXT: addi a5, a5, -1 +; ZVFH-NEXT: vl8re16.v v8, (a0) ; ZVFH-NEXT: and a4, a5, a4 -; ZVFH-NEXT: srli a1, a1, 1 -; ZVFH-NEXT: vl8re16.v v24, (a0) -; ZVFH-NEXT: vmv1r.v v6, v0 -; ZVFH-NEXT: vslidedown.vx v0, v0, a1 ; ZVFH-NEXT: vsetvli zero, a4, e16, m8, ta, ma -; ZVFH-NEXT: vmfeq.vv v7, v16, v8, v0.t +; ZVFH-NEXT: vmfeq.vv v16, v16, v24, v0.t ; ZVFH-NEXT: bltu a2, a3, .LBB85_2 ; ZVFH-NEXT: # %bb.1: ; ZVFH-NEXT: mv a2, a3 ; ZVFH-NEXT: .LBB85_2: ; ZVFH-NEXT: vsetvli zero, a2, e16, m8, ta, ma -; ZVFH-NEXT: vmv1r.v v0, v6 +; 
ZVFH-NEXT: vmv1r.v v0, v7 ; ZVFH-NEXT: addi a0, sp, 16 -; ZVFH-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; ZVFH-NEXT: vmfeq.vv v16, v8, v24, v0.t +; ZVFH-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFH-NEXT: vmfeq.vv v0, v24, v8, v0.t ; ZVFH-NEXT: add a0, a1, a1 ; ZVFH-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; ZVFH-NEXT: vslideup.vx v16, v7, a1 -; ZVFH-NEXT: vmv.v.v v0, v16 +; ZVFH-NEXT: vslideup.vx v0, v16, a1 ; ZVFH-NEXT: csrr a0, vlenb ; ZVFH-NEXT: slli a0, a0, 3 ; ZVFH-NEXT: add sp, sp, a0 @@ -2239,13 +2201,13 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a3, 26 +; ZVFHMIN-NEXT: li a3, 34 ; ZVFHMIN-NEXT: mul a1, a1, a3 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a3, a1, 3 -; ZVFHMIN-NEXT: add a1, a3, a1 +; ZVFHMIN-NEXT: li a3, 25 +; ZVFHMIN-NEXT: mul a1, a1, a3 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill @@ -2266,8 +2228,8 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal ; ZVFHMIN-NEXT: and a7, a7, a1 ; ZVFHMIN-NEXT: srli a1, a3, 1 ; ZVFHMIN-NEXT: csrr t0, vlenb -; ZVFHMIN-NEXT: li t1, 25 -; ZVFHMIN-NEXT: mul t0, t0, t1 +; ZVFHMIN-NEXT: slli t1, t0, 5 +; ZVFHMIN-NEXT: add t0, t1, t0 ; ZVFHMIN-NEXT: add t0, sp, t0 ; ZVFHMIN-NEXT: addi t0, t0, 16 ; ZVFHMIN-NEXT: vs1r.v v0, (t0) # Unknown-size Folded Spill @@ -2289,100 +2251,129 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal ; ZVFHMIN-NEXT: addi a0, a0, 16 ; 
ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli t0, a0, 3 +; ZVFHMIN-NEXT: add a0, t0, a0 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 ; ZVFHMIN-NEXT: bltu a6, a4, .LBB85_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a6, a4 ; ZVFHMIN-NEXT: .LBB85_2: ; ZVFHMIN-NEXT: vsetvli zero, a7, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v20, v24, v8, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v24, v8, v0.t +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a7, a0, 3 +; ZVFHMIN-NEXT: add a0, a7, a0 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a6, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v6 -; ZVFHMIN-NEXT: vmfeq.vv v16, v24, v8, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v24, v0.t ; ZVFHMIN-NEXT: add a0, a3, a3 ; ZVFHMIN-NEXT: bltu a2, a5, .LBB85_4 ; ZVFHMIN-NEXT: # %bb.3: ; ZVFHMIN-NEXT: mv a2, a5 ; ZVFHMIN-NEXT: .LBB85_4: ; ZVFHMIN-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslideup.vx v16, v20, a3 +; ZVFHMIN-NEXT: addi a5, sp, 16 +; ZVFHMIN-NEXT: vl1r.v v8, (a5) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vslideup.vx v0, v8, a3 ; ZVFHMIN-NEXT: csrr a5, vlenb +; ZVFHMIN-NEXT: slli a6, a5, 3 +; ZVFHMIN-NEXT: add a5, a6, a5 ; 
ZVFHMIN-NEXT: add a5, sp, a5 ; ZVFHMIN-NEXT: addi a5, a5, 16 -; ZVFHMIN-NEXT: vs1r.v v16, (a5) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs1r.v v0, (a5) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: sub a5, a2, a4 ; ZVFHMIN-NEXT: sltu a6, a2, a5 ; ZVFHMIN-NEXT: addi a6, a6, -1 ; ZVFHMIN-NEXT: and a5, a6, a5 ; ZVFHMIN-NEXT: vsetvli a6, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: csrr a6, vlenb -; ZVFHMIN-NEXT: li a7, 25 -; ZVFHMIN-NEXT: mul a6, a6, a7 +; ZVFHMIN-NEXT: slli a7, a6, 5 +; ZVFHMIN-NEXT: add a6, a7, a6 ; ZVFHMIN-NEXT: add a6, sp, a6 ; ZVFHMIN-NEXT: addi a6, a6, 16 ; ZVFHMIN-NEXT: vl1r.v v8, (a6) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vslidedown.vx v8, v8, a3 -; ZVFHMIN-NEXT: addi a6, sp, 16 -; ZVFHMIN-NEXT: vs1r.v v8, (a6) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vslidedown.vx v0, v8, a3 ; ZVFHMIN-NEXT: vsetvli a6, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: csrr a6, vlenb -; ZVFHMIN-NEXT: slli a7, a6, 3 -; ZVFHMIN-NEXT: add a6, a7, a6 +; ZVFHMIN-NEXT: li a7, 25 +; ZVFHMIN-NEXT: mul a6, a6, a7 ; ZVFHMIN-NEXT: add a6, sp, a6 ; ZVFHMIN-NEXT: addi a6, a6, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a6) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 +; ZVFHMIN-NEXT: vl8r.v v24, (a6) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28 ; ZVFHMIN-NEXT: csrr a6, vlenb ; ZVFHMIN-NEXT: slli a7, a6, 4 ; ZVFHMIN-NEXT: add a6, a7, a6 ; ZVFHMIN-NEXT: add a6, sp, a6 ; ZVFHMIN-NEXT: addi a6, a6, 16 -; ZVFHMIN-NEXT: vl8r.v v0, (a6) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4 +; ZVFHMIN-NEXT: vl8r.v v16, (a6) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 ; ZVFHMIN-NEXT: vsetvli zero, a5, e32, m8, ta, ma -; ZVFHMIN-NEXT: addi a5, sp, 16 -; ZVFHMIN-NEXT: vl1r.v v0, (a5) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmfeq.vv v6, v24, v8, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v24, v0.t +; ZVFHMIN-NEXT: csrr a5, vlenb +; ZVFHMIN-NEXT: add a5, sp, a5 +; ZVFHMIN-NEXT: addi a5, a5, 16 +; ZVFHMIN-NEXT: vs1r.v 
v0, (a5) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: bltu a2, a4, .LBB85_6 ; ZVFHMIN-NEXT: # %bb.5: ; ZVFHMIN-NEXT: mv a2, a4 ; ZVFHMIN-NEXT: .LBB85_6: ; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: csrr a4, vlenb +; ZVFHMIN-NEXT: li a5, 25 +; ZVFHMIN-NEXT: mul a4, a4, a5 +; ZVFHMIN-NEXT: add a4, sp, a4 +; ZVFHMIN-NEXT: addi a4, a4, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 ; ZVFHMIN-NEXT: csrr a4, vlenb ; ZVFHMIN-NEXT: slli a5, a4, 4 ; ZVFHMIN-NEXT: add a4, a5, a4 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 ; ZVFHMIN-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a4, 25 -; ZVFHMIN-NEXT: mul a2, a2, a4 +; ZVFHMIN-NEXT: slli a4, a2, 5 +; ZVFHMIN-NEXT: add a2, a4, a2 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl1r.v v0, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmfeq.vv v8, v24, v16, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslideup.vx v8, v6, a3 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vslideup.vx v0, v8, a3 ; ZVFHMIN-NEXT: add a0, a1, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a2, a0, 3 +; ZVFHMIN-NEXT: add a0, a2, a0 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vslideup.vx v8, v9, a1 -; ZVFHMIN-NEXT: vmv.v.v v0, v8 +; ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; 
ZVFHMIN-NEXT: vslideup.vx v0, v8, a1 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 26 +; ZVFHMIN-NEXT: li a1, 34 ; ZVFHMIN-NEXT: mul a0, a0, a1 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 @@ -2919,8 +2910,7 @@ define <vscale x 3 x i1> @fcmp_oeq_vv_nxv3f64(<vscale x 3 x double> %va, <vscale ; CHECK-LABEL: fcmp_oeq_vv_nxv3f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <vscale x 3 x i1> @llvm.vp.fcmp.nxv3f64(<vscale x 3 x double> %va, <vscale x 3 x double> %vb, metadata !"oeq", <vscale x 3 x i1> %m, i32 %evl) ret <vscale x 3 x i1> %v @@ -2932,8 +2922,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_oeq_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmfeq.vv v0, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"oeq", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -2943,8 +2932,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_oeq_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmfeq.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -2956,8 +2944,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vf_swap_nxv8f64(<vscale x 8 x double> %va, do ; CHECK-LABEL: fcmp_oeq_vf_swap_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; 
CHECK-NEXT: vmfeq.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmfeq.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -2969,8 +2956,7 @@ define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_ogt_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v16, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmflt.vv v0, v16, v8, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ogt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -2980,8 +2966,7 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_ogt_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -2993,8 +2978,7 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_swap_nxv8f64(<vscale x 8 x double> %va, do ; CHECK-LABEL: fcmp_ogt_vf_swap_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3006,8 +2990,7 @@ define <vscale x 8 x i1> @fcmp_oge_vv_nxv8f64(<vscale x 8 x double> %va, 
<vscale ; CHECK-LABEL: fcmp_oge_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v8, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"oge", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3017,8 +3000,7 @@ define <vscale x 8 x i1> @fcmp_oge_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_oge_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3030,8 +3012,7 @@ define <vscale x 8 x i1> @fcmp_oge_vf_swap_nxv8f64(<vscale x 8 x double> %va, do ; CHECK-LABEL: fcmp_oge_vf_swap_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3043,8 +3024,7 @@ define <vscale x 8 x i1> @fcmp_olt_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_olt_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"olt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> 
%v @@ -3054,8 +3034,7 @@ define <vscale x 8 x i1> @fcmp_olt_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_olt_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3067,8 +3046,7 @@ define <vscale x 8 x i1> @fcmp_olt_vf_swap_nxv8f64(<vscale x 8 x double> %va, do ; CHECK-LABEL: fcmp_olt_vf_swap_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3080,8 +3058,7 @@ define <vscale x 8 x i1> @fcmp_ole_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_ole_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmfle.vv v0, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ole", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3091,8 +3068,7 @@ define <vscale x 8 x i1> @fcmp_ole_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_ole_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb 
= shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3104,8 +3080,7 @@ define <vscale x 8 x i1> @fcmp_ole_vf_swap_nxv8f64(<vscale x 8 x double> %va, do ; CHECK-LABEL: fcmp_ole_vf_swap_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3118,8 +3093,8 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmflt.vv v25, v16, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v16, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v24 ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"one", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3130,8 +3105,8 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v16 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3144,8 +3119,8 @@ define <vscale x 8 x i1> @fcmp_one_vf_swap_nxv8f64(<vscale x 8 x double> %va, do ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t 
-; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v16 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3157,9 +3132,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_ord_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v24, v16, v16, v0.t -; CHECK-NEXT: vmfeq.vv v16, v8, v8, v0.t -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfeq.vv v16, v16, v16, v0.t +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmand.mm v0, v0, v16 ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ord", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3171,9 +3146,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vf v24, v16, fa0, v0.t -; CHECK-NEXT: vmfeq.vv v16, v8, v8, v0.t -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfeq.vf v16, v16, fa0, v0.t +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmand.mm v0, v0, v16 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3187,9 +3162,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_swap_nxv8f64(<vscale x 8 x double> %va, do ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vf v24, v16, fa0, v0.t -; CHECK-NEXT: vmfeq.vv v16, 
v8, v8, v0.t -; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmfeq.vf v16, v16, fa0, v0.t +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmand.mm v0, v16, v0 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3202,8 +3177,8 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmflt.vv v25, v16, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v16, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v24 ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ueq", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3214,8 +3189,8 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v16 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3228,8 +3203,8 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_swap_nxv8f64(<vscale x 8 x double> %va, do ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v16 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = 
shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3241,8 +3216,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_ugt_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfle.vv v0, v8, v16, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3252,8 +3227,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_ugt_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3265,8 +3240,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_swap_nxv8f64(<vscale x 8 x double> %va, do ; CHECK-LABEL: fcmp_ugt_vf_swap_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3278,8 +3253,8 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_uge_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; 
CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"uge", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3289,8 +3264,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_uge_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3302,8 +3277,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_swap_nxv8f64(<vscale x 8 x double> %va, do ; CHECK-LABEL: fcmp_uge_vf_swap_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3315,8 +3290,8 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_ult_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ult", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 
8 x i1> %v @@ -3326,8 +3301,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_ult_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3339,8 +3314,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_swap_nxv8f64(<vscale x 8 x double> %va, do ; CHECK-LABEL: fcmp_ult_vf_swap_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3352,8 +3327,8 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_ule_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v16, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmflt.vv v0, v16, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ule", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3363,8 +3338,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_ule_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, 
v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3376,8 +3351,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_swap_nxv8f64(<vscale x 8 x double> %va, do ; CHECK-LABEL: fcmp_ule_vf_swap_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3389,8 +3364,7 @@ define <vscale x 8 x i1> @fcmp_une_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_une_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfne.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmfne.vv v0, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"une", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3400,8 +3374,7 @@ define <vscale x 8 x i1> @fcmp_une_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_une_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfne.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmfne.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3413,8 +3386,7 @@ define <vscale x 8 x i1> @fcmp_une_vf_swap_nxv8f64(<vscale x 8 x double> %va, do ; CHECK-LABEL: fcmp_une_vf_swap_nxv8f64: 
; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfne.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmfne.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3426,9 +3398,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_uno_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfne.vv v24, v16, v16, v0.t -; CHECK-NEXT: vmfne.vv v16, v8, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v16, v24 +; CHECK-NEXT: vmfne.vv v16, v16, v16, v0.t +; CHECK-NEXT: vmfne.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v16 ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"uno", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3440,9 +3412,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfne.vf v24, v16, fa0, v0.t -; CHECK-NEXT: vmfne.vv v16, v8, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v16, v24 +; CHECK-NEXT: vmfne.vf v16, v16, fa0, v0.t +; CHECK-NEXT: vmfne.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v16 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3456,9 +3428,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_swap_nxv8f64(<vscale x 8 x double> %va, do ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfne.vf v24, v16, fa0, v0.t -; 
CHECK-NEXT: vmfne.vv v16, v8, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v24, v16 +; CHECK-NEXT: vmfne.vf v16, v16, fa0, v0.t +; CHECK-NEXT: vmfne.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v16, v0 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3474,12 +3446,12 @@ define <vscale x 32 x i1> @fcmp_oeq_vv_nxv32f64(<vscale x 32 x double> %va, <vsc ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 48 +; CHECK-NEXT: li a3, 34 ; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 40 +; CHECK-NEXT: li a3, 25 ; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 @@ -3490,7 +3462,7 @@ define <vscale x 32 x i1> @fcmp_oeq_vv_nxv32f64(<vscale x 32 x double> %va, <vsc ; CHECK-NEXT: slli t1, a3, 3 ; CHECK-NEXT: srli a4, a3, 2 ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v6, v0, a4 +; CHECK-NEXT: vslidedown.vx v7, v0, a4 ; CHECK-NEXT: srli a1, a3, 3 ; CHECK-NEXT: vsetvli a5, zero, e8, mf4, ta, ma ; CHECK-NEXT: add a5, a2, t1 @@ -3506,111 +3478,103 @@ define <vscale x 32 x i1> @fcmp_oeq_vv_nxv32f64(<vscale x 32 x double> %va, <vsc ; CHECK-NEXT: .LBB171_2: ; CHECK-NEXT: add t0, a2, t0 ; CHECK-NEXT: add t1, a0, t1 -; CHECK-NEXT: vslidedown.vx v5, v6, a1 +; CHECK-NEXT: vslidedown.vx v6, v7, a1 ; CHECK-NEXT: add t2, a2, t2 ; CHECK-NEXT: vl8re64.v v24, (a2) -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi 
a2, a2, 16 -; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: sub a2, a7, a3 ; CHECK-NEXT: sltu t3, a7, a2 ; CHECK-NEXT: addi t3, t3, -1 ; CHECK-NEXT: and a2, t3, a2 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v7, v16, v8, v0.t +; CHECK-NEXT: vmfeq.vv v8, v16, v8, v0.t +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a7, a3, .LBB171_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: mv a7, a3 ; CHECK-NEXT: .LBB171_4: ; CHECK-NEXT: vl8re64.v v8, (t0) ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 5 +; CHECK-NEXT: slli t0, a2, 4 +; CHECK-NEXT: add a2, t0, a2 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v8, (t1) ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li t0, 24 -; CHECK-NEXT: mul a2, a2, t0 +; CHECK-NEXT: slli t0, a2, 3 +; CHECK-NEXT: add a2, t0, a2 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v8, (t2) -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 4 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vl8re64.v v8, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vl8re64.v v16, (a0) ; CHECK-NEXT: vsetvli zero, a7, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 40 +; CHECK-NEXT: li a2, 25 ; CHECK-NEXT: mul a0, a0, a2 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, 
(a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmfeq.vv v16, v24, v8, v0.t +; CHECK-NEXT: vmfeq.vv v24, v8, v24, v0.t ; CHECK-NEXT: add a0, a1, a1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma ; CHECK-NEXT: sub a0, a6, a5 ; CHECK-NEXT: sltu a2, a6, a0 ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: and a0, a2, a0 -; CHECK-NEXT: vslideup.vx v16, v7, a1 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl1r.v v0, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vslideup.vx v24, v0, a1 ; CHECK-NEXT: mv a2, a0 ; CHECK-NEXT: bltu a0, a3, .LBB171_6 ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: mv a2, a3 ; CHECK-NEXT: .LBB171_6: ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 4 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v17, v8, v24, v0.t +; CHECK-NEXT: vmfeq.vv v8, v16, v8, v0.t ; CHECK-NEXT: add a2, a4, a1 ; CHECK-NEXT: vsetvli zero, a2, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vx v16, v17, a4 +; CHECK-NEXT: vslideup.vx v24, v8, a4 ; CHECK-NEXT: sub a2, a0, a3 ; CHECK-NEXT: sltu a0, a0, a2 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a2 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v5 +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: slli a2, a0, 4 +; CHECK-NEXT: add a0, a2, a0 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 24 -; CHECK-NEXT: mul a0, a0, a2 +; CHECK-NEXT: slli a2, a0, 3 +; 
CHECK-NEXT: add a0, a2, a0 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v17, v8, v24, v0.t +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v8, v16, v8, v0.t ; CHECK-NEXT: slli a0, a1, 1 ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a1, a0, a1 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v16, v17, a0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vslideup.vx v24, v8, a0 +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 48 +; CHECK-NEXT: li a1, 34 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll index aee2551..5d14143 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll @@ -580,9 +580,9 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_one_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vv v12, v8, v10 -; ZVFH-NEXT: vmflt.vv v13, v10, v8 -; ZVFH-NEXT: vmor.mm v0, v13, v12 +; ZVFH-NEXT: vmflt.vv v0, v8, v10 +; ZVFH-NEXT: vmflt.vv v2, v10, v8 +; ZVFH-NEXT: vmor.mm v0, v2, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_one_vv_nxv8f16: @@ -591,9 +591,9 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12 -; ZVFHMIN-NEXT: vmflt.vv v9, v12, v16 -; ZVFHMIN-NEXT: vmor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12 +; ZVFHMIN-NEXT: vmflt.vv v4, v12, v16 +; ZVFHMIN-NEXT: vmor.mm v0, v4, v0 ; ZVFHMIN-NEXT: ret %vc = fcmp one <vscale x 8 x half> %va, %vb ret <vscale x 8 x i1> %vc @@ -603,9 +603,9 @@ define <vscale 
x 8 x i1> @fcmp_one_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFH-LABEL: fcmp_one_vf_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vf v10, v8, fa0 -; ZVFH-NEXT: vmfgt.vf v11, v8, fa0 -; ZVFH-NEXT: vmor.mm v0, v11, v10 +; ZVFH-NEXT: vmflt.vf v0, v8, fa0 +; ZVFH-NEXT: vmfgt.vf v2, v8, fa0 +; ZVFH-NEXT: vmor.mm v0, v2, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_one_vf_nxv8f16: @@ -618,9 +618,9 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v16 -; ZVFHMIN-NEXT: vmflt.vv v9, v16, v12 -; ZVFHMIN-NEXT: vmor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v16 +; ZVFHMIN-NEXT: vmflt.vv v4, v16, v12 +; ZVFHMIN-NEXT: vmor.mm v0, v4, v0 ; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -632,9 +632,9 @@ define <vscale x 8 x i1> @fcmp_one_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFH-LABEL: fcmp_one_fv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmfgt.vf v10, v8, fa0 -; ZVFH-NEXT: vmflt.vf v11, v8, fa0 -; ZVFH-NEXT: vmor.mm v0, v11, v10 +; ZVFH-NEXT: vmfgt.vf v0, v8, fa0 +; ZVFH-NEXT: vmflt.vf v2, v8, fa0 +; ZVFH-NEXT: vmor.mm v0, v2, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_one_fv_nxv8f16: @@ -647,9 +647,9 @@ define <vscale x 8 x i1> @fcmp_one_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12 -; ZVFHMIN-NEXT: vmflt.vv v9, v12, v16 -; ZVFHMIN-NEXT: vmor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12 +; ZVFHMIN-NEXT: vmflt.vv v4, v12, v16 +; ZVFHMIN-NEXT: vmor.mm v0, 
v4, v0 ; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -705,9 +705,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_ord_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmfeq.vv v12, v10, v10 -; ZVFH-NEXT: vmfeq.vv v10, v8, v8 -; ZVFH-NEXT: vmand.mm v0, v10, v12 +; ZVFH-NEXT: vmfeq.vv v0, v10, v10 +; ZVFH-NEXT: vmfeq.vv v2, v8, v8 +; ZVFH-NEXT: vmand.mm v0, v2, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ord_vv_nxv8f16: @@ -715,12 +715,12 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v10, v12, v12 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12 -; ZVFHMIN-NEXT: vmand.mm v0, v8, v10 +; ZVFHMIN-NEXT: vmfeq.vv v4, v12, v12 +; ZVFHMIN-NEXT: vmand.mm v0, v4, v0 ; ZVFHMIN-NEXT: ret %vc = fcmp ord <vscale x 8 x half> %va, %vb ret <vscale x 8 x i1> %vc @@ -731,9 +731,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfmv.v.f v10, fa0 -; ZVFH-NEXT: vmfeq.vf v12, v10, fa0 -; ZVFH-NEXT: vmfeq.vv v10, v8, v8 -; ZVFH-NEXT: vmand.mm v0, v10, v12 +; ZVFH-NEXT: vmfeq.vf v0, v10, fa0 +; ZVFH-NEXT: vmfeq.vv v2, v8, v8 +; ZVFH-NEXT: vmand.mm v0, v2, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ord_vf_nxv8f16: @@ -745,12 +745,12 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 ; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v9, v12, v12 -; ZVFHMIN-NEXT: vmand.mm v0, v8, v9 +; ZVFHMIN-NEXT: vmfeq.vv v4, v12, v12 +; ZVFHMIN-NEXT: vmand.mm v0, v0, v4 ; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -763,9 +763,9 @@ define <vscale x 8 x i1> @fcmp_ord_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfmv.v.f v10, fa0 -; ZVFH-NEXT: vmfeq.vf v12, v10, fa0 -; ZVFH-NEXT: vmfeq.vv v10, v8, v8 -; ZVFH-NEXT: vmand.mm v0, v12, v10 +; ZVFH-NEXT: vmfeq.vf v0, v10, fa0 +; ZVFH-NEXT: vmfeq.vv v2, v8, v8 +; ZVFH-NEXT: vmand.mm v0, v0, v2 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ord_fv_nxv8f16: @@ -777,12 +777,12 @@ define <vscale x 8 x i1> @fcmp_ord_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v9, v12, v12 -; ZVFHMIN-NEXT: vmand.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmfeq.vv v4, v12, v12 +; ZVFHMIN-NEXT: vmand.mm v0, v4, v0 ; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -794,9 +794,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f16_nonans(<vscale x 8 x half> 
%va, <v ; ZVFH-LABEL: fcmp_ord_vv_nxv8f16_nonans: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmfeq.vv v12, v10, v10 -; ZVFH-NEXT: vmfeq.vv v10, v8, v8 -; ZVFH-NEXT: vmand.mm v0, v10, v12 +; ZVFH-NEXT: vmfeq.vv v0, v10, v10 +; ZVFH-NEXT: vmfeq.vv v2, v8, v8 +; ZVFH-NEXT: vmand.mm v0, v2, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ord_vv_nxv8f16_nonans: @@ -804,12 +804,12 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f16_nonans(<vscale x 8 x half> %va, <v ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v10, v12, v12 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12 -; ZVFHMIN-NEXT: vmand.mm v0, v8, v10 +; ZVFHMIN-NEXT: vmfeq.vv v4, v12, v12 +; ZVFHMIN-NEXT: vmand.mm v0, v4, v0 ; ZVFHMIN-NEXT: ret %vc = fcmp ord <vscale x 8 x half> %va, %vb ret <vscale x 8 x i1> %vc @@ -820,9 +820,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f16_nonans(<vscale x 8 x half> %va, ha ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfmv.v.f v10, fa0 -; ZVFH-NEXT: vmfeq.vf v12, v10, fa0 -; ZVFH-NEXT: vmfeq.vv v10, v8, v8 -; ZVFH-NEXT: vmand.mm v0, v10, v12 +; ZVFH-NEXT: vmfeq.vf v0, v10, fa0 +; ZVFH-NEXT: vmfeq.vv v2, v8, v8 +; ZVFH-NEXT: vmand.mm v0, v2, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ord_vf_nxv8f16_nonans: @@ -834,12 +834,12 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f16_nonans(<vscale x 8 x half> %va, ha ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, 
zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v9, v12, v12 -; ZVFHMIN-NEXT: vmand.mm v0, v8, v9 +; ZVFHMIN-NEXT: vmfeq.vv v4, v12, v12 +; ZVFHMIN-NEXT: vmand.mm v0, v0, v4 ; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -851,9 +851,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_ueq_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vv v12, v8, v10 -; ZVFH-NEXT: vmflt.vv v13, v10, v8 -; ZVFH-NEXT: vmnor.mm v0, v13, v12 +; ZVFH-NEXT: vmflt.vv v0, v8, v10 +; ZVFH-NEXT: vmflt.vv v2, v10, v8 +; ZVFH-NEXT: vmnor.mm v0, v2, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ueq_vv_nxv8f16: @@ -862,9 +862,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12 -; ZVFHMIN-NEXT: vmflt.vv v9, v12, v16 -; ZVFHMIN-NEXT: vmnor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12 +; ZVFHMIN-NEXT: vmflt.vv v4, v12, v16 +; ZVFHMIN-NEXT: vmnor.mm v0, v4, v0 ; ZVFHMIN-NEXT: ret %vc = fcmp ueq <vscale x 8 x half> %va, %vb ret <vscale x 8 x i1> %vc @@ -874,9 +874,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFH-LABEL: fcmp_ueq_vf_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vf v10, v8, fa0 -; ZVFH-NEXT: vmfgt.vf v11, v8, fa0 -; ZVFH-NEXT: vmnor.mm v0, v11, v10 +; ZVFH-NEXT: vmflt.vf v0, v8, fa0 +; ZVFH-NEXT: vmfgt.vf v2, v8, fa0 +; ZVFH-NEXT: vmnor.mm v0, v2, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ueq_vf_nxv8f16: @@ -889,9 +889,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; 
ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v16 -; ZVFHMIN-NEXT: vmflt.vv v9, v16, v12 -; ZVFHMIN-NEXT: vmnor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v16 +; ZVFHMIN-NEXT: vmflt.vv v4, v16, v12 +; ZVFHMIN-NEXT: vmnor.mm v0, v4, v0 ; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -903,9 +903,9 @@ define <vscale x 8 x i1> @fcmp_ueq_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFH-LABEL: fcmp_ueq_fv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmfgt.vf v10, v8, fa0 -; ZVFH-NEXT: vmflt.vf v11, v8, fa0 -; ZVFH-NEXT: vmnor.mm v0, v11, v10 +; ZVFH-NEXT: vmfgt.vf v0, v8, fa0 +; ZVFH-NEXT: vmflt.vf v2, v8, fa0 +; ZVFH-NEXT: vmnor.mm v0, v2, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ueq_fv_nxv8f16: @@ -918,9 +918,9 @@ define <vscale x 8 x i1> @fcmp_ueq_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12 -; ZVFHMIN-NEXT: vmflt.vv v9, v12, v16 -; ZVFHMIN-NEXT: vmnor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12 +; ZVFHMIN-NEXT: vmflt.vv v4, v12, v16 +; ZVFHMIN-NEXT: vmnor.mm v0, v4, v0 ; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -976,8 +976,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_ugt_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmfle.vv v12, v8, v10 -; ZVFH-NEXT: vmnot.m v0, v12 +; ZVFH-NEXT: vmfle.vv v0, v8, v10 +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; 
ZVFHMIN-LABEL: fcmp_ugt_vv_nxv8f16: @@ -986,8 +986,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v16, v12 -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v16, v12 +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %vc = fcmp ugt <vscale x 8 x half> %va, %vb ret <vscale x 8 x i1> %vc @@ -997,8 +997,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFH-LABEL: fcmp_ugt_vf_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmfle.vf v10, v8, fa0 -; ZVFH-NEXT: vmnot.m v0, v10 +; ZVFH-NEXT: vmfle.vf v0, v8, fa0 +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ugt_vf_nxv8f16: @@ -1011,8 +1011,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v12, v16 -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v12, v16 +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1024,8 +1024,8 @@ define <vscale x 8 x i1> @fcmp_ugt_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFH-LABEL: fcmp_ugt_fv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmfge.vf v10, v8, fa0 -; ZVFH-NEXT: vmnot.m v0, v10 +; ZVFH-NEXT: vmfge.vf v0, v8, fa0 +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ugt_fv_nxv8f16: @@ -1038,8 +1038,8 @@ define <vscale x 8 x i1> @fcmp_ugt_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v16, v12 -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v16, v12 +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1095,8 +1095,8 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_uge_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vv v12, v8, v10 -; ZVFH-NEXT: vmnot.m v0, v12 +; ZVFH-NEXT: vmflt.vv v0, v8, v10 +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_uge_vv_nxv8f16: @@ -1105,8 +1105,8 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12 -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12 +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %vc = fcmp uge <vscale x 8 x half> %va, %vb ret <vscale x 8 x i1> %vc @@ -1116,8 +1116,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFH-LABEL: fcmp_uge_vf_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vf v10, v8, fa0 -; ZVFH-NEXT: vmnot.m v0, v10 +; ZVFH-NEXT: vmflt.vf v0, v8, fa0 +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_uge_vf_nxv8f16: @@ -1130,8 +1130,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v16 -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v16 +; 
ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1143,8 +1143,8 @@ define <vscale x 8 x i1> @fcmp_uge_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFH-LABEL: fcmp_uge_fv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmfgt.vf v10, v8, fa0 -; ZVFH-NEXT: vmnot.m v0, v10 +; ZVFH-NEXT: vmfgt.vf v0, v8, fa0 +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_uge_fv_nxv8f16: @@ -1157,8 +1157,8 @@ define <vscale x 8 x i1> @fcmp_uge_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12 -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12 +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1214,8 +1214,8 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_ult_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmfle.vv v12, v10, v8 -; ZVFH-NEXT: vmnot.m v0, v12 +; ZVFH-NEXT: vmfle.vv v0, v10, v8 +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ult_vv_nxv8f16: @@ -1224,8 +1224,8 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v16, v12 -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v16, v12 +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %vc = fcmp ult <vscale x 8 x half> 
%va, %vb ret <vscale x 8 x i1> %vc @@ -1235,8 +1235,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFH-LABEL: fcmp_ult_vf_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmfge.vf v10, v8, fa0 -; ZVFH-NEXT: vmnot.m v0, v10 +; ZVFH-NEXT: vmfge.vf v0, v8, fa0 +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ult_vf_nxv8f16: @@ -1249,8 +1249,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v16, v12 -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v16, v12 +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1262,8 +1262,8 @@ define <vscale x 8 x i1> @fcmp_ult_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFH-LABEL: fcmp_ult_fv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmfle.vf v10, v8, fa0 -; ZVFH-NEXT: vmnot.m v0, v10 +; ZVFH-NEXT: vmfle.vf v0, v8, fa0 +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ult_fv_nxv8f16: @@ -1276,8 +1276,8 @@ define <vscale x 8 x i1> @fcmp_ult_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v12, v16 -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v12, v16 +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1333,8 +1333,8 @@ define <vscale x 8 x i1> 
@fcmp_ule_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_ule_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vv v12, v10, v8 -; ZVFH-NEXT: vmnot.m v0, v12 +; ZVFH-NEXT: vmflt.vv v0, v10, v8 +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ule_vv_nxv8f16: @@ -1343,8 +1343,8 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12 -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12 +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %vc = fcmp ule <vscale x 8 x half> %va, %vb ret <vscale x 8 x i1> %vc @@ -1354,8 +1354,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFH-LABEL: fcmp_ule_vf_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmfgt.vf v10, v8, fa0 -; ZVFH-NEXT: vmnot.m v0, v10 +; ZVFH-NEXT: vmfgt.vf v0, v8, fa0 +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ule_vf_nxv8f16: @@ -1368,8 +1368,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12 -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12 +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1381,8 +1381,8 @@ define <vscale x 8 x i1> @fcmp_ule_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFH-LABEL: fcmp_ule_fv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vf v10, v8, fa0 
-; ZVFH-NEXT: vmnot.m v0, v10 +; ZVFH-NEXT: vmflt.vf v0, v8, fa0 +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ule_fv_nxv8f16: @@ -1395,8 +1395,8 @@ define <vscale x 8 x i1> @fcmp_ule_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v16 -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v16 +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1565,9 +1565,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_uno_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmfne.vv v12, v10, v10 -; ZVFH-NEXT: vmfne.vv v10, v8, v8 -; ZVFH-NEXT: vmor.mm v0, v10, v12 +; ZVFH-NEXT: vmfne.vv v0, v10, v10 +; ZVFH-NEXT: vmfne.vv v2, v8, v8 +; ZVFH-NEXT: vmor.mm v0, v2, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_uno_vv_nxv8f16: @@ -1575,12 +1575,12 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v10, v12, v12 +; ZVFHMIN-NEXT: vmfne.vv v0, v12, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v8, v12, v12 -; ZVFHMIN-NEXT: vmor.mm v0, v8, v10 +; ZVFHMIN-NEXT: vmfne.vv v4, v12, v12 +; ZVFHMIN-NEXT: vmor.mm v0, v4, v0 ; ZVFHMIN-NEXT: ret %vc = fcmp uno <vscale x 8 x half> %va, %vb ret <vscale x 8 x i1> %vc @@ -1591,9 +1591,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; 
ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfmv.v.f v10, fa0 -; ZVFH-NEXT: vmfne.vf v12, v10, fa0 -; ZVFH-NEXT: vmfne.vv v10, v8, v8 -; ZVFH-NEXT: vmor.mm v0, v10, v12 +; ZVFH-NEXT: vmfne.vf v0, v10, fa0 +; ZVFH-NEXT: vmfne.vv v2, v8, v8 +; ZVFH-NEXT: vmor.mm v0, v2, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_uno_vf_nxv8f16: @@ -1605,12 +1605,12 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v8, v12, v12 +; ZVFHMIN-NEXT: vmfne.vv v0, v12, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v9, v12, v12 -; ZVFHMIN-NEXT: vmor.mm v0, v8, v9 +; ZVFHMIN-NEXT: vmfne.vv v4, v12, v12 +; ZVFHMIN-NEXT: vmor.mm v0, v0, v4 ; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1623,9 +1623,9 @@ define <vscale x 8 x i1> @fcmp_uno_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfmv.v.f v10, fa0 -; ZVFH-NEXT: vmfne.vf v12, v10, fa0 -; ZVFH-NEXT: vmfne.vv v10, v8, v8 -; ZVFH-NEXT: vmor.mm v0, v12, v10 +; ZVFH-NEXT: vmfne.vf v0, v10, fa0 +; ZVFH-NEXT: vmfne.vv v2, v8, v8 +; ZVFH-NEXT: vmor.mm v0, v0, v2 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_uno_fv_nxv8f16: @@ -1637,12 +1637,12 @@ define <vscale x 8 x i1> @fcmp_uno_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v8, v12, v12 +; ZVFHMIN-NEXT: vmfne.vv v0, v12, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v9, v12, v12 -; ZVFHMIN-NEXT: vmor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmfne.vv v4, v12, v12 +; ZVFHMIN-NEXT: vmor.mm v0, v4, v0 ; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1654,9 +1654,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f16_nonans(<vscale x 8 x half> %va, <v ; ZVFH-LABEL: fcmp_uno_vv_nxv8f16_nonans: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmfne.vv v12, v10, v10 -; ZVFH-NEXT: vmfne.vv v10, v8, v8 -; ZVFH-NEXT: vmor.mm v0, v10, v12 +; ZVFH-NEXT: vmfne.vv v0, v10, v10 +; ZVFH-NEXT: vmfne.vv v2, v8, v8 +; ZVFH-NEXT: vmor.mm v0, v2, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_uno_vv_nxv8f16_nonans: @@ -1664,12 +1664,12 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f16_nonans(<vscale x 8 x half> %va, <v ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v10, v12, v12 +; ZVFHMIN-NEXT: vmfne.vv v0, v12, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v8, v12, v12 -; ZVFHMIN-NEXT: vmor.mm v0, v8, v10 +; ZVFHMIN-NEXT: vmfne.vv v4, v12, v12 +; ZVFHMIN-NEXT: vmor.mm v0, v4, v0 ; ZVFHMIN-NEXT: ret %vc = fcmp uno <vscale x 8 x half> %va, %vb ret <vscale x 8 x i1> %vc @@ -1680,9 +1680,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f16_nonans(<vscale x 8 x half> %va, ha ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfmv.v.f v10, fa0 -; ZVFH-NEXT: vmfne.vf v12, v10, fa0 -; ZVFH-NEXT: vmfne.vv v10, v8, v8 -; ZVFH-NEXT: vmor.mm v0, v10, v12 +; ZVFH-NEXT: vmfne.vf v0, v10, fa0 +; ZVFH-NEXT: vmfne.vv v2, v8, v8 +; ZVFH-NEXT: 
vmor.mm v0, v2, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_uno_vf_nxv8f16_nonans: @@ -1694,12 +1694,12 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f16_nonans(<vscale x 8 x half> %va, ha ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v8, v12, v12 +; ZVFHMIN-NEXT: vmfne.vv v0, v12, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v9, v12, v12 -; ZVFHMIN-NEXT: vmor.mm v0, v8, v9 +; ZVFHMIN-NEXT: vmfne.vv v4, v12, v12 +; ZVFHMIN-NEXT: vmor.mm v0, v0, v4 ; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1991,9 +1991,9 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_one_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmflt.vv v17, v12, v8 -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmflt.vv v4, v12, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %vc = fcmp one <vscale x 8 x float> %va, %vb ret <vscale x 8 x i1> %vc @@ -2003,9 +2003,9 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK-LABEL: fcmp_one_vf_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> 
zeroinitializer @@ -2017,9 +2017,9 @@ define <vscale x 8 x i1> @fcmp_one_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK-LABEL: fcmp_one_fv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -2053,9 +2053,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_ord_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %vc = fcmp ord <vscale x 8 x float> %va, %vb ret <vscale x 8 x i1> %vc @@ -2066,9 +2066,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -2081,9 +2081,9 @@ define <vscale x 8 x i1> @fcmp_ord_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, 
v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -2095,9 +2095,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f32_nonans(<vscale x 8 x float> %va, < ; CHECK-LABEL: fcmp_ord_vv_nxv8f32_nonans: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %vc = fcmp ord <vscale x 8 x float> %va, %vb ret <vscale x 8 x i1> %vc @@ -2108,9 +2108,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f32_nonans(<vscale x 8 x float> %va, f ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -2122,9 +2122,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_ueq_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmflt.vv v17, v12, v8 -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmflt.vv v4, v12, v8 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %vc = fcmp ueq <vscale x 8 x 
float> %va, %vb ret <vscale x 8 x i1> %vc @@ -2134,9 +2134,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK-LABEL: fcmp_ueq_vf_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -2148,9 +2148,9 @@ define <vscale x 8 x i1> @fcmp_ueq_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK-LABEL: fcmp_ueq_fv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -2184,8 +2184,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_ugt_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v8, v12 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vv v0, v8, v12 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %vc = fcmp ugt <vscale x 8 x float> %va, %vb ret <vscale x 8 x i1> %vc @@ -2195,8 +2195,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK-LABEL: fcmp_ugt_vf_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; 
CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -2208,8 +2208,8 @@ define <vscale x 8 x i1> @fcmp_ugt_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK-LABEL: fcmp_ugt_fv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -2243,8 +2243,8 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_uge_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %vc = fcmp uge <vscale x 8 x float> %va, %vb ret <vscale x 8 x i1> %vc @@ -2254,8 +2254,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK-LABEL: fcmp_uge_vf_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -2267,8 +2267,8 @@ define <vscale x 8 x i1> @fcmp_uge_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK-LABEL: fcmp_uge_fv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfgt.vf 
v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -2302,8 +2302,8 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_ult_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %vc = fcmp ult <vscale x 8 x float> %va, %vb ret <vscale x 8 x i1> %vc @@ -2313,8 +2313,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK-LABEL: fcmp_ult_vf_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -2326,8 +2326,8 @@ define <vscale x 8 x i1> @fcmp_ult_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK-LABEL: fcmp_ult_fv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -2361,8 +2361,8 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_ule_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, 
zero, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v12, v8 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vv v0, v12, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %vc = fcmp ule <vscale x 8 x float> %va, %vb ret <vscale x 8 x i1> %vc @@ -2372,8 +2372,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK-LABEL: fcmp_ule_vf_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -2385,8 +2385,8 @@ define <vscale x 8 x i1> @fcmp_ule_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK-LABEL: fcmp_ule_fv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -2476,9 +2476,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_uno_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfne.vv v16, v12, v12 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v16 +; CHECK-NEXT: vmfne.vv v0, v12, v12 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %vc = fcmp uno <vscale x 8 x float> %va, %vb ret <vscale x 8 x i1> %vc @@ -2489,9 +2489,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, 
ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfne.vf v16, v12, fa0 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v16 +; CHECK-NEXT: vmfne.vf v0, v12, fa0 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -2504,9 +2504,9 @@ define <vscale x 8 x i1> @fcmp_uno_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfne.vf v16, v12, fa0 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v12 +; CHECK-NEXT: vmfne.vf v0, v12, fa0 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -2518,9 +2518,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f32_nonans(<vscale x 8 x float> %va, < ; CHECK-LABEL: fcmp_uno_vv_nxv8f32_nonans: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfne.vv v16, v12, v12 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v16 +; CHECK-NEXT: vmfne.vv v0, v12, v12 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %vc = fcmp uno <vscale x 8 x float> %va, %vb ret <vscale x 8 x i1> %vc @@ -2531,9 +2531,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f32_nonans(<vscale x 8 x float> %va, f ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfne.vf v16, v12, fa0 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v16 +; CHECK-NEXT: vmfne.vf v0, v12, fa0 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v4, 
v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -2825,9 +2825,9 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_one_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmflt.vv v25, v16, v8 -; CHECK-NEXT: vmor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: vmflt.vv v8, v16, v8 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %vc = fcmp one <vscale x 8 x double> %va, %vb ret <vscale x 8 x i1> %vc @@ -2837,9 +2837,9 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_one_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v8, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -2851,9 +2851,9 @@ define <vscale x 8 x i1> @fcmp_one_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_one_fv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v8, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -2887,9 +2887,9 @@ define <vscale 
x 8 x i1> @fcmp_ord_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_ord_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %vc = fcmp ord <vscale x 8 x double> %va, %vb ret <vscale x 8 x i1> %vc @@ -2900,9 +2900,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 +; CHECK-NEXT: vmfeq.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -2915,9 +2915,9 @@ define <vscale x 8 x i1> @fcmp_ord_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 +; CHECK-NEXT: vmfeq.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v8 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -2929,9 +2929,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f64_nonans(<vscale x 8 x double> %va, ; CHECK-LABEL: fcmp_ord_vv_nxv8f64_nonans: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v16, 
v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %vc = fcmp ord <vscale x 8 x double> %va, %vb ret <vscale x 8 x i1> %vc @@ -2942,9 +2942,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f64_nonans(<vscale x 8 x double> %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 +; CHECK-NEXT: vmfeq.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -2956,9 +2956,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_ueq_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmflt.vv v25, v16, v8 -; CHECK-NEXT: vmnor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: vmflt.vv v8, v16, v8 +; CHECK-NEXT: vmnor.mm v0, v8, v0 ; CHECK-NEXT: ret %vc = fcmp ueq <vscale x 8 x double> %va, %vb ret <vscale x 8 x i1> %vc @@ -2968,9 +2968,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_ueq_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v8, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> 
zeroinitializer @@ -2982,9 +2982,9 @@ define <vscale x 8 x i1> @fcmp_ueq_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_ueq_fv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v8, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3018,8 +3018,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_ugt_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v8, v16 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfle.vv v0, v8, v16 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %vc = fcmp ugt <vscale x 8 x double> %va, %vb ret <vscale x 8 x i1> %vc @@ -3029,8 +3029,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_ugt_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3042,8 +3042,8 @@ define <vscale x 8 x i1> @fcmp_ugt_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_ugt_fv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfge.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale 
x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3077,8 +3077,8 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_uge_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %vc = fcmp uge <vscale x 8 x double> %va, %vb ret <vscale x 8 x i1> %vc @@ -3088,8 +3088,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_uge_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3101,8 +3101,8 @@ define <vscale x 8 x i1> @fcmp_uge_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_uge_fv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3136,8 +3136,8 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_ult_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v8 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v8 +; CHECK-NEXT: 
vmnot.m v0, v0 ; CHECK-NEXT: ret %vc = fcmp ult <vscale x 8 x double> %va, %vb ret <vscale x 8 x i1> %vc @@ -3147,8 +3147,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_ult_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfge.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3160,8 +3160,8 @@ define <vscale x 8 x i1> @fcmp_ult_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_ult_fv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3195,8 +3195,8 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_ule_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v16, v8 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmflt.vv v0, v16, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %vc = fcmp ule <vscale x 8 x double> %va, %vb ret <vscale x 8 x i1> %vc @@ -3206,8 +3206,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_ule_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement 
<vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3219,8 +3219,8 @@ define <vscale x 8 x i1> @fcmp_ule_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_ule_fv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3310,9 +3310,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_uno_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfne.vv v24, v16, v16 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v24 +; CHECK-NEXT: vmfne.vv v0, v16, v16 +; CHECK-NEXT: vmfne.vv v8, v8, v8 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %vc = fcmp uno <vscale x 8 x double> %va, %vb ret <vscale x 8 x i1> %vc @@ -3323,9 +3323,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfne.vf v24, v16, fa0 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v24 +; CHECK-NEXT: vmfne.vf v0, v16, fa0 +; CHECK-NEXT: vmfne.vv v8, v8, v8 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3338,9 +3338,9 @@ define <vscale x 8 x i1> @fcmp_uno_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, 
ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfne.vf v24, v16, fa0 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v24, v16 +; CHECK-NEXT: vmfne.vf v0, v16, fa0 +; CHECK-NEXT: vmfne.vv v8, v8, v8 +; CHECK-NEXT: vmor.mm v0, v0, v8 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3352,9 +3352,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f64_nonans(<vscale x 8 x double> %va, ; CHECK-LABEL: fcmp_uno_vv_nxv8f64_nonans: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfne.vv v24, v16, v16 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v24 +; CHECK-NEXT: vmfne.vv v0, v16, v16 +; CHECK-NEXT: vmfne.vv v8, v8, v8 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %vc = fcmp uno <vscale x 8 x double> %va, %vb ret <vscale x 8 x i1> %vc @@ -3365,9 +3365,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f64_nonans(<vscale x 8 x double> %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfne.vf v24, v16, fa0 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v24 +; CHECK-NEXT: vmfne.vf v0, v16, fa0 +; CHECK-NEXT: vmfne.vv v8, v8, v8 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3382,52 +3382,52 @@ define <vscale x 16 x i1> @fcmp_oeq_vf_nx16f64(<vscale x 16 x double> %va) { ; RV32: # %bb.0: ; RV32-NEXT: fcvt.d.w fa5, zero ; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; RV32-NEXT: vmfeq.vf v24, v16, fa5 +; RV32-NEXT: vmfeq.vf v16, v16, fa5 ; RV32-NEXT: vmfeq.vf v0, v8, fa5 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: srli a0, a0, 3 ; RV32-NEXT: add a1, a0, a0 ; RV32-NEXT: 
vsetvli zero, a1, e8, mf4, ta, ma -; RV32-NEXT: vslideup.vx v0, v24, a0 +; RV32-NEXT: vslideup.vx v0, v16, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: fcmp_oeq_vf_nx16f64: ; RV64: # %bb.0: ; RV64-NEXT: fmv.d.x fa5, zero ; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; RV64-NEXT: vmfeq.vf v24, v16, fa5 +; RV64-NEXT: vmfeq.vf v16, v16, fa5 ; RV64-NEXT: vmfeq.vf v0, v8, fa5 ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: srli a0, a0, 3 ; RV64-NEXT: add a1, a0, a0 ; RV64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; RV64-NEXT: vslideup.vx v0, v24, a0 +; RV64-NEXT: vslideup.vx v0, v16, a0 ; RV64-NEXT: ret ; ; ZVFHMIN32-LABEL: fcmp_oeq_vf_nx16f64: ; ZVFHMIN32: # %bb.0: ; ZVFHMIN32-NEXT: fcvt.d.w fa5, zero ; ZVFHMIN32-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; ZVFHMIN32-NEXT: vmfeq.vf v24, v16, fa5 +; ZVFHMIN32-NEXT: vmfeq.vf v16, v16, fa5 ; ZVFHMIN32-NEXT: vmfeq.vf v0, v8, fa5 ; ZVFHMIN32-NEXT: csrr a0, vlenb ; ZVFHMIN32-NEXT: srli a0, a0, 3 ; ZVFHMIN32-NEXT: add a1, a0, a0 ; ZVFHMIN32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; ZVFHMIN32-NEXT: vslideup.vx v0, v24, a0 +; ZVFHMIN32-NEXT: vslideup.vx v0, v16, a0 ; ZVFHMIN32-NEXT: ret ; ; ZVFHMIN64-LABEL: fcmp_oeq_vf_nx16f64: ; ZVFHMIN64: # %bb.0: ; ZVFHMIN64-NEXT: fmv.d.x fa5, zero ; ZVFHMIN64-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; ZVFHMIN64-NEXT: vmfeq.vf v24, v16, fa5 +; ZVFHMIN64-NEXT: vmfeq.vf v16, v16, fa5 ; ZVFHMIN64-NEXT: vmfeq.vf v0, v8, fa5 ; ZVFHMIN64-NEXT: csrr a0, vlenb ; ZVFHMIN64-NEXT: srli a0, a0, 3 ; ZVFHMIN64-NEXT: add a1, a0, a0 ; ZVFHMIN64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; ZVFHMIN64-NEXT: vslideup.vx v0, v24, a0 +; ZVFHMIN64-NEXT: vslideup.vx v0, v16, a0 ; ZVFHMIN64-NEXT: ret %vc = fcmp oeq <vscale x 16 x double> %va, zeroinitializer ret <vscale x 16 x i1> %vc diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll index 0dede98..930228c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll @@ -1089,48 +1089,37 
@@ define <vscale x 128 x i1> @icmp_eq_vv_nxv128i8(<vscale x 128 x i8> %va, <vscale ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vmv1r.v v6, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a4, a0, a1 -; CHECK-NEXT: vl8r.v v8, (a4) -; CHECK-NEXT: vl8r.v v24, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vl8r.v v24, (a4) +; CHECK-NEXT: vl8r.v v8, (a0) ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: sub a0, a3, a1 -; CHECK-NEXT: sltu a4, a3, a0 ; CHECK-NEXT: vlm.v v0, (a2) -; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: and a0, a4, a0 +; CHECK-NEXT: sub a0, a3, a1 +; CHECK-NEXT: sltu a2, a3, a0 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: and a0, a2, a0 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vmseq.vv v7, v16, v8, v0.t +; CHECK-NEXT: vmseq.vv v16, v16, v24, v0.t ; CHECK-NEXT: bltu a3, a1, .LBB96_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a3, a1 ; CHECK-NEXT: .LBB96_2: ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; 
CHECK-NEXT: vmseq.vv v16, v8, v24, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmv1r.v v8, v7 +; CHECK-NEXT: vmseq.vv v0, v24, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -1151,16 +1140,15 @@ define <vscale x 128 x i1> @icmp_eq_vx_nxv128i8(<vscale x 128 x i8> %va, i8 %b, ; CHECK-NEXT: addi a4, a4, -1 ; CHECK-NEXT: and a3, a4, a3 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; CHECK-NEXT: vmseq.vx v24, v16, a0, v0.t +; CHECK-NEXT: vmseq.vx v16, v16, a0, v0.t ; CHECK-NEXT: bltu a2, a1, .LBB97_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB97_2: ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmv1r.v v8, v24 +; CHECK-NEXT: vmseq.vx v0, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 128 x i8> poison, i8 %b, i8 0 %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer @@ -1181,16 +1169,15 @@ define <vscale x 128 x i1> @icmp_eq_vx_swap_nxv128i8(<vscale x 128 x i8> %va, i8 ; CHECK-NEXT: addi a4, a4, -1 ; CHECK-NEXT: and a3, a4, a3 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; CHECK-NEXT: vmseq.vx v24, v16, a0, v0.t +; CHECK-NEXT: vmseq.vx v16, v16, a0, v0.t ; CHECK-NEXT: bltu a2, a1, .LBB98_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB98_2: ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmv1r.v v8, v24 +; CHECK-NEXT: vmseq.vx v0, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 128 x i8> poison, i8 %b, i8 0 %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 
128 x i8> poison, <vscale x 128 x i32> zeroinitializer @@ -1698,8 +1685,7 @@ define <vscale x 8 x i1> @icmp_eq_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 ; CHECK-LABEL: icmp_eq_vv_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmseq.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmseq.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, metadata !"eq", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1709,8 +1695,7 @@ define <vscale x 8 x i1> @icmp_eq_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <vs ; CHECK-LABEL: icmp_eq_vx_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmseq.vx v12, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmseq.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer @@ -1722,8 +1707,7 @@ define <vscale x 8 x i1> @icmp_eq_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 %b ; CHECK-LABEL: icmp_eq_vx_swap_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmseq.vx v12, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmseq.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer @@ -1735,8 +1719,7 @@ define <vscale x 8 x i1> @icmp_eq_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 ; CHECK-LABEL: icmp_eq_vi_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmseq.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmseq.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x 
i32> %va, <vscale x 8 x i32> splat (i32 4), metadata !"eq", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1746,8 +1729,7 @@ define <vscale x 8 x i1> @icmp_eq_vi_swap_nxv8i32(<vscale x 8 x i32> %va, <vscal ; CHECK-LABEL: icmp_eq_vi_swap_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmseq.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmseq.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> splat (i32 4), <vscale x 8 x i32> %va, metadata !"eq", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1757,8 +1739,7 @@ define <vscale x 8 x i1> @icmp_ne_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 ; CHECK-LABEL: icmp_ne_vv_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsne.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsne.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, metadata !"ne", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1768,8 +1749,7 @@ define <vscale x 8 x i1> @icmp_ne_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <vs ; CHECK-LABEL: icmp_ne_vx_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmsne.vx v12, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsne.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer @@ -1781,8 +1761,7 @@ define <vscale x 8 x i1> @icmp_ne_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 %b ; CHECK-LABEL: icmp_ne_vx_swap_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmsne.vx v12, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsne.vx v0, v8, a0, v0.t ; CHECK-NEXT: 
ret %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer @@ -1794,8 +1773,7 @@ define <vscale x 8 x i1> @icmp_ne_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 ; CHECK-LABEL: icmp_ne_vi_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsne.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsne.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> splat (i32 4), metadata !"ne", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1805,8 +1783,7 @@ define <vscale x 8 x i1> @icmp_ne_vi_swap_nxv8i32(<vscale x 8 x i32> %va, <vscal ; CHECK-LABEL: icmp_ne_vi_swap_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsne.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsne.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> splat (i32 4), <vscale x 8 x i32> %va, metadata !"ne", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1816,8 +1793,7 @@ define <vscale x 8 x i1> @icmp_ugt_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x ; CHECK-LABEL: icmp_ugt_vv_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsltu.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsltu.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1827,8 +1803,7 @@ define <vscale x 8 x i1> @icmp_ugt_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <v ; CHECK-LABEL: icmp_ugt_vx_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmsgtu.vx v12, v8, a0, v0.t -; CHECK-NEXT: 
vmv1r.v v0, v12 +; CHECK-NEXT: vmsgtu.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer @@ -1840,8 +1815,7 @@ define <vscale x 8 x i1> @icmp_ugt_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 % ; CHECK-LABEL: icmp_ugt_vx_swap_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmsltu.vx v12, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsltu.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer @@ -1853,8 +1827,7 @@ define <vscale x 8 x i1> @icmp_ugt_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x ; CHECK-LABEL: icmp_ugt_vi_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsgtu.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsgtu.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> splat (i32 4), metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1864,8 +1837,7 @@ define <vscale x 8 x i1> @icmp_ugt_vi_swap_nxv8i32(<vscale x 8 x i32> %va, <vsca ; CHECK-LABEL: icmp_ugt_vi_swap_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsleu.vi v12, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsleu.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> splat (i32 4), <vscale x 8 x i32> %va, metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1875,8 +1847,7 @@ define <vscale x 8 x i1> @icmp_uge_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x ; CHECK-LABEL: icmp_uge_vv_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli 
zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsleu.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsleu.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, metadata !"uge", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1886,10 +1857,9 @@ define <vscale x 8 x i1> @icmp_uge_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <v ; CHECK-LABEL: icmp_uge_vx_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; CHECK-NEXT: vmv.v.x v16, a0 +; CHECK-NEXT: vmv.v.x v12, a0 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmsleu.vv v12, v16, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsleu.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer @@ -1901,8 +1871,7 @@ define <vscale x 8 x i1> @icmp_uge_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 % ; CHECK-LABEL: icmp_uge_vx_swap_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmsleu.vx v12, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsleu.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer @@ -1914,8 +1883,7 @@ define <vscale x 8 x i1> @icmp_uge_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x ; CHECK-LABEL: icmp_uge_vi_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsgtu.vi v12, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsgtu.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> splat (i32 4), metadata !"uge", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x 
i1> %v @@ -1925,8 +1893,7 @@ define <vscale x 8 x i1> @icmp_uge_vi_swap_nxv8i32(<vscale x 8 x i32> %va, <vsca ; CHECK-LABEL: icmp_uge_vi_swap_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsleu.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsleu.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> splat (i32 4), <vscale x 8 x i32> %va, metadata !"uge", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1936,8 +1903,7 @@ define <vscale x 8 x i1> @icmp_ult_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x ; CHECK-LABEL: icmp_ult_vv_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsltu.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsltu.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, metadata !"ult", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1947,8 +1913,7 @@ define <vscale x 8 x i1> @icmp_ult_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <v ; CHECK-LABEL: icmp_ult_vx_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmsltu.vx v12, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsltu.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer @@ -1960,8 +1925,7 @@ define <vscale x 8 x i1> @icmp_ult_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 % ; CHECK-LABEL: icmp_ult_vx_swap_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmsgtu.vx v12, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsgtu.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale 
x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer @@ -1973,8 +1937,7 @@ define <vscale x 8 x i1> @icmp_ult_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x ; CHECK-LABEL: icmp_ult_vi_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsleu.vi v12, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsleu.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> splat (i32 4), metadata !"ult", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1984,8 +1947,7 @@ define <vscale x 8 x i1> @icmp_ult_vi_swap_nxv8i32(<vscale x 8 x i32> %va, <vsca ; CHECK-LABEL: icmp_ult_vi_swap_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsgtu.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsgtu.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> splat (i32 4), <vscale x 8 x i32> %va, metadata !"ult", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1995,8 +1957,7 @@ define <vscale x 8 x i1> @icmp_sgt_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x ; CHECK-LABEL: icmp_sgt_vv_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmslt.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmslt.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, metadata !"sgt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -2006,8 +1967,7 @@ define <vscale x 8 x i1> @icmp_sgt_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <v ; CHECK-LABEL: icmp_sgt_vx_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmsgt.vx v12, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsgt.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement 
<vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer @@ -2019,8 +1979,7 @@ define <vscale x 8 x i1> @icmp_sgt_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 % ; CHECK-LABEL: icmp_sgt_vx_swap_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmslt.vx v12, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmslt.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer @@ -2032,8 +1991,7 @@ define <vscale x 8 x i1> @icmp_sgt_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x ; CHECK-LABEL: icmp_sgt_vi_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsgt.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsgt.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> splat (i32 4), metadata !"sgt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -2043,8 +2001,7 @@ define <vscale x 8 x i1> @icmp_sgt_vi_swap_nxv8i32(<vscale x 8 x i32> %va, <vsca ; CHECK-LABEL: icmp_sgt_vi_swap_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsle.vi v12, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> splat (i32 4), <vscale x 8 x i32> %va, metadata !"sgt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -2054,8 +2011,7 @@ define <vscale x 8 x i1> @icmp_sge_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x ; CHECK-LABEL: icmp_sge_vv_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsle.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; 
CHECK-NEXT: vmsle.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, metadata !"sge", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -2065,10 +2021,9 @@ define <vscale x 8 x i1> @icmp_sge_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <v ; CHECK-LABEL: icmp_sge_vx_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; CHECK-NEXT: vmv.v.x v16, a0 +; CHECK-NEXT: vmv.v.x v12, a0 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmsle.vv v12, v16, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer @@ -2080,8 +2035,7 @@ define <vscale x 8 x i1> @icmp_sge_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 % ; CHECK-LABEL: icmp_sge_vx_swap_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmsle.vx v12, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer @@ -2093,8 +2047,7 @@ define <vscale x 8 x i1> @icmp_sge_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x ; CHECK-LABEL: icmp_sge_vi_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsgt.vi v12, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsgt.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> splat (i32 4), metadata !"sge", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -2104,8 +2057,7 @@ define <vscale x 8 x i1> @icmp_sge_vi_swap_nxv8i32(<vscale x 8 x i32> %va, <vsca ; 
CHECK-LABEL: icmp_sge_vi_swap_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsle.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> splat (i32 4), <vscale x 8 x i32> %va, metadata !"sge", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -2115,8 +2067,7 @@ define <vscale x 8 x i1> @icmp_slt_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x ; CHECK-LABEL: icmp_slt_vv_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmslt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmslt.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, metadata !"slt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -2126,8 +2077,7 @@ define <vscale x 8 x i1> @icmp_slt_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <v ; CHECK-LABEL: icmp_slt_vx_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmslt.vx v12, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmslt.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer @@ -2139,8 +2089,7 @@ define <vscale x 8 x i1> @icmp_slt_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 % ; CHECK-LABEL: icmp_slt_vx_swap_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmsgt.vx v12, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsgt.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer @@ -2152,8 +2101,7 @@ define 
<vscale x 8 x i1> @icmp_slt_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x ; CHECK-LABEL: icmp_slt_vi_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsle.vi v12, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> splat (i32 4), metadata !"slt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -2163,8 +2111,7 @@ define <vscale x 8 x i1> @icmp_slt_vi_swap_nxv8i32(<vscale x 8 x i32> %va, <vsca ; CHECK-LABEL: icmp_slt_vi_swap_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsgt.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsgt.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> splat (i32 4), <vscale x 8 x i32> %va, metadata !"slt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -2174,8 +2121,7 @@ define <vscale x 8 x i1> @icmp_sle_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x ; CHECK-LABEL: icmp_sle_vv_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsle.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsle.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, metadata !"sle", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -2185,8 +2131,7 @@ define <vscale x 8 x i1> @icmp_sle_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <v ; CHECK-LABEL: icmp_sle_vx_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmsle.vx v12, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, 
<vscale x 8 x i32> zeroinitializer @@ -2198,10 +2143,9 @@ define <vscale x 8 x i1> @icmp_sle_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 % ; CHECK-LABEL: icmp_sle_vx_swap_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; CHECK-NEXT: vmv.v.x v16, a0 +; CHECK-NEXT: vmv.v.x v12, a0 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmsle.vv v12, v16, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer @@ -2213,8 +2157,7 @@ define <vscale x 8 x i1> @icmp_sle_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x ; CHECK-LABEL: icmp_sle_vi_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsle.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> splat (i32 4), metadata !"sle", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -2224,8 +2167,7 @@ define <vscale x 8 x i1> @icmp_sle_vi_swap_nxv8i32(<vscale x 8 x i32> %va, <vsca ; CHECK-LABEL: icmp_sle_vi_swap_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsgt.vi v12, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsgt.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> splat (i32 4), <vscale x 8 x i32> %va, metadata !"sle", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -2244,35 +2186,34 @@ define <vscale x 32 x i1> @icmp_eq_vv_nxv32i32(<vscale x 32 x i32> %va, <vscale ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v 
v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma -; CHECK-NEXT: slli a3, a1, 3 -; CHECK-NEXT: add a3, a0, a3 -; CHECK-NEXT: vl8re32.v v8, (a3) -; CHECK-NEXT: slli a3, a1, 1 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: srli a1, a3, 2 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: vslidedown.vx v0, v0, a1 +; CHECK-NEXT: slli a4, a3, 3 +; CHECK-NEXT: add a4, a0, a4 +; CHECK-NEXT: vl8re32.v v24, (a4) +; CHECK-NEXT: slli a3, a3, 1 ; CHECK-NEXT: sub a4, a2, a3 ; CHECK-NEXT: sltu a5, a2, a4 ; CHECK-NEXT: addi a5, a5, -1 +; CHECK-NEXT: vl8re32.v v8, (a0) ; CHECK-NEXT: and a4, a5, a4 -; CHECK-NEXT: srli a1, a1, 2 -; CHECK-NEXT: vl8re32.v v24, (a0) -; CHECK-NEXT: vmv1r.v v6, v0 -; CHECK-NEXT: vslidedown.vx v0, v0, a1 ; CHECK-NEXT: vsetvli zero, a4, e32, m8, ta, ma -; CHECK-NEXT: vmseq.vv v7, v16, v8, v0.t +; CHECK-NEXT: vmseq.vv v16, v16, v24, v0.t ; CHECK-NEXT: bltu a2, a3, .LBB189_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a3 ; CHECK-NEXT: .LBB189_2: ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmseq.vv v16, v8, v24, v0.t +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmseq.vv v0, v24, v8, v0.t ; CHECK-NEXT: add a0, a1, a1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v16, v7, a1 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vslideup.vx v0, v16, a1 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 @@ -2296,18 +2237,17 @@ define <vscale x 32 x i1> @icmp_eq_vx_nxv32i32(<vscale x 32 x i32> %va, i32 %b, ; CHECK-NEXT: addi a5, a5, -1 ; CHECK-NEXT: and a4, a5, a4 ; CHECK-NEXT: vsetvli zero, a4, e32, m8, ta, ma -; CHECK-NEXT: vmseq.vx v24, v16, a0, v0.t +; CHECK-NEXT: vmseq.vx v16, v16, a0, v0.t ; 
CHECK-NEXT: bltu a1, a3, .LBB190_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a3 ; CHECK-NEXT: .LBB190_2: ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t +; CHECK-NEXT: vmseq.vx v0, v8, a0, v0.t ; CHECK-NEXT: add a0, a2, a2 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v16, v24, a2 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vslideup.vx v0, v16, a2 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer @@ -2329,18 +2269,17 @@ define <vscale x 32 x i1> @icmp_eq_vx_swap_nxv32i32(<vscale x 32 x i32> %va, i32 ; CHECK-NEXT: addi a5, a5, -1 ; CHECK-NEXT: and a4, a5, a4 ; CHECK-NEXT: vsetvli zero, a4, e32, m8, ta, ma -; CHECK-NEXT: vmseq.vx v24, v16, a0, v0.t +; CHECK-NEXT: vmseq.vx v16, v16, a0, v0.t ; CHECK-NEXT: bltu a1, a3, .LBB191_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a3 ; CHECK-NEXT: .LBB191_2: ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t +; CHECK-NEXT: vmseq.vx v0, v8, a0, v0.t ; CHECK-NEXT: add a0, a2, a2 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v16, v24, a2 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vslideup.vx v0, v16, a2 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer @@ -3100,8 +3039,7 @@ define <vscale x 8 x i1> @icmp_eq_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 ; CHECK-LABEL: icmp_eq_vv_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmseq.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmseq.vv v0, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> 
@llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"eq", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3116,18 +3054,16 @@ define <vscale x 8 x i1> @icmp_eq_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vs ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmseq.vv v16, v8, v24, v0.t -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vmseq.vv v0, v8, v16, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_eq_vx_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmseq.vx v16, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vmseq.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -3144,18 +3080,16 @@ define <vscale x 8 x i1> @icmp_eq_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %b ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmseq.vv v16, v24, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vmseq.vv v0, v16, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_eq_vx_swap_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmseq.vx v16, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vmseq.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -3167,8 +3101,7 @@ define <vscale x 8 x i1> 
@icmp_eq_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 ; CHECK-LABEL: icmp_eq_vi_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmseq.vi v16, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmseq.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> splat (i64 4), metadata !"eq", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3178,8 +3111,7 @@ define <vscale x 8 x i1> @icmp_eq_vi_swap_nxv8i64(<vscale x 8 x i64> %va, <vscal ; CHECK-LABEL: icmp_eq_vi_swap_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmseq.vi v16, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmseq.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> splat (i64 4), <vscale x 8 x i64> %va, metadata !"eq", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3189,8 +3121,7 @@ define <vscale x 8 x i1> @icmp_ne_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 ; CHECK-LABEL: icmp_ne_vv_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsne.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmsne.vv v0, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"ne", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3205,18 +3136,16 @@ define <vscale x 8 x i1> @icmp_ne_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vs ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmsne.vv v16, v8, v24, v0.t -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vmsne.vv v0, v8, v16, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; 
RV64-LABEL: icmp_ne_vx_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmsne.vx v16, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vmsne.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -3233,18 +3162,16 @@ define <vscale x 8 x i1> @icmp_ne_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %b ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmsne.vv v16, v24, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vmsne.vv v0, v16, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_ne_vx_swap_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmsne.vx v16, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vmsne.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -3256,8 +3183,7 @@ define <vscale x 8 x i1> @icmp_ne_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 ; CHECK-LABEL: icmp_ne_vi_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsne.vi v16, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsne.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> splat (i64 4), metadata !"ne", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3267,8 +3193,7 @@ define <vscale x 8 x i1> @icmp_ne_vi_swap_nxv8i64(<vscale x 8 x i64> %va, <vscal ; CHECK-LABEL: icmp_ne_vi_swap_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: 
vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsne.vi v16, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsne.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> splat (i64 4), <vscale x 8 x i64> %va, metadata !"ne", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3278,8 +3203,7 @@ define <vscale x 8 x i1> @icmp_ugt_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x ; CHECK-LABEL: icmp_ugt_vv_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsltu.vv v24, v16, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmsltu.vv v0, v16, v8, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3294,18 +3218,16 @@ define <vscale x 8 x i1> @icmp_ugt_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmsltu.vv v16, v24, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vmsltu.vv v0, v16, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_ugt_vx_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmsgtu.vx v16, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vmsgtu.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -3322,18 +3244,16 @@ define <vscale x 8 x i1> @icmp_ugt_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 % ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), 
zero +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmsltu.vv v16, v8, v24, v0.t -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vmsltu.vv v0, v8, v16, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_ugt_vx_swap_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmsltu.vx v16, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vmsltu.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -3345,8 +3265,7 @@ define <vscale x 8 x i1> @icmp_ugt_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x ; CHECK-LABEL: icmp_ugt_vi_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsgtu.vi v16, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsgtu.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> splat (i64 4), metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3356,8 +3275,7 @@ define <vscale x 8 x i1> @icmp_ugt_vi_swap_nxv8i64(<vscale x 8 x i64> %va, <vsca ; CHECK-LABEL: icmp_ugt_vi_swap_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsleu.vi v16, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsleu.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> splat (i64 4), <vscale x 8 x i64> %va, metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3367,8 +3285,7 @@ define <vscale x 8 x i1> @icmp_uge_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x ; CHECK-LABEL: icmp_uge_vv_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsleu.vv v24, v16, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v24 +; 
CHECK-NEXT: vmsleu.vv v0, v16, v8, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"uge", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3383,20 +3300,18 @@ define <vscale x 8 x i1> @icmp_uge_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmsleu.vv v16, v24, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vmsleu.vv v0, v16, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_uge_vx_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vmv.v.x v24, a0 +; RV64-NEXT: vmv.v.x v16, a0 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmsleu.vv v16, v24, v8, v0.t -; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vmsleu.vv v0, v16, v8, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -3413,18 +3328,16 @@ define <vscale x 8 x i1> @icmp_uge_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 % ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmsleu.vv v16, v8, v24, v0.t -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vmsleu.vv v0, v8, v16, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_uge_vx_swap_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmsleu.vx v16, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vmsleu.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement 
<vscale x 8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -3436,8 +3349,7 @@ define <vscale x 8 x i1> @icmp_uge_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x ; CHECK-LABEL: icmp_uge_vi_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsgtu.vi v16, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsgtu.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> splat (i64 4), metadata !"uge", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3447,8 +3359,7 @@ define <vscale x 8 x i1> @icmp_uge_vi_swap_nxv8i64(<vscale x 8 x i64> %va, <vsca ; CHECK-LABEL: icmp_uge_vi_swap_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsleu.vi v16, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsleu.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> splat (i64 4), <vscale x 8 x i64> %va, metadata !"uge", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3458,8 +3369,7 @@ define <vscale x 8 x i1> @icmp_ult_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x ; CHECK-LABEL: icmp_ult_vv_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsltu.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmsltu.vv v0, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"ult", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3474,18 +3384,16 @@ define <vscale x 8 x i1> @icmp_ult_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vlse64.v v16, (a0), zero ; 
RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmsltu.vv v16, v8, v24, v0.t -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vmsltu.vv v0, v8, v16, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_ult_vx_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmsltu.vx v16, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vmsltu.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -3502,18 +3410,16 @@ define <vscale x 8 x i1> @icmp_ult_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 % ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmsltu.vv v16, v24, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vmsltu.vv v0, v16, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_ult_vx_swap_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmsgtu.vx v16, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vmsgtu.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -3525,8 +3431,7 @@ define <vscale x 8 x i1> @icmp_ult_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x ; CHECK-LABEL: icmp_ult_vi_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsleu.vi v16, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsleu.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> splat (i64 4), metadata !"ult", <vscale x 8 x 
i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3536,8 +3441,7 @@ define <vscale x 8 x i1> @icmp_ult_vi_swap_nxv8i64(<vscale x 8 x i64> %va, <vsca ; CHECK-LABEL: icmp_ult_vi_swap_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsgtu.vi v16, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsgtu.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> splat (i64 4), <vscale x 8 x i64> %va, metadata !"ult", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3547,8 +3451,7 @@ define <vscale x 8 x i1> @icmp_sgt_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x ; CHECK-LABEL: icmp_sgt_vv_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmslt.vv v24, v16, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmslt.vv v0, v16, v8, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"sgt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3563,18 +3466,16 @@ define <vscale x 8 x i1> @icmp_sgt_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmslt.vv v16, v24, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vmslt.vv v0, v16, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_sgt_vx_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmsgt.vx v16, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vmsgt.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -3591,18 +3492,16 @@ 
define <vscale x 8 x i1> @icmp_sgt_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 % ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmslt.vv v16, v8, v24, v0.t -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vmslt.vv v0, v8, v16, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_sgt_vx_swap_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmslt.vx v16, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vmslt.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -3614,8 +3513,7 @@ define <vscale x 8 x i1> @icmp_sgt_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x ; CHECK-LABEL: icmp_sgt_vi_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsgt.vi v16, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsgt.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> splat (i64 4), metadata !"sgt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3625,8 +3523,7 @@ define <vscale x 8 x i1> @icmp_sgt_vi_swap_nxv8i64(<vscale x 8 x i64> %va, <vsca ; CHECK-LABEL: icmp_sgt_vi_swap_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsle.vi v16, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsle.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> splat (i64 4), <vscale x 8 x i64> %va, metadata !"sgt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3636,8 +3533,7 @@ define <vscale x 8 x i1> @icmp_sge_vv_nxv8i64(<vscale x 8 x 
i64> %va, <vscale x ; CHECK-LABEL: icmp_sge_vv_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsle.vv v24, v16, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmsle.vv v0, v16, v8, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"sge", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3652,20 +3548,18 @@ define <vscale x 8 x i1> @icmp_sge_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmsle.vv v16, v24, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vmsle.vv v0, v16, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_sge_vx_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vmv.v.x v24, a0 +; RV64-NEXT: vmv.v.x v16, a0 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmsle.vv v16, v24, v8, v0.t -; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vmsle.vv v0, v16, v8, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -3682,18 +3576,16 @@ define <vscale x 8 x i1> @icmp_sge_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 % ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmsle.vv v16, v8, v24, v0.t -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vmsle.vv v0, v8, v16, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_sge_vx_swap_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: 
vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmsle.vx v16, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vmsle.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -3705,8 +3597,7 @@ define <vscale x 8 x i1> @icmp_sge_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x ; CHECK-LABEL: icmp_sge_vi_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsgt.vi v16, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsgt.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> splat (i64 4), metadata !"sge", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3716,8 +3607,7 @@ define <vscale x 8 x i1> @icmp_sge_vi_swap_nxv8i64(<vscale x 8 x i64> %va, <vsca ; CHECK-LABEL: icmp_sge_vi_swap_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsle.vi v16, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsle.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> splat (i64 4), <vscale x 8 x i64> %va, metadata !"sge", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3727,8 +3617,7 @@ define <vscale x 8 x i1> @icmp_slt_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x ; CHECK-LABEL: icmp_slt_vv_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmslt.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmslt.vv v0, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"slt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3743,18 +3632,16 @@ define <vscale x 8 x i1> @icmp_slt_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v ; 
RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmslt.vv v16, v8, v24, v0.t -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vmslt.vv v0, v8, v16, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_slt_vx_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmslt.vx v16, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vmslt.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -3771,18 +3658,16 @@ define <vscale x 8 x i1> @icmp_slt_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 % ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmslt.vv v16, v24, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vmslt.vv v0, v16, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_slt_vx_swap_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmsgt.vx v16, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vmsgt.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -3794,8 +3679,7 @@ define <vscale x 8 x i1> @icmp_slt_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x ; CHECK-LABEL: icmp_slt_vi_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsle.vi v16, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsle.vi v0, 
v8, 3, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> splat (i64 4), metadata !"slt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3805,8 +3689,7 @@ define <vscale x 8 x i1> @icmp_slt_vi_swap_nxv8i64(<vscale x 8 x i64> %va, <vsca ; CHECK-LABEL: icmp_slt_vi_swap_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsgt.vi v16, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsgt.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> splat (i64 4), <vscale x 8 x i64> %va, metadata !"slt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3816,8 +3699,7 @@ define <vscale x 8 x i1> @icmp_sle_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x ; CHECK-LABEL: icmp_sle_vv_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsle.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmsle.vv v0, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"sle", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3832,18 +3714,16 @@ define <vscale x 8 x i1> @icmp_sle_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmsle.vv v16, v8, v24, v0.t -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vmsle.vv v0, v8, v16, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_sle_vx_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmsle.vx v16, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vmsle.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 
8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -3860,20 +3740,18 @@ define <vscale x 8 x i1> @icmp_sle_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 % ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmsle.vv v16, v24, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vmsle.vv v0, v16, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_sle_vx_swap_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vmv.v.x v24, a0 +; RV64-NEXT: vmv.v.x v16, a0 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmsle.vv v16, v24, v8, v0.t -; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vmsle.vv v0, v16, v8, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -3885,8 +3763,7 @@ define <vscale x 8 x i1> @icmp_sle_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x ; CHECK-LABEL: icmp_sle_vi_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsle.vi v16, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsle.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> splat (i64 4), metadata !"sle", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3896,8 +3773,7 @@ define <vscale x 8 x i1> @icmp_sle_vi_swap_nxv8i64(<vscale x 8 x i64> %va, <vsca ; CHECK-LABEL: icmp_sle_vi_swap_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsgt.vi v16, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsgt.vi v0, v8, 3, v0.t ; 
CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> splat (i64 4), <vscale x 8 x i64> %va, metadata !"sle", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll index 90ffeff..46de7bd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll @@ -2981,10 +2981,10 @@ define <vscale x 16 x i1> @icmp_eq_vi_nx16i64(<vscale x 16 x i64> %va) { ; CHECK-NEXT: srli a0, a0, 3 ; CHECK-NEXT: add a1, a0, a0 ; CHECK-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; CHECK-NEXT: vmseq.vi v24, v16, 0 +; CHECK-NEXT: vmseq.vi v16, v16, 0 ; CHECK-NEXT: vmseq.vi v0, v8, 0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vslideup.vx v0, v24, a0 +; CHECK-NEXT: vslideup.vx v0, v16, a0 ; CHECK-NEXT: ret %vc = icmp eq <vscale x 16 x i64> %va, zeroinitializer ret <vscale x 16 x i1> %vc diff --git a/llvm/test/CodeGen/RISCV/rvv/unaligned-loads-stores.ll b/llvm/test/CodeGen/RISCV/rvv/unaligned-loads-stores.ll index f488baf..1491bb6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/unaligned-loads-stores.ll +++ b/llvm/test/CodeGen/RISCV/rvv/unaligned-loads-stores.ll @@ -3,9 +3,9 @@ ; RUN: -verify-machineinstrs | FileCheck %s ; RUN: llc -mtriple riscv64 -mattr=+d,+zfh,+zvfh,+v < %s \ ; RUN: -verify-machineinstrs | FileCheck %s -; RUN: llc -mtriple riscv32 -mattr=+d,+zfh,+zvfh,+v,+fast-unaligned-access < %s \ +; RUN: llc -mtriple riscv32 -mattr=+d,+zfh,+zvfh,+v,+unaligned-vector-mem < %s \ ; RUN: -verify-machineinstrs | FileCheck --check-prefix=FAST %s -; RUN: llc -mtriple riscv64 -mattr=+d,+zfh,+zvfh,+v,+fast-unaligned-access < %s \ +; RUN: llc -mtriple riscv64 -mattr=+d,+zfh,+zvfh,+v,+unaligned-vector-mem < %s \ ; RUN: -verify-machineinstrs | FileCheck --check-prefix=FAST %s diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll index 
2fb6ee3..369141a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll @@ -13,16 +13,17 @@ define {<vscale x 16 x i1>, <vscale x 16 x i1>} @vector_deinterleave_load_nxv16i ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v8, a0 -; CHECK-NEXT: vmv1r.v v12, v8 +; CHECK-NEXT: vmv1r.v v11, v8 ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v10, v8, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vnsrl.wi v12, v8, 0 -; CHECK-NEXT: vmsne.vi v0, v12, 0 -; CHECK-NEXT: vnsrl.wi v12, v8, 8 -; CHECK-NEXT: vmsne.vi v8, v12, 0 +; CHECK-NEXT: vmerge.vim v14, v8, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmerge.vim v12, v8, 1, v0 +; CHECK-NEXT: vnsrl.wi v8, v12, 0 +; CHECK-NEXT: vmsne.vi v8, v8, 0 +; CHECK-NEXT: vnsrl.wi v10, v12, 8 +; CHECK-NEXT: vmsne.vi v0, v10, 0 +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: ret %vec = load <vscale x 32 x i1>, ptr %p %retval = call {<vscale x 16 x i1>, <vscale x 16 x i1>} @llvm.experimental.vector.deinterleave2.nxv32i1(<vscale x 32 x i1> %vec) diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll index f8d2056..889e7d1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll @@ -17,9 +17,10 @@ define {<vscale x 16 x i1>, <vscale x 16 x i1>} @vector_deinterleave_nxv16i1_nxv ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vmerge.vim v14, v8, 1, v0 ; CHECK-NEXT: vnsrl.wi v8, v12, 0 -; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: vmsne.vi v8, v8, 0 ; CHECK-NEXT: vnsrl.wi v10, v12, 8 -; CHECK-NEXT: vmsne.vi v8, v10, 0 +; CHECK-NEXT: vmsne.vi v0, v10, 0 +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: ret %retval = call {<vscale x 16 x i1>, <vscale x 16 x i1>} 
@llvm.experimental.vector.deinterleave2.nxv32i1(<vscale x 32 x i1> %vec) ret {<vscale x 16 x i1>, <vscale x 16 x i1>} %retval @@ -90,24 +91,25 @@ declare {<vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.experimental.vector.deint define {<vscale x 64 x i1>, <vscale x 64 x i1>} @vector_deinterleave_nxv64i1_nxv128i1(<vscale x 128 x i1> %vec) { ; CHECK-LABEL: vector_deinterleave_nxv64i1_nxv128i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v28, v8 +; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmv.v.i v16, 0 -; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 +; CHECK-NEXT: vmv.v.i v24, 0 +; CHECK-NEXT: vmerge.vim v16, v24, 1, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v24, v8, 0 +; CHECK-NEXT: vnsrl.wi v8, v16, 0 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v28 -; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vim v24, v24, 1, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v28, v16, 0 +; CHECK-NEXT: vnsrl.wi v12, v24, 0 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmsne.vi v0, v24, 0 +; CHECK-NEXT: vmsne.vi v8, v8, 0 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v24, v8, 8 -; CHECK-NEXT: vnsrl.wi v28, v16, 8 +; CHECK-NEXT: vnsrl.wi v0, v16, 8 +; CHECK-NEXT: vnsrl.wi v4, v24, 8 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmsne.vi v8, v24, 0 +; CHECK-NEXT: vmsne.vi v0, v0, 0 +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: ret %retval = call {<vscale x 64 x i1>, <vscale x 64 x i1>} @llvm.experimental.vector.deinterleave2.nxv128i1(<vscale x 128 x i1> %vec) ret {<vscale x 64 x i1>, <vscale x 64 x i1>} %retval diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll index baad9e1..2a0f0d5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll @@ -17,15 +17,15 @@ define void @vector_interleave_store_nxv32i1_nxv16i1(<vscale x 16 x i1> %a, <vsc ; CHECK-NEXT: vwaddu.vv v12, v8, v10 ; CHECK-NEXT: li a1, -1 ; CHECK-NEXT: vwmaccu.vx v12, a1, v10 -; CHECK-NEXT: vmsne.vi v8, v14, 0 -; CHECK-NEXT: vmsne.vi v9, v12, 0 +; CHECK-NEXT: vmsne.vi v0, v14, 0 +; CHECK-NEXT: vmsne.vi v2, v12, 0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a1, a1, 2 ; CHECK-NEXT: add a2, a1, a1 ; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v9, v8, a1 +; CHECK-NEXT: vslideup.vx v2, v0, a1 ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; CHECK-NEXT: vsm.v v9, (a0) +; CHECK-NEXT: vsm.v v2, (a0) ; CHECK-NEXT: ret %res = call <vscale x 32 x i1> @llvm.experimental.vector.interleave2.nxv32i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) store <vscale x 32 x i1> %res, ptr %p @@ -98,43 +98,34 @@ define void @vector_interleave_store_nxv16i64_nxv8i64(<vscale x 8 x i64> %a, <vs ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vmv8r.v v0, v8 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 1 ; CHECK-NEXT: vsetvli a3, zero, e16, m2, ta, mu ; CHECK-NEXT: vid.v v24 ; CHECK-NEXT: vsrl.vi v26, v24, 1 ; CHECK-NEXT: vand.vi v24, v24, 1 -; CHECK-NEXT: vmsne.vi v28, v24, 0 -; CHECK-NEXT: vmv1r.v v0, v28 +; CHECK-NEXT: vmsne.vi v0, v24, 0 +; CHECK-NEXT: vmv4r.v v28, v4 ; CHECK-NEXT: vadd.vx v26, v26, a2, v0.t ; CHECK-NEXT: vmv4r.v v12, v16 ; CHECK-NEXT: 
vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vrgatherei16.vv v0, v8, v26 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v0, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vmv4r.v v16, v12 +; CHECK-NEXT: vs8r.v v0, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vmv4r.v v16, v28 ; CHECK-NEXT: vrgatherei16.vv v8, v16, v26 ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, a0, a1 ; CHECK-NEXT: vs8r.v v8, (a1) -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vs8r.v v8, (a0) ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll index c454483..f0a2bd0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll @@ -19,13 +19,13 @@ define <vscale x 32 x i1> @vector_interleave_nxv32i1_nxv16i1(<vscale x 16 x i1> ; CHECK-NEXT: vwaddu.vv v12, v8, v10 ; CHECK-NEXT: li a0, -1 ; CHECK-NEXT: vwmaccu.vx v12, a0, v10 -; CHECK-NEXT: vmsne.vi v8, v14, 0 +; CHECK-NEXT: vmsne.vi v2, v14, 0 ; CHECK-NEXT: vmsne.vi v0, v12, 0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: add a1, a0, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v0, v8, a0 +; CHECK-NEXT: vslideup.vx v0, v2, a0 ; CHECK-NEXT: ret ; ; ZVBB-LABEL: vector_interleave_nxv32i1_nxv16i1: @@ -39,13 +39,13 @@ define <vscale x 32 x i1> @vector_interleave_nxv32i1_nxv16i1(<vscale x 16 x i1> ; ZVBB-NEXT: li a0, 1 ; ZVBB-NEXT: vmv1r.v v0, v16 ; 
ZVBB-NEXT: vwaddu.wx v12, v12, a0, v0.t -; ZVBB-NEXT: vmsne.vi v8, v14, 0 +; ZVBB-NEXT: vmsne.vi v2, v14, 0 ; ZVBB-NEXT: vmsne.vi v0, v12, 0 ; ZVBB-NEXT: csrr a0, vlenb ; ZVBB-NEXT: srli a0, a0, 2 ; ZVBB-NEXT: add a1, a0, a0 ; ZVBB-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; ZVBB-NEXT: vslideup.vx v0, v8, a0 +; ZVBB-NEXT: vslideup.vx v0, v2, a0 ; ZVBB-NEXT: ret %res = call <vscale x 32 x i1> @llvm.experimental.vector.interleave2.nxv32i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) ret <vscale x 32 x i1> %res diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcmp-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfcmp-constrained-sdnode.ll index 15849fd..45e9854 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfcmp-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfcmp-constrained-sdnode.ll @@ -1942,12 +1942,10 @@ define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; CHECK-LABEL: fcmp_ogt_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -1958,12 +1956,10 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v 
v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1976,12 +1972,10 @@ define <vscale x 8 x i1> @fcmp_ogt_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1993,12 +1987,10 @@ define <vscale x 8 x i1> @fcmp_oge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; CHECK-LABEL: fcmp_oge_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v10, v8, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -2009,12 +2001,10 @@ define <vscale x 8 x i1> @fcmp_oge_vf_nxv8f16(<vscale x 8 x half> 
%va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2027,12 +2017,10 @@ define <vscale x 8 x i1> @fcmp_oge_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2044,12 +2032,10 @@ define <vscale x 8 x i1> @fcmp_olt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; CHECK-LABEL: fcmp_olt_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v8, 
v10, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -2060,12 +2046,10 @@ define <vscale x 8 x i1> @fcmp_olt_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2078,12 +2062,10 @@ define <vscale x 8 x i1> @fcmp_olt_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2095,12 +2077,10 @@ define <vscale x 8 x i1> @fcmp_ole_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; CHECK-LABEL: fcmp_ole_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, 
v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vv v13, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v10, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -2111,12 +2091,10 @@ define <vscale x 8 x i1> @fcmp_ole_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2129,12 +2107,10 @@ define <vscale x 8 x i1> @fcmp_ole_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 
x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2146,14 +2122,13 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; CHECK-LABEL: fcmp_one_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vv v2, v8, v10, v0.t +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -2164,14 +2139,13 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> 
zeroinitializer @@ -2184,14 +2158,13 @@ define <vscale x 8 x i1> @fcmp_one_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2203,9 +2176,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; CHECK-LABEL: fcmp_ord_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -2216,9 +2189,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; 
CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2231,9 +2204,9 @@ define <vscale x 8 x i1> @fcmp_ord_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2245,14 +2218,13 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; CHECK-LABEL: fcmp_ueq_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vv v2, v8, v10, v0.t +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v2 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -2263,14 +2235,13 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; 
CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2283,14 +2254,13 @@ define <vscale x 8 x i1> @fcmp_ueq_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2302,12 +2272,11 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; CHECK-LABEL: fcmp_ugt_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv 
v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v10, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -2318,12 +2287,11 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2336,12 +2304,11 @@ define <vscale x 8 x i1> @fcmp_ugt_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x 
half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2353,12 +2320,11 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; CHECK-LABEL: fcmp_uge_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -2369,12 +2335,11 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2387,12 +2352,11 @@ define <vscale x 8 x i1> @fcmp_uge_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; 
CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2404,12 +2368,11 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; CHECK-LABEL: fcmp_ult_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -2420,12 +2383,11 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = 
insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2438,12 +2400,11 @@ define <vscale x 8 x i1> @fcmp_ult_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2455,12 +2416,11 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; CHECK-LABEL: fcmp_ule_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -2471,12 +2431,11 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; 
CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2489,12 +2448,11 @@ define <vscale x 8 x i1> @fcmp_ule_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2540,9 +2498,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; CHECK-LABEL: fcmp_uno_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfne.vv v12, v10, v10 -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vmor.mm v0, v10, v12 +; CHECK-NEXT: vmfne.vv v0, v10, v10 +; CHECK-NEXT: vmfne.vv v2, v8, v8 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp 
ret <vscale x 8 x i1> %1 @@ -2553,9 +2511,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfne.vf v12, v10, fa0 -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vmor.mm v0, v10, v12 +; CHECK-NEXT: vmfne.vf v0, v10, fa0 +; CHECK-NEXT: vmfne.vv v2, v8, v8 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2568,9 +2526,9 @@ define <vscale x 8 x i1> @fcmp_uno_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfne.vf v12, v10, fa0 -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v10 +; CHECK-NEXT: vmfne.vf v0, v10, fa0 +; CHECK-NEXT: vmfne.vv v2, v8, v8 +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2617,12 +2575,10 @@ define <vscale x 16 x i1> @fcmp_ogt_vv_nxv16f16(<vscale x 16 x half> %va, <vscal ; CHECK-LABEL: fcmp_ogt_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ogt", metadata 
!"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -2633,12 +2589,10 @@ define <vscale x 16 x i1> @fcmp_ogt_vf_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2651,12 +2605,10 @@ define <vscale x 16 x i1> @fcmp_ogt_fv_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2668,12 +2620,10 @@ define <vscale x 16 x i1> @fcmp_oge_vv_nxv16f16(<vscale x 16 x half> %va, <vscal ; CHECK-LABEL: fcmp_oge_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfle.vv v17, v12, v8, v0.t -; CHECK-NEXT: 
vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -2684,12 +2634,10 @@ define <vscale x 16 x i1> @fcmp_oge_vf_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2702,12 +2650,10 @@ define <vscale x 16 x i1> @fcmp_oge_fv_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2719,12 +2665,10 @@ define <vscale x 16 x i1> 
@fcmp_olt_vv_nxv16f16(<vscale x 16 x half> %va, <vscal ; CHECK-LABEL: fcmp_olt_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -2735,12 +2679,10 @@ define <vscale x 16 x i1> @fcmp_olt_vf_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2753,12 +2695,10 @@ define <vscale x 16 x i1> @fcmp_olt_fv_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, 
v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2770,12 +2710,10 @@ define <vscale x 16 x i1> @fcmp_ole_vv_nxv16f16(<vscale x 16 x half> %va, <vscal ; CHECK-LABEL: fcmp_ole_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfle.vv v17, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -2786,12 +2724,10 @@ define <vscale x 16 x i1> @fcmp_ole_vf_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2804,12 +2740,10 @@ define <vscale x 16 x i1> @fcmp_ole_fv_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, 
zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2821,14 +2755,13 @@ define <vscale x 16 x i1> @fcmp_one_vv_nxv16f16(<vscale x 16 x half> %va, <vscal ; CHECK-LABEL: fcmp_one_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vv v4, v8, v12, v0.t +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -2839,14 +2772,13 @@ define <vscale x 16 x i1> @fcmp_one_vf_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; 
CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2859,14 +2791,13 @@ define <vscale x 16 x i1> @fcmp_one_fv_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2878,9 +2809,9 @@ define <vscale x 16 x i1> @fcmp_ord_vv_nxv16f16(<vscale x 16 x half> %va, <vscal ; CHECK-LABEL: fcmp_ord_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> 
@llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -2891,9 +2822,9 @@ define <vscale x 16 x i1> @fcmp_ord_vf_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2906,9 +2837,9 @@ define <vscale x 16 x i1> @fcmp_ord_fv_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2920,14 +2851,13 @@ define <vscale x 16 x i1> @fcmp_ueq_vv_nxv16f16(<vscale x 16 x half> %va, <vscal ; CHECK-LABEL: fcmp_ueq_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; 
CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vv v4, v8, v12, v0.t +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -2938,14 +2868,13 @@ define <vscale x 16 x i1> @fcmp_ueq_vf_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2958,14 +2887,13 @@ define <vscale x 16 x i1> @fcmp_ueq_fv_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmv1r.v v4, v0 +; 
CHECK-NEXT: vmfgt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2977,12 +2905,11 @@ define <vscale x 16 x i1> @fcmp_ugt_vv_nxv16f16(<vscale x 16 x half> %va, <vscal ; CHECK-LABEL: fcmp_ugt_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfle.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v12, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -2993,12 +2920,11 @@ define <vscale x 16 x i1> @fcmp_ugt_vf_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -3011,12 +2937,11 @@ define <vscale x 16 x i1> 
@fcmp_ugt_fv_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -3028,12 +2953,11 @@ define <vscale x 16 x i1> @fcmp_uge_vv_nxv16f16(<vscale x 16 x half> %va, <vscal ; CHECK-LABEL: fcmp_uge_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v12, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -3044,12 +2968,11 @@ define <vscale x 16 x i1> @fcmp_uge_vf_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; 
CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -3062,12 +2985,11 @@ define <vscale x 16 x i1> @fcmp_uge_fv_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -3079,12 +3001,11 @@ define <vscale x 16 x i1> @fcmp_ult_vv_nxv16f16(<vscale x 16 x half> %va, <vscal ; CHECK-LABEL: fcmp_ult_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfle.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> 
%1 @@ -3095,12 +3016,11 @@ define <vscale x 16 x i1> @fcmp_ult_vf_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -3113,12 +3033,11 @@ define <vscale x 16 x i1> @fcmp_ult_fv_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -3130,12 +3049,11 @@ define <vscale x 16 x i1> @fcmp_ule_vv_nxv16f16(<vscale x 16 x half> %va, <vscal ; CHECK-LABEL: fcmp_ule_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v12, v8, v0.t -; 
CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -3146,12 +3064,11 @@ define <vscale x 16 x i1> @fcmp_ule_vf_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -3164,12 +3081,11 @@ define <vscale x 16 x i1> @fcmp_ule_fv_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale 
x 16 x i32> zeroinitializer @@ -3215,9 +3131,9 @@ define <vscale x 16 x i1> @fcmp_uno_vv_nxv16f16(<vscale x 16 x half> %va, <vscal ; CHECK-LABEL: fcmp_uno_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfne.vv v16, v12, v12 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v16 +; CHECK-NEXT: vmfne.vv v0, v12, v12 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -3228,9 +3144,9 @@ define <vscale x 16 x i1> @fcmp_uno_vf_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfne.vf v16, v12, fa0 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v16 +; CHECK-NEXT: vmfne.vf v0, v12, fa0 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -3243,9 +3159,9 @@ define <vscale x 16 x i1> @fcmp_uno_fv_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfne.vf v16, v12, fa0 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v12 +; CHECK-NEXT: vmfne.vf v0, v12, fa0 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -3292,12 +3208,10 @@ define <vscale x 32 x i1> @fcmp_ogt_vv_nxv32f16(<vscale x 32 x 
half> %va, <vscal ; CHECK-LABEL: fcmp_ogt_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vv v25, v16, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmflt.vv v0, v16, v8, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -3308,12 +3222,10 @@ define <vscale x 32 x i1> @fcmp_ogt_vf_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3326,12 +3238,10 @@ define <vscale x 32 x i1> @fcmp_ogt_fv_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret 
%head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3343,12 +3253,10 @@ define <vscale x 32 x i1> @fcmp_oge_vv_nxv32f16(<vscale x 32 x half> %va, <vscal ; CHECK-LABEL: fcmp_oge_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmfle.vv v25, v16, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmfle.vv v0, v16, v8, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -3359,12 +3267,10 @@ define <vscale x 32 x i1> @fcmp_oge_vf_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfge.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3377,12 +3283,10 @@ define <vscale x 32 x i1> @fcmp_oge_fv_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; 
CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfle.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3394,12 +3298,10 @@ define <vscale x 32 x i1> @fcmp_olt_vv_nxv32f16(<vscale x 32 x half> %va, <vscal ; CHECK-LABEL: fcmp_olt_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vv v25, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v16, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -3410,12 +3312,10 @@ define <vscale x 32 x i1> @fcmp_olt_vf_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3428,12 
+3328,10 @@ define <vscale x 32 x i1> @fcmp_olt_fv_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3445,12 +3343,10 @@ define <vscale x 32 x i1> @fcmp_ole_vv_nxv32f16(<vscale x 32 x half> %va, <vscal ; CHECK-LABEL: fcmp_ole_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmfle.vv v25, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v16, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -3461,12 +3357,10 @@ define <vscale x 32 x i1> @fcmp_ole_vf_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfle.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm 
v0, v16, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3479,12 +3373,10 @@ define <vscale x 32 x i1> @fcmp_ole_fv_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfge.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3496,14 +3388,13 @@ define <vscale x 32 x i1> @fcmp_one_vv_nxv32f16(<vscale x 32 x half> %va, <vscal ; CHECK-LABEL: fcmp_one_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v0, v25, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v0, v24, v0 ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vv v25, v16, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v16, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v24 ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -3514,14 +3405,13 @@ define <vscale x 32 x i1> @fcmp_one_vf_nxv32f16(<vscale x 32 x half> %va, 
half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmand.mm v0, v16, v0 ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3534,14 +3424,13 @@ define <vscale x 32 x i1> @fcmp_one_fv_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3553,9 +3442,9 @@ define <vscale x 32 x i1> @fcmp_ord_vv_nxv32f16(<vscale x 32 x half> %va, <vscal ; CHECK-LABEL: fcmp_ord_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v8, v8, v8 +; CHECK-NEXT: 
vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -3566,9 +3455,9 @@ define <vscale x 32 x i1> @fcmp_ord_vf_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 +; CHECK-NEXT: vmfeq.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3581,9 +3470,9 @@ define <vscale x 32 x i1> @fcmp_ord_fv_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 +; CHECK-NEXT: vmfeq.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v8 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3595,14 +3484,13 @@ define <vscale x 32 x i1> @fcmp_ueq_vv_nxv32f16(<vscale x 32 x half> %va, <vscal ; CHECK-LABEL: fcmp_ueq_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v0, v25, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v0, v24, v0 ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t -; CHECK-NEXT: 
vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vv v25, v16, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v16, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v24 ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -3613,14 +3501,13 @@ define <vscale x 32 x i1> @fcmp_ueq_vf_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmand.mm v0, v16, v0 ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3633,14 +3520,13 @@ define <vscale x 32 x i1> @fcmp_ueq_fv_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector 
<vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3652,12 +3538,11 @@ define <vscale x 32 x i1> @fcmp_ugt_vv_nxv32f16(<vscale x 32 x half> %va, <vscal ; CHECK-LABEL: fcmp_ugt_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: vmfle.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v16, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -3668,12 +3553,11 @@ define <vscale x 32 x i1> @fcmp_ugt_vf_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3686,12 +3570,11 @@ define <vscale x 32 x i1> @fcmp_ugt_fv_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 
-; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3703,12 +3586,11 @@ define <vscale x 32 x i1> @fcmp_uge_vv_nxv32f16(<vscale x 32 x half> %va, <vscal ; CHECK-LABEL: fcmp_uge_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v16, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -3719,12 +3601,11 @@ define <vscale x 32 x i1> @fcmp_uge_vf_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> 
poison, <vscale x 32 x i32> zeroinitializer @@ -3737,12 +3618,11 @@ define <vscale x 32 x i1> @fcmp_uge_fv_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3754,12 +3634,11 @@ define <vscale x 32 x i1> @fcmp_ult_vv_nxv32f16(<vscale x 32 x half> %va, <vscal ; CHECK-LABEL: fcmp_ult_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: vmfle.vv v24, v16, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmfle.vv v0, v16, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -3770,12 +3649,11 @@ define <vscale x 32 x i1> @fcmp_ult_vf_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; 
CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3788,12 +3666,11 @@ define <vscale x 32 x i1> @fcmp_ult_fv_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3805,12 +3682,11 @@ define <vscale x 32 x i1> @fcmp_ule_vv_nxv32f16(<vscale x 32 x half> %va, <vscal ; CHECK-LABEL: fcmp_ule_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: vmflt.vv v24, v16, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmflt.vv v0, v16, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> 
%1 @@ -3821,12 +3697,11 @@ define <vscale x 32 x i1> @fcmp_ule_vf_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3839,12 +3714,11 @@ define <vscale x 32 x i1> @fcmp_ule_fv_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3890,9 +3764,9 @@ define <vscale x 32 x i1> @fcmp_uno_vv_nxv32f16(<vscale x 32 x half> %va, <vscal ; CHECK-LABEL: fcmp_uno_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfne.vv v24, v16, v16 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v24 +; CHECK-NEXT: vmfne.vv v0, v16, v16 +; CHECK-NEXT: vmfne.vv v8, v8, v8 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> 
@llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -3903,9 +3777,9 @@ define <vscale x 32 x i1> @fcmp_uno_vf_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfne.vf v24, v16, fa0 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v24 +; CHECK-NEXT: vmfne.vf v0, v16, fa0 +; CHECK-NEXT: vmfne.vv v8, v8, v8 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3918,9 +3792,9 @@ define <vscale x 32 x i1> @fcmp_uno_fv_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfne.vf v24, v16, fa0 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v24, v16 +; CHECK-NEXT: vmfne.vf v0, v16, fa0 +; CHECK-NEXT: vmfne.vv v8, v8, v8 +; CHECK-NEXT: vmor.mm v0, v0, v8 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -5233,12 +5107,10 @@ define <vscale x 4 x i1> @fcmp_ogt_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmp_ogt_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t ; CHECK-NEXT: 
ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -5249,12 +5121,10 @@ define <vscale x 4 x i1> @fcmp_ogt_vf_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5267,12 +5137,10 @@ define <vscale x 4 x i1> @fcmp_ogt_fv_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5284,12 +5152,10 @@ define <vscale x 4 x i1> @fcmp_oge_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmp_oge_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; 
CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v10, v8, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -5300,12 +5166,10 @@ define <vscale x 4 x i1> @fcmp_oge_vf_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5318,12 +5182,10 @@ define <vscale x 4 x i1> @fcmp_oge_fv_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale 
x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5335,12 +5197,10 @@ define <vscale x 4 x i1> @fcmp_olt_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmp_olt_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -5351,12 +5211,10 @@ define <vscale x 4 x i1> @fcmp_olt_vf_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5369,12 +5227,10 @@ define <vscale x 4 x i1> @fcmp_olt_fv_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; 
CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5386,12 +5242,10 @@ define <vscale x 4 x i1> @fcmp_ole_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmp_ole_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vv v13, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v10, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -5402,12 +5256,10 @@ define <vscale x 4 x i1> @fcmp_ole_vf_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5420,12 
+5272,10 @@ define <vscale x 4 x i1> @fcmp_ole_fv_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5437,14 +5287,13 @@ define <vscale x 4 x i1> @fcmp_one_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmp_one_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vv v2, v8, v10, v0.t +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -5455,14 +5304,13 @@ define <vscale x 4 x i1> @fcmp_one_vf_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv 
v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5475,14 +5323,13 @@ define <vscale x 4 x i1> @fcmp_one_fv_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5494,9 +5341,9 @@ define <vscale x 4 x i1> @fcmp_ord_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmp_ord_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; 
CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -5507,9 +5354,9 @@ define <vscale x 4 x i1> @fcmp_ord_vf_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5522,9 +5369,9 @@ define <vscale x 4 x i1> @fcmp_ord_fv_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5536,14 +5383,13 @@ define <vscale x 4 x i1> @fcmp_ueq_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmp_ueq_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t -; 
CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vv v2, v8, v10, v0.t +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v2 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -5554,14 +5400,13 @@ define <vscale x 4 x i1> @fcmp_ueq_vf_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5574,14 +5419,13 @@ define <vscale x 4 x i1> @fcmp_ueq_fv_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, 
fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5593,12 +5437,11 @@ define <vscale x 4 x i1> @fcmp_ugt_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmp_ugt_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v10, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -5609,12 +5452,11 @@ define <vscale x 4 x i1> @fcmp_ugt_vf_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x 
float> poison, <vscale x 4 x i32> zeroinitializer @@ -5627,12 +5469,11 @@ define <vscale x 4 x i1> @fcmp_ugt_fv_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5644,12 +5485,11 @@ define <vscale x 4 x i1> @fcmp_uge_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmp_uge_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -5660,12 +5500,11 @@ define <vscale x 4 x i1> @fcmp_uge_vf_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: 
vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5678,12 +5517,11 @@ define <vscale x 4 x i1> @fcmp_uge_fv_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5695,12 +5533,11 @@ define <vscale x 4 x i1> @fcmp_ult_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmp_ult_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x 
float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -5711,12 +5548,11 @@ define <vscale x 4 x i1> @fcmp_ult_vf_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5729,12 +5565,11 @@ define <vscale x 4 x i1> @fcmp_ult_fv_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5746,12 +5581,11 @@ define <vscale x 4 x i1> @fcmp_ule_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmp_ule_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, 
v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -5762,12 +5596,11 @@ define <vscale x 4 x i1> @fcmp_ule_vf_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5780,12 +5613,11 @@ define <vscale x 4 x i1> @fcmp_ule_fv_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 
%splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5831,9 +5663,9 @@ define <vscale x 4 x i1> @fcmp_uno_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmp_uno_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfne.vv v12, v10, v10 -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vmor.mm v0, v10, v12 +; CHECK-NEXT: vmfne.vv v0, v10, v10 +; CHECK-NEXT: vmfne.vv v2, v8, v8 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -5844,9 +5676,9 @@ define <vscale x 4 x i1> @fcmp_uno_vf_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfne.vf v12, v10, fa0 -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vmor.mm v0, v10, v12 +; CHECK-NEXT: vmfne.vf v0, v10, fa0 +; CHECK-NEXT: vmfne.vv v2, v8, v8 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5859,9 +5691,9 @@ define <vscale x 4 x i1> @fcmp_uno_fv_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfne.vf v12, v10, fa0 -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v10 +; CHECK-NEXT: vmfne.vf v0, v10, fa0 +; CHECK-NEXT: vmfne.vv v2, v8, v8 +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ 
-5908,12 +5740,10 @@ define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_ogt_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -5924,12 +5754,10 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5942,12 +5770,10 @@ define <vscale x 8 x i1> @fcmp_ogt_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: 
vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5959,12 +5785,10 @@ define <vscale x 8 x i1> @fcmp_oge_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_oge_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfle.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -5975,12 +5799,10 @@ define <vscale x 8 x i1> @fcmp_oge_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5993,12 +5815,10 @@ define <vscale x 8 x i1> @fcmp_oge_fv_nxv8f32(<vscale x 8 x float> %va, float %b 
; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6010,12 +5830,10 @@ define <vscale x 8 x i1> @fcmp_olt_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_olt_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -6026,12 +5844,10 @@ define <vscale x 8 x i1> @fcmp_olt_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: 
vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6044,12 +5860,10 @@ define <vscale x 8 x i1> @fcmp_olt_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6061,12 +5875,10 @@ define <vscale x 8 x i1> @fcmp_ole_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_ole_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfle.vv v17, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -6077,12 +5889,10 @@ define <vscale x 8 x i1> @fcmp_ole_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; 
CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6095,12 +5905,10 @@ define <vscale x 8 x i1> @fcmp_ole_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6112,14 +5920,13 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_one_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vv v4, v8, 
v12, v0.t +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -6130,14 +5937,13 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6150,14 +5956,13 @@ define <vscale x 8 x i1> @fcmp_one_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: 
vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6169,9 +5974,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_ord_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -6182,9 +5987,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6197,9 +6002,9 @@ define <vscale x 8 x i1> @fcmp_ord_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = 
shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6211,14 +6016,13 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_ueq_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vv v4, v8, v12, v0.t +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -6229,14 +6033,13 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> 
poison, <vscale x 8 x i32> zeroinitializer @@ -6249,14 +6052,13 @@ define <vscale x 8 x i1> @fcmp_ueq_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6268,12 +6070,11 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_ugt_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfle.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v12, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -6284,12 +6085,11 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; 
CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6302,12 +6102,11 @@ define <vscale x 8 x i1> @fcmp_ugt_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6319,12 +6118,11 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_uge_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v12, v0.t +; 
CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -6335,12 +6133,11 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6353,12 +6150,11 @@ define <vscale x 8 x i1> @fcmp_uge_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6370,12 +6166,11 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_ult_vv_nxv8f32: ; CHECK: # 
%bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfle.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -6386,12 +6181,11 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6404,12 +6198,11 @@ define <vscale x 8 x i1> @fcmp_ult_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; 
CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6421,12 +6214,11 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_ule_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -6437,12 +6229,11 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6455,12 +6246,11 @@ define <vscale x 8 x i1> @fcmp_ule_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; 
CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6506,9 +6296,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_uno_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfne.vv v16, v12, v12 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v16 +; CHECK-NEXT: vmfne.vv v0, v12, v12 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -6519,9 +6309,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfne.vf v16, v12, fa0 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v16 +; CHECK-NEXT: vmfne.vf v0, v12, fa0 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6534,9 +6324,9 @@ define <vscale x 8 x i1> 
@fcmp_uno_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfne.vf v16, v12, fa0 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v12 +; CHECK-NEXT: vmfne.vf v0, v12, fa0 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6583,12 +6373,10 @@ define <vscale x 16 x i1> @fcmp_ogt_vv_nxv16f32(<vscale x 16 x float> %va, <vsca ; CHECK-LABEL: fcmp_ogt_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vv v25, v16, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmflt.vv v0, v16, v8, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -6599,12 +6387,10 @@ define <vscale x 16 x i1> @fcmp_ogt_vf_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = 
shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -6617,12 +6403,10 @@ define <vscale x 16 x i1> @fcmp_ogt_fv_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -6634,12 +6418,10 @@ define <vscale x 16 x i1> @fcmp_oge_vv_nxv16f32(<vscale x 16 x float> %va, <vsca ; CHECK-LABEL: fcmp_oge_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmfle.vv v25, v16, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmfle.vv v0, v16, v8, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -6650,12 +6432,10 @@ define <vscale x 16 x i1> @fcmp_oge_vf_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 
-; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfge.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -6668,12 +6448,10 @@ define <vscale x 16 x i1> @fcmp_oge_fv_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfle.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -6685,12 +6463,10 @@ define <vscale x 16 x i1> @fcmp_olt_vv_nxv16f32(<vscale x 16 x float> %va, <vsca ; CHECK-LABEL: fcmp_olt_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vv v25, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v16, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -6701,12 +6477,10 @@ define <vscale x 16 x i1> 
@fcmp_olt_vf_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -6719,12 +6493,10 @@ define <vscale x 16 x i1> @fcmp_olt_fv_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -6736,12 +6508,10 @@ define <vscale x 16 x i1> @fcmp_ole_vv_nxv16f32(<vscale x 16 x float> %va, <vsca ; CHECK-LABEL: fcmp_ole_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmfle.vv v25, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v16, v0.t 
; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -6752,12 +6522,10 @@ define <vscale x 16 x i1> @fcmp_ole_vf_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfle.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -6770,12 +6538,10 @@ define <vscale x 16 x i1> @fcmp_ole_fv_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfge.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -6787,14 +6553,13 @@ define <vscale x 16 x i1> @fcmp_one_vv_nxv16f32(<vscale x 16 x float> %va, <vsca ; CHECK-LABEL: fcmp_one_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: 
vmand.mm v0, v25, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v0, v24, v0 ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vv v25, v16, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v16, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v24 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -6805,14 +6570,13 @@ define <vscale x 16 x i1> @fcmp_one_vf_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmand.mm v0, v16, v0 ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -6825,14 +6589,13 @@ define <vscale x 16 x i1> @fcmp_one_fv_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t 
-; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -6844,9 +6607,9 @@ define <vscale x 16 x i1> @fcmp_ord_vv_nxv16f32(<vscale x 16 x float> %va, <vsca ; CHECK-LABEL: fcmp_ord_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -6857,9 +6620,9 @@ define <vscale x 16 x i1> @fcmp_ord_vf_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 +; CHECK-NEXT: vmfeq.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -6872,9 +6635,9 @@ define <vscale x 16 x i1> @fcmp_ord_fv_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 +; CHECK-NEXT: vmfeq.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm 
v0, v0, v8 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -6886,14 +6649,13 @@ define <vscale x 16 x i1> @fcmp_ueq_vv_nxv16f32(<vscale x 16 x float> %va, <vsca ; CHECK-LABEL: fcmp_ueq_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v0, v25, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v0, v24, v0 ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vv v25, v16, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v16, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v24 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -6904,14 +6666,13 @@ define <vscale x 16 x i1> @fcmp_ueq_vf_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmand.mm v0, v16, v0 ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -6924,14 
+6685,13 @@ define <vscale x 16 x i1> @fcmp_ueq_fv_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -6943,12 +6703,11 @@ define <vscale x 16 x i1> @fcmp_ugt_vv_nxv16f32(<vscale x 16 x float> %va, <vsca ; CHECK-LABEL: fcmp_ugt_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: vmfle.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v16, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -6959,12 +6718,11 @@ define <vscale x 16 x i1> @fcmp_ugt_vf_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: 
vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -6977,12 +6735,11 @@ define <vscale x 16 x i1> @fcmp_ugt_fv_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -6994,12 +6751,11 @@ define <vscale x 16 x i1> @fcmp_uge_vv_nxv16f32(<vscale x 16 x float> %va, <vsca ; CHECK-LABEL: fcmp_uge_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v16, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"uge", metadata 
!"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -7010,12 +6766,11 @@ define <vscale x 16 x i1> @fcmp_uge_vf_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -7028,12 +6783,11 @@ define <vscale x 16 x i1> @fcmp_uge_fv_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -7045,12 +6799,11 @@ define <vscale x 16 x i1> @fcmp_ult_vv_nxv16f32(<vscale x 16 x float> %va, <vsca ; CHECK-LABEL: fcmp_ult_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: vmfle.vv v24, v16, v8, v0.t -; CHECK-NEXT: 
vmnot.m v0, v24 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmfle.vv v0, v16, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -7061,12 +6814,11 @@ define <vscale x 16 x i1> @fcmp_ult_vf_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -7079,12 +6831,11 @@ define <vscale x 16 x i1> @fcmp_ult_fv_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -7096,12 +6847,11 @@ define <vscale x 
16 x i1> @fcmp_ule_vv_nxv16f32(<vscale x 16 x float> %va, <vsca ; CHECK-LABEL: fcmp_ule_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: vmflt.vv v24, v16, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmflt.vv v0, v16, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -7112,12 +6862,11 @@ define <vscale x 16 x i1> @fcmp_ule_vf_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -7130,12 +6879,11 @@ define <vscale x 16 x i1> @fcmp_ule_fv_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: 
vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -7181,9 +6929,9 @@ define <vscale x 16 x i1> @fcmp_uno_vv_nxv16f32(<vscale x 16 x float> %va, <vsca ; CHECK-LABEL: fcmp_uno_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfne.vv v24, v16, v16 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v24 +; CHECK-NEXT: vmfne.vv v0, v16, v16 +; CHECK-NEXT: vmfne.vv v8, v8, v8 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -7194,9 +6942,9 @@ define <vscale x 16 x i1> @fcmp_uno_vf_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfne.vf v24, v16, fa0 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v24 +; CHECK-NEXT: vmfne.vf v0, v16, fa0 +; CHECK-NEXT: vmfne.vv v8, v8, v8 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -7209,9 +6957,9 @@ define <vscale x 16 x i1> @fcmp_uno_fv_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfne.vf v24, v16, fa0 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v24, v16 +; CHECK-NEXT: vmfne.vf v0, v16, fa0 +; CHECK-NEXT: vmfne.vv v8, v8, v8 +; CHECK-NEXT: 
vmor.mm v0, v0, v8 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -7891,12 +7639,10 @@ define <vscale x 2 x i1> @fcmp_ogt_vv_nxv2f64(<vscale x 2 x double> %va, <vscale ; CHECK-LABEL: fcmp_ogt_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -7907,12 +7653,10 @@ define <vscale x 2 x i1> @fcmp_ogt_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -7925,12 +7669,10 @@ define <vscale x 2 x i1> @fcmp_ogt_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; 
CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -7942,12 +7684,10 @@ define <vscale x 2 x i1> @fcmp_oge_vv_nxv2f64(<vscale x 2 x double> %va, <vscale ; CHECK-LABEL: fcmp_oge_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v10, v8, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -7958,12 +7698,10 @@ define <vscale x 2 x i1> @fcmp_oge_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, 
double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -7976,12 +7714,10 @@ define <vscale x 2 x i1> @fcmp_oge_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -7993,12 +7729,10 @@ define <vscale x 2 x i1> @fcmp_olt_vv_nxv2f64(<vscale x 2 x double> %va, <vscale ; CHECK-LABEL: fcmp_olt_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -8009,12 +7743,10 @@ define <vscale x 2 x i1> @fcmp_olt_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: 
vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8027,12 +7759,10 @@ define <vscale x 2 x i1> @fcmp_olt_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8044,12 +7774,10 @@ define <vscale x 2 x i1> @fcmp_ole_vv_nxv2f64(<vscale x 2 x double> %va, <vscale ; CHECK-LABEL: fcmp_ole_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vv v13, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v10, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ole", metadata 
!"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -8060,12 +7788,10 @@ define <vscale x 2 x i1> @fcmp_ole_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8078,12 +7804,10 @@ define <vscale x 2 x i1> @fcmp_ole_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8095,14 +7819,13 @@ define <vscale x 2 x i1> @fcmp_one_vv_nxv2f64(<vscale x 2 x double> %va, <vscale ; CHECK-LABEL: fcmp_one_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t -; 
CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vv v2, v8, v10, v0.t +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -8113,14 +7836,13 @@ define <vscale x 2 x i1> @fcmp_one_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8133,14 +7855,13 @@ define <vscale x 2 x i1> @fcmp_one_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, 
v0.t -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8152,9 +7873,9 @@ define <vscale x 2 x i1> @fcmp_ord_vv_nxv2f64(<vscale x 2 x double> %va, <vscale ; CHECK-LABEL: fcmp_ord_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -8165,9 +7886,9 @@ define <vscale x 2 x i1> @fcmp_ord_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8180,9 +7901,9 @@ define <vscale x 2 x i1> @fcmp_ord_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, 
v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8194,14 +7915,13 @@ define <vscale x 2 x i1> @fcmp_ueq_vv_nxv2f64(<vscale x 2 x double> %va, <vscale ; CHECK-LABEL: fcmp_ueq_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vv v2, v8, v10, v0.t +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v2 ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -8212,14 +7932,13 @@ define <vscale x 2 x i1> @fcmp_ueq_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: 
vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8232,14 +7951,13 @@ define <vscale x 2 x i1> @fcmp_ueq_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8251,12 +7969,11 @@ define <vscale x 2 x i1> @fcmp_ugt_vv_nxv2f64(<vscale x 2 x double> %va, <vscale ; CHECK-LABEL: fcmp_ugt_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v10, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 
x double> %va, <vscale x 2 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -8267,12 +7984,11 @@ define <vscale x 2 x i1> @fcmp_ugt_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8285,12 +8001,11 @@ define <vscale x 2 x i1> @fcmp_ugt_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8302,12 +8017,11 @@ define <vscale x 2 x i1> @fcmp_uge_vv_nxv2f64(<vscale x 2 x double> %va, <vscale ; CHECK-LABEL: fcmp_uge_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, 
v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -8318,12 +8032,11 @@ define <vscale x 2 x i1> @fcmp_uge_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8336,12 +8049,11 @@ define <vscale x 2 x i1> @fcmp_uge_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 
2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8353,12 +8065,11 @@ define <vscale x 2 x i1> @fcmp_ult_vv_nxv2f64(<vscale x 2 x double> %va, <vscale ; CHECK-LABEL: fcmp_ult_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -8369,12 +8080,11 @@ define <vscale x 2 x i1> @fcmp_ult_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8387,12 +8097,11 @@ define <vscale x 2 x i1> @fcmp_ult_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: 
vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8404,12 +8113,11 @@ define <vscale x 2 x i1> @fcmp_ule_vv_nxv2f64(<vscale x 2 x double> %va, <vscale ; CHECK-LABEL: fcmp_ule_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -8420,12 +8128,11 @@ define <vscale x 2 x i1> @fcmp_ule_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m 
v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8438,12 +8145,11 @@ define <vscale x 2 x i1> @fcmp_ule_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8489,9 +8195,9 @@ define <vscale x 2 x i1> @fcmp_uno_vv_nxv2f64(<vscale x 2 x double> %va, <vscale ; CHECK-LABEL: fcmp_uno_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfne.vv v12, v10, v10 -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vmor.mm v0, v10, v12 +; CHECK-NEXT: vmfne.vv v0, v10, v10 +; CHECK-NEXT: vmfne.vv v2, v8, v8 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -8502,9 +8208,9 @@ define <vscale x 2 x i1> @fcmp_uno_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfne.vf v12, v10, fa0 -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vmor.mm v0, v10, v12 +; CHECK-NEXT: vmfne.vf v0, v10, 
fa0 +; CHECK-NEXT: vmfne.vv v2, v8, v8 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8517,9 +8223,9 @@ define <vscale x 2 x i1> @fcmp_uno_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfne.vf v12, v10, fa0 -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v10 +; CHECK-NEXT: vmfne.vf v0, v10, fa0 +; CHECK-NEXT: vmfne.vv v2, v8, v8 +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8566,12 +8272,10 @@ define <vscale x 4 x i1> @fcmp_ogt_vv_nxv4f64(<vscale x 4 x double> %va, <vscale ; CHECK-LABEL: fcmp_ogt_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -8582,12 +8286,10 @@ define <vscale x 4 x i1> @fcmp_ogt_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, 
v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -8600,12 +8302,10 @@ define <vscale x 4 x i1> @fcmp_ogt_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -8617,12 +8317,10 @@ define <vscale x 4 x i1> @fcmp_oge_vv_nxv4f64(<vscale x 4 x double> %va, <vscale ; CHECK-LABEL: fcmp_oge_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfle.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"oge", metadata !"fpexcept.strict") 
strictfp ret <vscale x 4 x i1> %1 @@ -8633,12 +8331,10 @@ define <vscale x 4 x i1> @fcmp_oge_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -8651,12 +8347,10 @@ define <vscale x 4 x i1> @fcmp_oge_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -8668,12 +8362,10 @@ define <vscale x 4 x i1> @fcmp_olt_vv_nxv4f64(<vscale x 4 x double> %va, <vscale ; CHECK-LABEL: fcmp_olt_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 
+; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -8684,12 +8376,10 @@ define <vscale x 4 x i1> @fcmp_olt_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -8702,12 +8392,10 @@ define <vscale x 4 x i1> @fcmp_olt_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -8719,12 +8407,10 @@ define <vscale x 4 x i1> @fcmp_ole_vv_nxv4f64(<vscale x 4 x 
double> %va, <vscale ; CHECK-LABEL: fcmp_ole_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfle.vv v17, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -8735,12 +8421,10 @@ define <vscale x 4 x i1> @fcmp_ole_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -8753,12 +8437,10 @@ define <vscale x 4 x i1> @fcmp_ole_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm 
v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -8770,14 +8452,13 @@ define <vscale x 4 x i1> @fcmp_one_vv_nxv4f64(<vscale x 4 x double> %va, <vscale ; CHECK-LABEL: fcmp_one_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vv v4, v8, v12, v0.t +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -8788,14 +8469,13 @@ define <vscale x 4 x i1> @fcmp_one_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; 
CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -8808,14 +8488,13 @@ define <vscale x 4 x i1> @fcmp_one_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -8827,9 +8506,9 @@ define <vscale x 4 x i1> @fcmp_ord_vv_nxv4f64(<vscale x 4 x double> %va, <vscale ; CHECK-LABEL: fcmp_ord_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -8840,9 +8519,9 @@ define <vscale x 4 x i1> @fcmp_ord_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f 
v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -8855,9 +8534,9 @@ define <vscale x 4 x i1> @fcmp_ord_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -8869,14 +8548,13 @@ define <vscale x 4 x i1> @fcmp_ueq_vv_nxv4f64(<vscale x 4 x double> %va, <vscale ; CHECK-LABEL: fcmp_ueq_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vv v4, v8, v12, v0.t +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ueq", metadata 
!"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -8887,14 +8565,13 @@ define <vscale x 4 x i1> @fcmp_ueq_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -8907,14 +8584,13 @@ define <vscale x 4 x i1> @fcmp_ueq_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -8926,12 +8602,11 @@ 
define <vscale x 4 x i1> @fcmp_ugt_vv_nxv4f64(<vscale x 4 x double> %va, <vscale ; CHECK-LABEL: fcmp_ugt_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfle.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v12, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -8942,12 +8617,11 @@ define <vscale x 4 x i1> @fcmp_ugt_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -8960,12 +8634,11 @@ define <vscale x 4 x i1> @fcmp_ugt_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t -; 
CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -8977,12 +8650,11 @@ define <vscale x 4 x i1> @fcmp_uge_vv_nxv4f64(<vscale x 4 x double> %va, <vscale ; CHECK-LABEL: fcmp_uge_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v12, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -8993,12 +8665,11 @@ define <vscale x 4 x i1> @fcmp_uge_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> 
poison, <vscale x 4 x i32> zeroinitializer @@ -9011,12 +8682,11 @@ define <vscale x 4 x i1> @fcmp_uge_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -9028,12 +8698,11 @@ define <vscale x 4 x i1> @fcmp_ult_vv_nxv4f64(<vscale x 4 x double> %va, <vscale ; CHECK-LABEL: fcmp_ult_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfle.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -9044,12 +8713,11 @@ define <vscale x 4 x i1> @fcmp_ult_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: 
vmv1r.v v12, v0 -; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -9062,12 +8730,11 @@ define <vscale x 4 x i1> @fcmp_ult_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -9079,12 +8746,11 @@ define <vscale x 4 x i1> @fcmp_ule_vv_nxv4f64(<vscale x 4 x double> %va, <vscale ; CHECK-LABEL: fcmp_ule_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, 
<vscale x 4 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -9095,12 +8761,11 @@ define <vscale x 4 x i1> @fcmp_ule_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -9113,12 +8778,11 @@ define <vscale x 4 x i1> @fcmp_ule_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -9164,9 +8828,9 @@ define <vscale x 4 x i1> @fcmp_uno_vv_nxv4f64(<vscale x 4 x double> %va, <vscale ; CHECK-LABEL: fcmp_uno_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfne.vv v16, v12, v12 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; 
CHECK-NEXT: vmor.mm v0, v12, v16 +; CHECK-NEXT: vmfne.vv v0, v12, v12 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -9177,9 +8841,9 @@ define <vscale x 4 x i1> @fcmp_uno_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfne.vf v16, v12, fa0 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v16 +; CHECK-NEXT: vmfne.vf v0, v12, fa0 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -9192,9 +8856,9 @@ define <vscale x 4 x i1> @fcmp_uno_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfne.vf v16, v12, fa0 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v12 +; CHECK-NEXT: vmfne.vf v0, v12, fa0 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -9241,12 +8905,10 @@ define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_ogt_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vv v25, v16, v8, v0.t -; 
CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmflt.vv v0, v16, v8, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -9257,12 +8919,10 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9275,12 +8935,10 @@ define <vscale x 8 x i1> @fcmp_ogt_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9292,12 +8950,10 @@ define <vscale x 8 x i1> @fcmp_oge_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: 
fcmp_oge_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmfle.vv v25, v16, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmfle.vv v0, v16, v8, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -9308,12 +8964,10 @@ define <vscale x 8 x i1> @fcmp_oge_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfge.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9326,12 +8980,10 @@ define <vscale x 8 x i1> @fcmp_oge_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfle.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x 
double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9343,12 +8995,10 @@ define <vscale x 8 x i1> @fcmp_olt_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_olt_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vv v25, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v16, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -9359,12 +9009,10 @@ define <vscale x 8 x i1> @fcmp_olt_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9377,12 +9025,10 @@ define <vscale x 8 x i1> @fcmp_olt_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; 
CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9394,12 +9040,10 @@ define <vscale x 8 x i1> @fcmp_ole_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_ole_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmfle.vv v25, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v16, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -9410,12 +9054,10 @@ define <vscale x 8 x i1> @fcmp_ole_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfle.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9428,12 +9070,10 @@ define <vscale x 8 x 
i1> @fcmp_ole_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfge.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9445,14 +9085,13 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_one_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v0, v25, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v0, v24, v0 ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vv v25, v16, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v16, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v24 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -9463,14 +9102,13 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmand.mm v0, v16, v0 ; CHECK-NEXT: 
vmv1r.v v16, v0 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9483,14 +9121,13 @@ define <vscale x 8 x i1> @fcmp_one_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9502,9 +9139,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_ord_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -9515,9 +9152,9 @@ define <vscale x 8 x i1> 
@fcmp_ord_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 +; CHECK-NEXT: vmfeq.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9530,9 +9167,9 @@ define <vscale x 8 x i1> @fcmp_ord_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 +; CHECK-NEXT: vmfeq.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v8 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9544,14 +9181,13 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_ueq_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v0, v25, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v0, v24, v0 ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vv v25, v16, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v16, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v24 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> 
%va, <vscale x 8 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -9562,14 +9198,13 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmand.mm v0, v16, v0 ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9582,14 +9217,13 @@ define <vscale x 8 x i1> @fcmp_ueq_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9601,12 +9235,11 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_ugt_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, 
zero, e64, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: vmfle.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v16, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -9617,12 +9250,11 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9635,12 +9267,11 @@ define <vscale x 8 x i1> @fcmp_ugt_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = 
insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9652,12 +9283,11 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_uge_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v16, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -9668,12 +9298,11 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9686,12 +9315,11 @@ define <vscale x 8 x i1> @fcmp_uge_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf 
v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9703,12 +9331,11 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_ult_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: vmfle.vv v24, v16, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmfle.vv v0, v16, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -9719,12 +9346,11 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, 
double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9737,12 +9363,11 @@ define <vscale x 8 x i1> @fcmp_ult_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9754,12 +9379,11 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_ule_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: vmflt.vv v24, v16, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmflt.vv v0, v16, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -9770,12 +9394,11 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, 
v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9788,12 +9411,11 @@ define <vscale x 8 x i1> @fcmp_ule_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9839,9 +9461,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_uno_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfne.vv v24, v16, v16 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v24 +; CHECK-NEXT: vmfne.vv v0, v16, v16 +; CHECK-NEXT: vmfne.vv v8, v8, v8 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -9852,9 +9474,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f64(<vscale x 8 x double> 
%va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfne.vf v24, v16, fa0 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v24 +; CHECK-NEXT: vmfne.vf v0, v16, fa0 +; CHECK-NEXT: vmfne.vv v8, v8, v8 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9867,9 +9489,9 @@ define <vscale x 8 x i1> @fcmp_uno_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfne.vf v24, v16, fa0 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v24, v16 +; CHECK-NEXT: vmfne.vf v0, v16, fa0 +; CHECK-NEXT: vmfne.vv v8, v8, v8 +; CHECK-NEXT: vmor.mm v0, v0, v8 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcmps-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfcmps-constrained-sdnode.ll index 9a10359..32e925f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfcmps-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfcmps-constrained-sdnode.ll @@ -1605,9 +1605,9 @@ define <vscale x 8 x i1> @fcmps_oeq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale ; CHECK-LABEL: fcmps_oeq_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vmfle.vv v13, v8, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmfle.vv v2, v8, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, 
metadata !"oeq", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -1617,9 +1617,9 @@ define <vscale x 8 x i1> @fcmps_oeq_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK-LABEL: fcmps_oeq_vf_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v11, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v2, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1631,9 +1631,9 @@ define <vscale x 8 x i1> @fcmps_oeq_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK-LABEL: fcmps_oeq_fv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v11, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v2, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1781,9 +1781,9 @@ define <vscale x 8 x i1> @fcmps_one_vv_nxv8f16(<vscale x 8 x half> %va, <vscale ; CHECK-LABEL: fcmps_one_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v8, v10 -; CHECK-NEXT: vmflt.vv v13, v10, v8 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmflt.vv v2, v10, v8 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -1793,9 +1793,9 @@ define <vscale 
x 8 x i1> @fcmps_one_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK-LABEL: fcmps_one_vf_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1807,9 +1807,9 @@ define <vscale x 8 x i1> @fcmps_one_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK-LABEL: fcmps_one_fv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1821,9 +1821,9 @@ define <vscale x 8 x i1> @fcmps_ord_vv_nxv8f16(<vscale x 8 x half> %va, <vscale ; CHECK-LABEL: fcmps_ord_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v10 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v10 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -1834,9 +1834,9 @@ define <vscale x 8 x i1> @fcmps_ord_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, 
e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1849,9 +1849,9 @@ define <vscale x 8 x i1> @fcmps_ord_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1863,9 +1863,9 @@ define <vscale x 8 x i1> @fcmps_ueq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale ; CHECK-LABEL: fcmps_ueq_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v8, v10 -; CHECK-NEXT: vmflt.vv v13, v10, v8 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmflt.vv v2, v10, v8 +; CHECK-NEXT: vmnor.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -1875,9 +1875,9 @@ define <vscale x 8 x i1> @fcmps_ueq_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK-LABEL: fcmps_ueq_vf_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmfgt.vf v11, v8, 
fa0 -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1889,9 +1889,9 @@ define <vscale x 8 x i1> @fcmps_ueq_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK-LABEL: fcmps_ueq_fv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1903,8 +1903,8 @@ define <vscale x 8 x i1> @fcmps_ugt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale ; CHECK-LABEL: fcmps_ugt_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v8, v10 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vv v0, v8, v10 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -1914,8 +1914,8 @@ define <vscale x 8 x i1> @fcmps_ugt_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK-LABEL: fcmps_ugt_vf_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, 
<vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1927,8 +1927,8 @@ define <vscale x 8 x i1> @fcmps_ugt_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK-LABEL: fcmps_ugt_fv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1940,8 +1940,8 @@ define <vscale x 8 x i1> @fcmps_uge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale ; CHECK-LABEL: fcmps_uge_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v8, v10 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -1951,8 +1951,8 @@ define <vscale x 8 x i1> @fcmps_uge_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK-LABEL: fcmps_uge_vf_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1964,8 +1964,8 @@ define <vscale x 8 x i1> @fcmps_uge_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK-LABEL: fcmps_uge_fv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfgt.vf v0, 
v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1977,8 +1977,8 @@ define <vscale x 8 x i1> @fcmps_ult_vv_nxv8f16(<vscale x 8 x half> %va, <vscale ; CHECK-LABEL: fcmps_ult_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -1988,8 +1988,8 @@ define <vscale x 8 x i1> @fcmps_ult_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK-LABEL: fcmps_ult_vf_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2001,8 +2001,8 @@ define <vscale x 8 x i1> @fcmps_ult_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK-LABEL: fcmps_ult_fv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2014,8 +2014,8 @@ define <vscale x 8 x i1> @fcmps_ule_vv_nxv8f16(<vscale x 8 x half> %va, <vscale ; CHECK-LABEL: 
fcmps_ule_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v10, v8 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vv v0, v10, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -2025,8 +2025,8 @@ define <vscale x 8 x i1> @fcmps_ule_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK-LABEL: fcmps_ule_vf_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2038,8 +2038,8 @@ define <vscale x 8 x i1> @fcmps_ule_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK-LABEL: fcmps_ule_fv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2051,9 +2051,9 @@ define <vscale x 8 x i1> @fcmps_une_vv_nxv8f16(<vscale x 8 x half> %va, <vscale ; CHECK-LABEL: fcmps_une_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vmfle.vv v13, v8, v10 -; CHECK-NEXT: vmnand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmfle.vv v2, v8, v10 +; CHECK-NEXT: vmnand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> 
@llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -2063,9 +2063,9 @@ define <vscale x 8 x i1> @fcmps_une_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK-LABEL: fcmps_une_vf_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v11, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v2, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2077,9 +2077,9 @@ define <vscale x 8 x i1> @fcmps_une_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK-LABEL: fcmps_une_fv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v11, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v2, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2091,10 +2091,10 @@ define <vscale x 8 x i1> @fcmps_uno_vv_nxv8f16(<vscale x 8 x half> %va, <vscale ; CHECK-LABEL: fcmps_uno_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v10 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmnot.m v8, v10 -; CHECK-NEXT: vmorn.mm v0, v8, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v10 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmnot.m v8, v2 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> 
@llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -2105,10 +2105,10 @@ define <vscale x 8 x i1> @fcmps_uno_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmnot.m v8, v10 -; CHECK-NEXT: vmorn.mm v0, v8, v12 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmnot.m v8, v2 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2121,10 +2121,10 @@ define <vscale x 8 x i1> @fcmps_uno_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmnot.m v10, v12 -; CHECK-NEXT: vmfle.vv v11, v8, v8 -; CHECK-NEXT: vmorn.mm v0, v10, v11 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmnot.m v10, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v8 +; CHECK-NEXT: vmorn.mm v0, v10, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2137,9 +2137,9 @@ define <vscale x 16 x i1> @fcmps_oeq_vv_nxv16f16(<vscale x 16 x half> %va, <vsca ; CHECK-LABEL: fcmps_oeq_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmfle.vv v17, v8, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmfle.vv v4, v8, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 
x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -2149,9 +2149,9 @@ define <vscale x 16 x i1> @fcmps_oeq_vf_nxv16f16(<vscale x 16 x half> %va, half ; CHECK-LABEL: fcmps_oeq_vf_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v4, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2163,9 +2163,9 @@ define <vscale x 16 x i1> @fcmps_oeq_fv_nxv16f16(<vscale x 16 x half> %va, half ; CHECK-LABEL: fcmps_oeq_fv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v4, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2313,9 +2313,9 @@ define <vscale x 16 x i1> @fcmps_one_vv_nxv16f16(<vscale x 16 x half> %va, <vsca ; CHECK-LABEL: fcmps_one_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmflt.vv v17, v12, v8 -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmflt.vv v4, v12, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> 
%vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -2325,9 +2325,9 @@ define <vscale x 16 x i1> @fcmps_one_vf_nxv16f16(<vscale x 16 x half> %va, half ; CHECK-LABEL: fcmps_one_vf_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2339,9 +2339,9 @@ define <vscale x 16 x i1> @fcmps_one_fv_nxv16f16(<vscale x 16 x half> %va, half ; CHECK-LABEL: fcmps_one_fv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2353,9 +2353,9 @@ define <vscale x 16 x i1> @fcmps_ord_vv_nxv16f16(<vscale x 16 x half> %va, <vsca ; CHECK-LABEL: fcmps_ord_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v12 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v12 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -2366,9 +2366,9 
@@ define <vscale x 16 x i1> @fcmps_ord_vf_nxv16f16(<vscale x 16 x half> %va, half ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2381,9 +2381,9 @@ define <vscale x 16 x i1> @fcmps_ord_fv_nxv16f16(<vscale x 16 x half> %va, half ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2395,9 +2395,9 @@ define <vscale x 16 x i1> @fcmps_ueq_vv_nxv16f16(<vscale x 16 x half> %va, <vsca ; CHECK-LABEL: fcmps_ueq_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmflt.vv v17, v12, v8 -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmflt.vv v4, v12, v8 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -2407,9 +2407,9 @@ define <vscale x 16 x i1> @fcmps_ueq_vf_nxv16f16(<vscale x 16 x half> %va, half ; CHECK-LABEL: 
fcmps_ueq_vf_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2421,9 +2421,9 @@ define <vscale x 16 x i1> @fcmps_ueq_fv_nxv16f16(<vscale x 16 x half> %va, half ; CHECK-LABEL: fcmps_ueq_fv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2435,8 +2435,8 @@ define <vscale x 16 x i1> @fcmps_ugt_vv_nxv16f16(<vscale x 16 x half> %va, <vsca ; CHECK-LABEL: fcmps_ugt_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v8, v12 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vv v0, v8, v12 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -2446,8 +2446,8 @@ define <vscale x 16 x i1> @fcmps_ugt_vf_nxv16f16(<vscale x 16 x half> %va, half ; CHECK-LABEL: fcmps_ugt_vf_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vf v0, 
v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2459,8 +2459,8 @@ define <vscale x 16 x i1> @fcmps_ugt_fv_nxv16f16(<vscale x 16 x half> %va, half ; CHECK-LABEL: fcmps_ugt_fv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2472,8 +2472,8 @@ define <vscale x 16 x i1> @fcmps_uge_vv_nxv16f16(<vscale x 16 x half> %va, <vsca ; CHECK-LABEL: fcmps_uge_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -2483,8 +2483,8 @@ define <vscale x 16 x i1> @fcmps_uge_vf_nxv16f16(<vscale x 16 x half> %va, half ; CHECK-LABEL: fcmps_uge_vf_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2496,8 +2496,8 @@ define <vscale x 16 x i1> @fcmps_uge_fv_nxv16f16(<vscale x 16 x half> %va, half ; 
CHECK-LABEL: fcmps_uge_fv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2509,8 +2509,8 @@ define <vscale x 16 x i1> @fcmps_ult_vv_nxv16f16(<vscale x 16 x half> %va, <vsca ; CHECK-LABEL: fcmps_ult_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -2520,8 +2520,8 @@ define <vscale x 16 x i1> @fcmps_ult_vf_nxv16f16(<vscale x 16 x half> %va, half ; CHECK-LABEL: fcmps_ult_vf_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2533,8 +2533,8 @@ define <vscale x 16 x i1> @fcmps_ult_fv_nxv16f16(<vscale x 16 x half> %va, half ; CHECK-LABEL: fcmps_ult_fv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 
16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2546,8 +2546,8 @@ define <vscale x 16 x i1> @fcmps_ule_vv_nxv16f16(<vscale x 16 x half> %va, <vsca ; CHECK-LABEL: fcmps_ule_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v12, v8 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vv v0, v12, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -2557,8 +2557,8 @@ define <vscale x 16 x i1> @fcmps_ule_vf_nxv16f16(<vscale x 16 x half> %va, half ; CHECK-LABEL: fcmps_ule_vf_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2570,8 +2570,8 @@ define <vscale x 16 x i1> @fcmps_ule_fv_nxv16f16(<vscale x 16 x half> %va, half ; CHECK-LABEL: fcmps_ule_fv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2583,9 +2583,9 @@ define <vscale x 16 x i1> @fcmps_une_vv_nxv16f16(<vscale x 16 x half> %va, <vsca ; CHECK-LABEL: fcmps_une_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmfle.vv 
v17, v8, v12 -; CHECK-NEXT: vmnand.mm v0, v17, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmfle.vv v4, v8, v12 +; CHECK-NEXT: vmnand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -2595,9 +2595,9 @@ define <vscale x 16 x i1> @fcmps_une_vf_nxv16f16(<vscale x 16 x half> %va, half ; CHECK-LABEL: fcmps_une_vf_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v13, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v4, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2609,9 +2609,9 @@ define <vscale x 16 x i1> @fcmps_une_fv_nxv16f16(<vscale x 16 x half> %va, half ; CHECK-LABEL: fcmps_une_fv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v4, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2623,10 +2623,10 @@ define <vscale x 16 x i1> @fcmps_uno_vv_nxv16f16(<vscale x 16 x half> %va, <vsca ; CHECK-LABEL: fcmps_uno_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v12 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmnot.m v8, v12 -; CHECK-NEXT: vmorn.mm v0, v8, v16 +; 
CHECK-NEXT: vmfle.vv v0, v12, v12 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmnot.m v8, v4 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -2637,10 +2637,10 @@ define <vscale x 16 x i1> @fcmps_uno_vf_nxv16f16(<vscale x 16 x half> %va, half ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmnot.m v8, v12 -; CHECK-NEXT: vmorn.mm v0, v8, v16 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmnot.m v8, v4 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2653,10 +2653,10 @@ define <vscale x 16 x i1> @fcmps_uno_fv_nxv16f16(<vscale x 16 x half> %va, half ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmnot.m v12, v16 -; CHECK-NEXT: vmfle.vv v13, v8, v8 -; CHECK-NEXT: vmorn.mm v0, v12, v13 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmnot.m v12, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v8 +; CHECK-NEXT: vmorn.mm v0, v12, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2669,9 +2669,9 @@ define <vscale x 32 x i1> @fcmps_oeq_vv_nxv32f16(<vscale x 32 x half> %va, <vsca ; CHECK-LABEL: fcmps_oeq_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v8 -; CHECK-NEXT: vmfle.vv v25, 
v8, v16 -; CHECK-NEXT: vmand.mm v0, v25, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v8 +; CHECK-NEXT: vmfle.vv v8, v8, v16 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -2681,9 +2681,9 @@ define <vscale x 32 x i1> @fcmps_oeq_vf_nxv32f16(<vscale x 32 x half> %va, half ; CHECK-LABEL: fcmps_oeq_vf_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfge.vf v16, v8, fa0 -; CHECK-NEXT: vmfle.vf v17, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v8, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -2695,9 +2695,9 @@ define <vscale x 32 x i1> @fcmps_oeq_fv_nxv32f16(<vscale x 32 x half> %va, half ; CHECK-LABEL: fcmps_oeq_fv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfle.vf v16, v8, fa0 -; CHECK-NEXT: vmfge.vf v17, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v8, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -2845,9 +2845,9 @@ define <vscale x 32 x i1> @fcmps_one_vv_nxv32f16(<vscale x 32 x half> %va, <vsca ; CHECK-LABEL: fcmps_one_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmflt.vv v25, v16, v8 -; CHECK-NEXT: vmor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: 
vmflt.vv v8, v16, v8 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -2857,9 +2857,9 @@ define <vscale x 32 x i1> @fcmps_one_vf_nxv32f16(<vscale x 32 x half> %va, half ; CHECK-LABEL: fcmps_one_vf_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v8, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -2871,9 +2871,9 @@ define <vscale x 32 x i1> @fcmps_one_fv_nxv32f16(<vscale x 32 x half> %va, half ; CHECK-LABEL: fcmps_one_fv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v8, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -2885,9 +2885,9 @@ define <vscale x 32 x i1> @fcmps_ord_vv_nxv32f16(<vscale x 32 x half> %va, <vsca ; CHECK-LABEL: fcmps_ord_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v16 -; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v16 +; CHECK-NEXT: vmfle.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> 
@llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -2898,9 +2898,9 @@ define <vscale x 32 x i1> @fcmps_ord_vf_nxv32f16(<vscale x 32 x half> %va, half ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfle.vf v24, v16, fa0 -; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfle.vf v0, v16, fa0 +; CHECK-NEXT: vmfle.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -2913,9 +2913,9 @@ define <vscale x 32 x i1> @fcmps_ord_fv_nxv32f16(<vscale x 32 x half> %va, half ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfle.vf v24, v16, fa0 -; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmfle.vf v0, v16, fa0 +; CHECK-NEXT: vmfle.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v8 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -2927,9 +2927,9 @@ define <vscale x 32 x i1> @fcmps_ueq_vv_nxv32f16(<vscale x 32 x half> %va, <vsca ; CHECK-LABEL: fcmps_ueq_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmflt.vv v25, v16, v8 -; CHECK-NEXT: vmnor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: vmflt.vv v8, v16, v8 +; CHECK-NEXT: vmnor.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata 
!"ueq", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -2939,9 +2939,9 @@ define <vscale x 32 x i1> @fcmps_ueq_vf_nxv32f16(<vscale x 32 x half> %va, half ; CHECK-LABEL: fcmps_ueq_vf_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v8, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -2953,9 +2953,9 @@ define <vscale x 32 x i1> @fcmps_ueq_fv_nxv32f16(<vscale x 32 x half> %va, half ; CHECK-LABEL: fcmps_ueq_fv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v8, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -2967,8 +2967,8 @@ define <vscale x 32 x i1> @fcmps_ugt_vv_nxv32f16(<vscale x 32 x half> %va, <vsca ; CHECK-LABEL: fcmps_ugt_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v8, v16 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfle.vv v0, v8, v16 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -2978,8 +2978,8 @@ define <vscale x 32 x i1> @fcmps_ugt_vf_nxv32f16(<vscale x 32 x half> %va, half ; 
CHECK-LABEL: fcmps_ugt_vf_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfle.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -2991,8 +2991,8 @@ define <vscale x 32 x i1> @fcmps_ugt_fv_nxv32f16(<vscale x 32 x half> %va, half ; CHECK-LABEL: fcmps_ugt_fv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfge.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3004,8 +3004,8 @@ define <vscale x 32 x i1> @fcmps_uge_vv_nxv32f16(<vscale x 32 x half> %va, <vsca ; CHECK-LABEL: fcmps_uge_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -3015,8 +3015,8 @@ define <vscale x 32 x i1> @fcmps_uge_vf_nxv32f16(<vscale x 32 x half> %va, half ; CHECK-LABEL: fcmps_uge_vf_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 
32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3028,8 +3028,8 @@ define <vscale x 32 x i1> @fcmps_uge_fv_nxv32f16(<vscale x 32 x half> %va, half ; CHECK-LABEL: fcmps_uge_fv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3041,8 +3041,8 @@ define <vscale x 32 x i1> @fcmps_ult_vv_nxv32f16(<vscale x 32 x half> %va, <vsca ; CHECK-LABEL: fcmps_ult_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v8 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -3052,8 +3052,8 @@ define <vscale x 32 x i1> @fcmps_ult_vf_nxv32f16(<vscale x 32 x half> %va, half ; CHECK-LABEL: fcmps_ult_vf_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfge.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3065,8 +3065,8 @@ define <vscale x 32 x i1> @fcmps_ult_fv_nxv32f16(<vscale x 32 x half> %va, half ; CHECK-LABEL: fcmps_ult_fv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfle.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, 
v16 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3078,8 +3078,8 @@ define <vscale x 32 x i1> @fcmps_ule_vv_nxv32f16(<vscale x 32 x half> %va, <vsca ; CHECK-LABEL: fcmps_ule_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v16, v8 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmflt.vv v0, v16, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -3089,8 +3089,8 @@ define <vscale x 32 x i1> @fcmps_ule_vf_nxv32f16(<vscale x 32 x half> %va, half ; CHECK-LABEL: fcmps_ule_vf_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3102,8 +3102,8 @@ define <vscale x 32 x i1> @fcmps_ule_fv_nxv32f16(<vscale x 32 x half> %va, half ; CHECK-LABEL: fcmps_ule_fv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3115,9 +3115,9 @@ define <vscale x 32 x i1> @fcmps_une_vv_nxv32f16(<vscale x 
32 x half> %va, <vsca ; CHECK-LABEL: fcmps_une_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v8 -; CHECK-NEXT: vmfle.vv v25, v8, v16 -; CHECK-NEXT: vmnand.mm v0, v25, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v8 +; CHECK-NEXT: vmfle.vv v8, v8, v16 +; CHECK-NEXT: vmnand.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -3127,9 +3127,9 @@ define <vscale x 32 x i1> @fcmps_une_vf_nxv32f16(<vscale x 32 x half> %va, half ; CHECK-LABEL: fcmps_une_vf_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfge.vf v16, v8, fa0 -; CHECK-NEXT: vmfle.vf v17, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v17, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v8, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3141,9 +3141,9 @@ define <vscale x 32 x i1> @fcmps_une_fv_nxv32f16(<vscale x 32 x half> %va, half ; CHECK-LABEL: fcmps_une_fv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfle.vf v16, v8, fa0 -; CHECK-NEXT: vmfge.vf v17, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v17, v16 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v8, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3155,10 +3155,10 @@ define <vscale x 32 x i1> @fcmps_uno_vv_nxv32f16(<vscale x 32 x half> %va, <vsca ; CHECK-LABEL: fcmps_uno_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: 
vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v16 -; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmnot.m v8, v16 -; CHECK-NEXT: vmorn.mm v0, v8, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v16 +; CHECK-NEXT: vmfle.vv v8, v8, v8 +; CHECK-NEXT: vmnot.m v8, v8 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -3169,10 +3169,10 @@ define <vscale x 32 x i1> @fcmps_uno_vf_nxv32f16(<vscale x 32 x half> %va, half ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfle.vf v24, v16, fa0 -; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmnot.m v8, v16 -; CHECK-NEXT: vmorn.mm v0, v8, v24 +; CHECK-NEXT: vmfle.vf v0, v16, fa0 +; CHECK-NEXT: vmfle.vv v8, v8, v8 +; CHECK-NEXT: vmnot.m v8, v8 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3185,10 +3185,10 @@ define <vscale x 32 x i1> @fcmps_uno_fv_nxv32f16(<vscale x 32 x half> %va, half ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfle.vf v24, v16, fa0 -; CHECK-NEXT: vmnot.m v16, v24 -; CHECK-NEXT: vmfle.vv v17, v8, v8 -; CHECK-NEXT: vmorn.mm v0, v16, v17 +; CHECK-NEXT: vmfle.vf v0, v16, fa0 +; CHECK-NEXT: vmnot.m v16, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v8 +; CHECK-NEXT: vmorn.mm v0, v16, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -4265,9 +4265,9 @@ define <vscale x 4 x i1> @fcmps_oeq_vv_nxv4f32(<vscale x 4 x float> %va, 
<vscale ; CHECK-LABEL: fcmps_oeq_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vmfle.vv v13, v8, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmfle.vv v2, v8, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -4277,9 +4277,9 @@ define <vscale x 4 x i1> @fcmps_oeq_vf_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK-LABEL: fcmps_oeq_vf_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v11, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v2, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4291,9 +4291,9 @@ define <vscale x 4 x i1> @fcmps_oeq_fv_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK-LABEL: fcmps_oeq_fv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v11, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v2, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4441,9 +4441,9 @@ define <vscale x 4 x i1> @fcmps_one_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmps_one_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, 
ta, ma -; CHECK-NEXT: vmflt.vv v12, v8, v10 -; CHECK-NEXT: vmflt.vv v13, v10, v8 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmflt.vv v2, v10, v8 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -4453,9 +4453,9 @@ define <vscale x 4 x i1> @fcmps_one_vf_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK-LABEL: fcmps_one_vf_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4467,9 +4467,9 @@ define <vscale x 4 x i1> @fcmps_one_fv_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK-LABEL: fcmps_one_fv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4481,9 +4481,9 @@ define <vscale x 4 x i1> @fcmps_ord_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmps_ord_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v10 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, 
v10, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v10 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -4494,9 +4494,9 @@ define <vscale x 4 x i1> @fcmps_ord_vf_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4509,9 +4509,9 @@ define <vscale x 4 x i1> @fcmps_ord_fv_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4523,9 +4523,9 @@ define <vscale x 4 x i1> @fcmps_ueq_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmps_ueq_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v8, v10 -; CHECK-NEXT: vmflt.vv v13, v10, v8 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmflt.vv v2, v10, v8 +; CHECK-NEXT: vmnor.mm v0, v2, 
v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -4535,9 +4535,9 @@ define <vscale x 4 x i1> @fcmps_ueq_vf_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK-LABEL: fcmps_ueq_vf_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4549,9 +4549,9 @@ define <vscale x 4 x i1> @fcmps_ueq_fv_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK-LABEL: fcmps_ueq_fv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4563,8 +4563,8 @@ define <vscale x 4 x i1> @fcmps_ugt_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmps_ugt_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v8, v10 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vv v0, v8, v10 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ugt", metadata 
!"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -4574,8 +4574,8 @@ define <vscale x 4 x i1> @fcmps_ugt_vf_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK-LABEL: fcmps_ugt_vf_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4587,8 +4587,8 @@ define <vscale x 4 x i1> @fcmps_ugt_fv_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK-LABEL: fcmps_ugt_fv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4600,8 +4600,8 @@ define <vscale x 4 x i1> @fcmps_uge_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmps_uge_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v8, v10 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -4611,8 +4611,8 @@ define <vscale x 4 x i1> @fcmps_uge_vf_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK-LABEL: fcmps_uge_vf_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmflt.vf v0, 
v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4624,8 +4624,8 @@ define <vscale x 4 x i1> @fcmps_uge_fv_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK-LABEL: fcmps_uge_fv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4637,8 +4637,8 @@ define <vscale x 4 x i1> @fcmps_ult_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmps_ult_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -4648,8 +4648,8 @@ define <vscale x 4 x i1> @fcmps_ult_vf_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK-LABEL: fcmps_ult_vf_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4661,8 +4661,8 @@ define <vscale x 4 x i1> @fcmps_ult_fv_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK-LABEL: 
fcmps_ult_fv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4674,8 +4674,8 @@ define <vscale x 4 x i1> @fcmps_ule_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmps_ule_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v10, v8 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vv v0, v10, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -4685,8 +4685,8 @@ define <vscale x 4 x i1> @fcmps_ule_vf_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK-LABEL: fcmps_ule_vf_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4698,8 +4698,8 @@ define <vscale x 4 x i1> @fcmps_ule_fv_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK-LABEL: fcmps_ule_fv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> 
%head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4711,9 +4711,9 @@ define <vscale x 4 x i1> @fcmps_une_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmps_une_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vmfle.vv v13, v8, v10 -; CHECK-NEXT: vmnand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmfle.vv v2, v8, v10 +; CHECK-NEXT: vmnand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -4723,9 +4723,9 @@ define <vscale x 4 x i1> @fcmps_une_vf_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK-LABEL: fcmps_une_vf_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v11, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v2, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4737,9 +4737,9 @@ define <vscale x 4 x i1> @fcmps_une_fv_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK-LABEL: fcmps_une_fv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v11, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v2, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4751,10 +4751,10 @@ 
define <vscale x 4 x i1> @fcmps_uno_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmps_uno_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v10 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmnot.m v8, v10 -; CHECK-NEXT: vmorn.mm v0, v8, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v10 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmnot.m v8, v2 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -4765,10 +4765,10 @@ define <vscale x 4 x i1> @fcmps_uno_vf_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmnot.m v8, v10 -; CHECK-NEXT: vmorn.mm v0, v8, v12 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmnot.m v8, v2 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4781,10 +4781,10 @@ define <vscale x 4 x i1> @fcmps_uno_fv_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmnot.m v10, v12 -; CHECK-NEXT: vmfle.vv v11, v8, v8 -; CHECK-NEXT: vmorn.mm v0, v10, v11 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmnot.m v10, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v8 +; CHECK-NEXT: vmorn.mm v0, v10, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 
x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4797,9 +4797,9 @@ define <vscale x 8 x i1> @fcmps_oeq_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmps_oeq_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmfle.vv v17, v8, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmfle.vv v4, v8, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -4809,9 +4809,9 @@ define <vscale x 8 x i1> @fcmps_oeq_vf_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK-LABEL: fcmps_oeq_vf_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v4, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -4823,9 +4823,9 @@ define <vscale x 8 x i1> @fcmps_oeq_fv_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK-LABEL: fcmps_oeq_fv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v4, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -4973,9 +4973,9 @@ define <vscale x 8 x i1> 
@fcmps_one_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmps_one_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmflt.vv v17, v12, v8 -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmflt.vv v4, v12, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -4985,9 +4985,9 @@ define <vscale x 8 x i1> @fcmps_one_vf_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK-LABEL: fcmps_one_vf_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -4999,9 +4999,9 @@ define <vscale x 8 x i1> @fcmps_one_fv_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK-LABEL: fcmps_one_fv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5013,9 +5013,9 @@ define <vscale x 8 x i1> @fcmps_ord_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmps_ord_vv_nxv8f32: ; CHECK: # %bb.0: 
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v12 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v12 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -5026,9 +5026,9 @@ define <vscale x 8 x i1> @fcmps_ord_vf_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5041,9 +5041,9 @@ define <vscale x 8 x i1> @fcmps_ord_fv_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5055,9 +5055,9 @@ define <vscale x 8 x i1> @fcmps_ueq_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmps_ueq_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmflt.vv v17, 
v12, v8 -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmflt.vv v4, v12, v8 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -5067,9 +5067,9 @@ define <vscale x 8 x i1> @fcmps_ueq_vf_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK-LABEL: fcmps_ueq_vf_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5081,9 +5081,9 @@ define <vscale x 8 x i1> @fcmps_ueq_fv_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK-LABEL: fcmps_ueq_fv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5095,8 +5095,8 @@ define <vscale x 8 x i1> @fcmps_ugt_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmps_ugt_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v8, v12 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vv v0, v8, v12 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale 
x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -5106,8 +5106,8 @@ define <vscale x 8 x i1> @fcmps_ugt_vf_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK-LABEL: fcmps_ugt_vf_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5119,8 +5119,8 @@ define <vscale x 8 x i1> @fcmps_ugt_fv_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK-LABEL: fcmps_ugt_fv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5132,8 +5132,8 @@ define <vscale x 8 x i1> @fcmps_uge_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmps_uge_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -5143,8 +5143,8 @@ define <vscale x 8 x i1> @fcmps_uge_vf_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK-LABEL: fcmps_uge_vf_nxv8f32: ; CHECK: # %bb.0: ; 
CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5156,8 +5156,8 @@ define <vscale x 8 x i1> @fcmps_uge_fv_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK-LABEL: fcmps_uge_fv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5169,8 +5169,8 @@ define <vscale x 8 x i1> @fcmps_ult_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmps_ult_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -5180,8 +5180,8 @@ define <vscale x 8 x i1> @fcmps_ult_vf_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK-LABEL: fcmps_ult_vf_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 
x i32> zeroinitializer @@ -5193,8 +5193,8 @@ define <vscale x 8 x i1> @fcmps_ult_fv_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK-LABEL: fcmps_ult_fv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5206,8 +5206,8 @@ define <vscale x 8 x i1> @fcmps_ule_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmps_ule_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v12, v8 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vv v0, v12, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -5217,8 +5217,8 @@ define <vscale x 8 x i1> @fcmps_ule_vf_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK-LABEL: fcmps_ule_vf_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5230,8 +5230,8 @@ define <vscale x 8 x i1> @fcmps_ule_fv_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK-LABEL: fcmps_ule_fv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m 
v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5243,9 +5243,9 @@ define <vscale x 8 x i1> @fcmps_une_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmps_une_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmfle.vv v17, v8, v12 -; CHECK-NEXT: vmnand.mm v0, v17, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmfle.vv v4, v8, v12 +; CHECK-NEXT: vmnand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -5255,9 +5255,9 @@ define <vscale x 8 x i1> @fcmps_une_vf_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK-LABEL: fcmps_une_vf_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v13, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v4, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5269,9 +5269,9 @@ define <vscale x 8 x i1> @fcmps_une_fv_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK-LABEL: fcmps_une_fv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v4, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat 
= shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5283,10 +5283,10 @@ define <vscale x 8 x i1> @fcmps_uno_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmps_uno_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v12 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmnot.m v8, v12 -; CHECK-NEXT: vmorn.mm v0, v8, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v12 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmnot.m v8, v4 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -5297,10 +5297,10 @@ define <vscale x 8 x i1> @fcmps_uno_vf_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmnot.m v8, v12 -; CHECK-NEXT: vmorn.mm v0, v8, v16 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmnot.m v8, v4 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5313,10 +5313,10 @@ define <vscale x 8 x i1> @fcmps_uno_fv_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmnot.m v12, v16 -; CHECK-NEXT: vmfle.vv v13, v8, v8 -; CHECK-NEXT: vmorn.mm v0, v12, v13 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmnot.m v12, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v8 +; CHECK-NEXT: vmorn.mm v0, v12, v0 ; CHECK-NEXT: 
ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5329,9 +5329,9 @@ define <vscale x 16 x i1> @fcmps_oeq_vv_nxv16f32(<vscale x 16 x float> %va, <vsc ; CHECK-LABEL: fcmps_oeq_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v8 -; CHECK-NEXT: vmfle.vv v25, v8, v16 -; CHECK-NEXT: vmand.mm v0, v25, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v8 +; CHECK-NEXT: vmfle.vv v8, v8, v16 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -5341,9 +5341,9 @@ define <vscale x 16 x i1> @fcmps_oeq_vf_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK-LABEL: fcmps_oeq_vf_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfge.vf v16, v8, fa0 -; CHECK-NEXT: vmfle.vf v17, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v8, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -5355,9 +5355,9 @@ define <vscale x 16 x i1> @fcmps_oeq_fv_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK-LABEL: fcmps_oeq_fv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfle.vf v16, v8, fa0 -; CHECK-NEXT: vmfge.vf v17, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v8, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = 
shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -5505,9 +5505,9 @@ define <vscale x 16 x i1> @fcmps_one_vv_nxv16f32(<vscale x 16 x float> %va, <vsc ; CHECK-LABEL: fcmps_one_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmflt.vv v25, v16, v8 -; CHECK-NEXT: vmor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: vmflt.vv v8, v16, v8 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -5517,9 +5517,9 @@ define <vscale x 16 x i1> @fcmps_one_vf_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK-LABEL: fcmps_one_vf_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v8, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -5531,9 +5531,9 @@ define <vscale x 16 x i1> @fcmps_one_fv_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK-LABEL: fcmps_one_fv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v8, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> 
zeroinitializer @@ -5545,9 +5545,9 @@ define <vscale x 16 x i1> @fcmps_ord_vv_nxv16f32(<vscale x 16 x float> %va, <vsc ; CHECK-LABEL: fcmps_ord_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v16 -; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v16 +; CHECK-NEXT: vmfle.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -5558,9 +5558,9 @@ define <vscale x 16 x i1> @fcmps_ord_vf_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfle.vf v24, v16, fa0 -; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfle.vf v0, v16, fa0 +; CHECK-NEXT: vmfle.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -5573,9 +5573,9 @@ define <vscale x 16 x i1> @fcmps_ord_fv_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfle.vf v24, v16, fa0 -; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmfle.vf v0, v16, fa0 +; CHECK-NEXT: vmfle.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v8 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -5587,9 +5587,9 @@ define <vscale x 16 x i1> @fcmps_ueq_vv_nxv16f32(<vscale x 16 
x float> %va, <vsc ; CHECK-LABEL: fcmps_ueq_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmflt.vv v25, v16, v8 -; CHECK-NEXT: vmnor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: vmflt.vv v8, v16, v8 +; CHECK-NEXT: vmnor.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -5599,9 +5599,9 @@ define <vscale x 16 x i1> @fcmps_ueq_vf_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK-LABEL: fcmps_ueq_vf_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v8, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -5613,9 +5613,9 @@ define <vscale x 16 x i1> @fcmps_ueq_fv_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK-LABEL: fcmps_ueq_fv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v8, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -5627,8 +5627,8 @@ define <vscale x 16 x i1> @fcmps_ugt_vv_nxv16f32(<vscale x 16 x float> %va, <vsc ; CHECK-LABEL: fcmps_ugt_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: 
vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v8, v16 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfle.vv v0, v8, v16 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -5638,8 +5638,8 @@ define <vscale x 16 x i1> @fcmps_ugt_vf_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK-LABEL: fcmps_ugt_vf_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfle.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -5651,8 +5651,8 @@ define <vscale x 16 x i1> @fcmps_ugt_fv_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK-LABEL: fcmps_ugt_fv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfge.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -5664,8 +5664,8 @@ define <vscale x 16 x i1> @fcmps_uge_vv_nxv16f32(<vscale x 16 x float> %va, <vsc ; CHECK-LABEL: fcmps_uge_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"uge", metadata 
!"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -5675,8 +5675,8 @@ define <vscale x 16 x i1> @fcmps_uge_vf_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK-LABEL: fcmps_uge_vf_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -5688,8 +5688,8 @@ define <vscale x 16 x i1> @fcmps_uge_fv_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK-LABEL: fcmps_uge_fv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -5701,8 +5701,8 @@ define <vscale x 16 x i1> @fcmps_ult_vv_nxv16f32(<vscale x 16 x float> %va, <vsc ; CHECK-LABEL: fcmps_ult_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v8 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -5712,8 +5712,8 @@ define <vscale x 16 x i1> @fcmps_ult_vf_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK-LABEL: fcmps_ult_vf_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfge.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; 
CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -5725,8 +5725,8 @@ define <vscale x 16 x i1> @fcmps_ult_fv_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK-LABEL: fcmps_ult_fv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfle.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -5738,8 +5738,8 @@ define <vscale x 16 x i1> @fcmps_ule_vv_nxv16f32(<vscale x 16 x float> %va, <vsc ; CHECK-LABEL: fcmps_ule_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v16, v8 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmflt.vv v0, v16, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -5749,8 +5749,8 @@ define <vscale x 16 x i1> @fcmps_ule_vf_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK-LABEL: fcmps_ule_vf_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -5762,8 +5762,8 @@ define <vscale x 16 x i1> 
@fcmps_ule_fv_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK-LABEL: fcmps_ule_fv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -5775,9 +5775,9 @@ define <vscale x 16 x i1> @fcmps_une_vv_nxv16f32(<vscale x 16 x float> %va, <vsc ; CHECK-LABEL: fcmps_une_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v8 -; CHECK-NEXT: vmfle.vv v25, v8, v16 -; CHECK-NEXT: vmnand.mm v0, v25, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v8 +; CHECK-NEXT: vmfle.vv v8, v8, v16 +; CHECK-NEXT: vmnand.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -5787,9 +5787,9 @@ define <vscale x 16 x i1> @fcmps_une_vf_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK-LABEL: fcmps_une_vf_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfge.vf v16, v8, fa0 -; CHECK-NEXT: vmfle.vf v17, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v17, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v8, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -5801,9 +5801,9 @@ define <vscale x 16 x i1> @fcmps_une_fv_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK-LABEL: fcmps_une_fv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; 
CHECK-NEXT: vmfle.vf v16, v8, fa0 -; CHECK-NEXT: vmfge.vf v17, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v17, v16 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v8, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -5815,10 +5815,10 @@ define <vscale x 16 x i1> @fcmps_uno_vv_nxv16f32(<vscale x 16 x float> %va, <vsc ; CHECK-LABEL: fcmps_uno_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v16 -; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmnot.m v8, v16 -; CHECK-NEXT: vmorn.mm v0, v8, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v16 +; CHECK-NEXT: vmfle.vv v8, v8, v8 +; CHECK-NEXT: vmnot.m v8, v8 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -5829,10 +5829,10 @@ define <vscale x 16 x i1> @fcmps_uno_vf_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfle.vf v24, v16, fa0 -; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmnot.m v8, v16 -; CHECK-NEXT: vmorn.mm v0, v8, v24 +; CHECK-NEXT: vmfle.vf v0, v16, fa0 +; CHECK-NEXT: vmfle.vv v8, v8, v8 +; CHECK-NEXT: vmnot.m v8, v8 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -5845,10 +5845,10 @@ define <vscale x 16 x i1> @fcmps_uno_fv_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; 
CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfle.vf v24, v16, fa0 -; CHECK-NEXT: vmnot.m v16, v24 -; CHECK-NEXT: vmfle.vv v17, v8, v8 -; CHECK-NEXT: vmorn.mm v0, v16, v17 +; CHECK-NEXT: vmfle.vf v0, v16, fa0 +; CHECK-NEXT: vmnot.m v16, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v8 +; CHECK-NEXT: vmorn.mm v0, v16, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -6393,9 +6393,9 @@ define <vscale x 2 x i1> @fcmps_oeq_vv_nxv2f64(<vscale x 2 x double> %va, <vscal ; CHECK-LABEL: fcmps_oeq_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vmfle.vv v13, v8, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmfle.vv v2, v8, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -6405,9 +6405,9 @@ define <vscale x 2 x i1> @fcmps_oeq_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK-LABEL: fcmps_oeq_vf_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v11, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v2, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6419,9 +6419,9 @@ define <vscale x 2 x i1> @fcmps_oeq_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK-LABEL: fcmps_oeq_fv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma 
-; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v11, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v2, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6569,9 +6569,9 @@ define <vscale x 2 x i1> @fcmps_one_vv_nxv2f64(<vscale x 2 x double> %va, <vscal ; CHECK-LABEL: fcmps_one_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v8, v10 -; CHECK-NEXT: vmflt.vv v13, v10, v8 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmflt.vv v2, v10, v8 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -6581,9 +6581,9 @@ define <vscale x 2 x i1> @fcmps_one_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK-LABEL: fcmps_one_vf_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6595,9 +6595,9 @@ define <vscale x 2 x i1> @fcmps_one_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK-LABEL: fcmps_one_fv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0 -; CHECK-NEXT: vmor.mm 
v0, v11, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6609,9 +6609,9 @@ define <vscale x 2 x i1> @fcmps_ord_vv_nxv2f64(<vscale x 2 x double> %va, <vscal ; CHECK-LABEL: fcmps_ord_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v10 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v10 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -6622,9 +6622,9 @@ define <vscale x 2 x i1> @fcmps_ord_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6637,9 +6637,9 @@ define <vscale x 2 x i1> @fcmps_ord_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: 
vmand.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6651,9 +6651,9 @@ define <vscale x 2 x i1> @fcmps_ueq_vv_nxv2f64(<vscale x 2 x double> %va, <vscal ; CHECK-LABEL: fcmps_ueq_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v8, v10 -; CHECK-NEXT: vmflt.vv v13, v10, v8 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmflt.vv v2, v10, v8 +; CHECK-NEXT: vmnor.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -6663,9 +6663,9 @@ define <vscale x 2 x i1> @fcmps_ueq_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK-LABEL: fcmps_ueq_vf_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6677,9 +6677,9 @@ define <vscale x 2 x i1> @fcmps_ueq_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK-LABEL: fcmps_ueq_fv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, 
double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6691,8 +6691,8 @@ define <vscale x 2 x i1> @fcmps_ugt_vv_nxv2f64(<vscale x 2 x double> %va, <vscal ; CHECK-LABEL: fcmps_ugt_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v8, v10 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vv v0, v8, v10 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -6702,8 +6702,8 @@ define <vscale x 2 x i1> @fcmps_ugt_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK-LABEL: fcmps_ugt_vf_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6715,8 +6715,8 @@ define <vscale x 2 x i1> @fcmps_ugt_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK-LABEL: fcmps_ugt_fv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6728,8 +6728,8 @@ define <vscale x 2 x i1> @fcmps_uge_vv_nxv2f64(<vscale x 2 x double> %va, <vscal ; CHECK-LABEL: fcmps_uge_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; 
CHECK-NEXT: vmflt.vv v12, v8, v10 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -6739,8 +6739,8 @@ define <vscale x 2 x i1> @fcmps_uge_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK-LABEL: fcmps_uge_vf_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6752,8 +6752,8 @@ define <vscale x 2 x i1> @fcmps_uge_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK-LABEL: fcmps_uge_fv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6765,8 +6765,8 @@ define <vscale x 2 x i1> @fcmps_ult_vv_nxv2f64(<vscale x 2 x double> %va, <vscal ; CHECK-LABEL: fcmps_ult_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <vscale x 2 
x i1> %1 @@ -6776,8 +6776,8 @@ define <vscale x 2 x i1> @fcmps_ult_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK-LABEL: fcmps_ult_vf_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6789,8 +6789,8 @@ define <vscale x 2 x i1> @fcmps_ult_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK-LABEL: fcmps_ult_fv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6802,8 +6802,8 @@ define <vscale x 2 x i1> @fcmps_ule_vv_nxv2f64(<vscale x 2 x double> %va, <vscal ; CHECK-LABEL: fcmps_ule_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v10, v8 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vv v0, v10, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -6813,8 +6813,8 @@ define <vscale x 2 x i1> @fcmps_ule_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK-LABEL: fcmps_ule_vf_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, 
v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6826,8 +6826,8 @@ define <vscale x 2 x i1> @fcmps_ule_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK-LABEL: fcmps_ule_fv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6839,9 +6839,9 @@ define <vscale x 2 x i1> @fcmps_une_vv_nxv2f64(<vscale x 2 x double> %va, <vscal ; CHECK-LABEL: fcmps_une_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vmfle.vv v13, v8, v10 -; CHECK-NEXT: vmnand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmfle.vv v2, v8, v10 +; CHECK-NEXT: vmnand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -6851,9 +6851,9 @@ define <vscale x 2 x i1> @fcmps_une_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK-LABEL: fcmps_une_vf_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v11, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v2, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, 
<vscale x 2 x i32> zeroinitializer @@ -6865,9 +6865,9 @@ define <vscale x 2 x i1> @fcmps_une_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK-LABEL: fcmps_une_fv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v11, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v2, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6879,10 +6879,10 @@ define <vscale x 2 x i1> @fcmps_uno_vv_nxv2f64(<vscale x 2 x double> %va, <vscal ; CHECK-LABEL: fcmps_uno_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v10 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmnot.m v8, v10 -; CHECK-NEXT: vmorn.mm v0, v8, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v10 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmnot.m v8, v2 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -6893,10 +6893,10 @@ define <vscale x 2 x i1> @fcmps_uno_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmnot.m v8, v10 -; CHECK-NEXT: vmorn.mm v0, v8, v12 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmnot.m v8, v2 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, 
<vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6909,10 +6909,10 @@ define <vscale x 2 x i1> @fcmps_uno_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmnot.m v10, v12 -; CHECK-NEXT: vmfle.vv v11, v8, v8 -; CHECK-NEXT: vmorn.mm v0, v10, v11 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmnot.m v10, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v8 +; CHECK-NEXT: vmorn.mm v0, v10, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6925,9 +6925,9 @@ define <vscale x 4 x i1> @fcmps_oeq_vv_nxv4f64(<vscale x 4 x double> %va, <vscal ; CHECK-LABEL: fcmps_oeq_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmfle.vv v17, v8, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmfle.vv v4, v8, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -6937,9 +6937,9 @@ define <vscale x 4 x i1> @fcmps_oeq_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK-LABEL: fcmps_oeq_vf_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v4, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, 
<vscale x 4 x i32> zeroinitializer @@ -6951,9 +6951,9 @@ define <vscale x 4 x i1> @fcmps_oeq_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK-LABEL: fcmps_oeq_fv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v4, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -7101,9 +7101,9 @@ define <vscale x 4 x i1> @fcmps_one_vv_nxv4f64(<vscale x 4 x double> %va, <vscal ; CHECK-LABEL: fcmps_one_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmflt.vv v17, v12, v8 -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmflt.vv v4, v12, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -7113,9 +7113,9 @@ define <vscale x 4 x i1> @fcmps_one_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK-LABEL: fcmps_one_vf_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -7127,9 +7127,9 @@ define <vscale x 4 x i1> 
@fcmps_one_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK-LABEL: fcmps_one_fv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -7141,9 +7141,9 @@ define <vscale x 4 x i1> @fcmps_ord_vv_nxv4f64(<vscale x 4 x double> %va, <vscal ; CHECK-LABEL: fcmps_ord_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v12 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v12 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -7154,9 +7154,9 @@ define <vscale x 4 x i1> @fcmps_ord_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -7169,9 +7169,9 @@ define <vscale x 4 x i1> @fcmps_ord_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, 
zero, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -7183,9 +7183,9 @@ define <vscale x 4 x i1> @fcmps_ueq_vv_nxv4f64(<vscale x 4 x double> %va, <vscal ; CHECK-LABEL: fcmps_ueq_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmflt.vv v17, v12, v8 -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmflt.vv v4, v12, v8 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -7195,9 +7195,9 @@ define <vscale x 4 x i1> @fcmps_ueq_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK-LABEL: fcmps_ueq_vf_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -7209,9 +7209,9 @@ define <vscale x 4 x i1> @fcmps_ueq_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK-LABEL: fcmps_ueq_fv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; 
CHECK-NEXT: vmflt.vf v13, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -7223,8 +7223,8 @@ define <vscale x 4 x i1> @fcmps_ugt_vv_nxv4f64(<vscale x 4 x double> %va, <vscal ; CHECK-LABEL: fcmps_ugt_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v8, v12 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vv v0, v8, v12 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -7234,8 +7234,8 @@ define <vscale x 4 x i1> @fcmps_ugt_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK-LABEL: fcmps_ugt_vf_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -7247,8 +7247,8 @@ define <vscale x 4 x i1> @fcmps_ugt_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK-LABEL: fcmps_ugt_fv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, 
<vscale x 4 x i32> zeroinitializer @@ -7260,8 +7260,8 @@ define <vscale x 4 x i1> @fcmps_uge_vv_nxv4f64(<vscale x 4 x double> %va, <vscal ; CHECK-LABEL: fcmps_uge_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -7271,8 +7271,8 @@ define <vscale x 4 x i1> @fcmps_uge_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK-LABEL: fcmps_uge_vf_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -7284,8 +7284,8 @@ define <vscale x 4 x i1> @fcmps_uge_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK-LABEL: fcmps_uge_fv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -7297,8 +7297,8 @@ define <vscale x 4 x i1> @fcmps_ult_vv_nxv4f64(<vscale x 4 x double> %va, <vscal ; CHECK-LABEL: fcmps_ult_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 
+; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -7308,8 +7308,8 @@ define <vscale x 4 x i1> @fcmps_ult_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK-LABEL: fcmps_ult_vf_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -7321,8 +7321,8 @@ define <vscale x 4 x i1> @fcmps_ult_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK-LABEL: fcmps_ult_fv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -7334,8 +7334,8 @@ define <vscale x 4 x i1> @fcmps_ule_vv_nxv4f64(<vscale x 4 x double> %va, <vscal ; CHECK-LABEL: fcmps_ule_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v12, v8 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vv v0, v12, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -7345,8 +7345,8 @@ define <vscale x 4 x i1> @fcmps_ule_vf_nxv4f64(<vscale x 4 x double> 
%va, double ; CHECK-LABEL: fcmps_ule_vf_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -7358,8 +7358,8 @@ define <vscale x 4 x i1> @fcmps_ule_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK-LABEL: fcmps_ule_fv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -7371,9 +7371,9 @@ define <vscale x 4 x i1> @fcmps_une_vv_nxv4f64(<vscale x 4 x double> %va, <vscal ; CHECK-LABEL: fcmps_une_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmfle.vv v17, v8, v12 -; CHECK-NEXT: vmnand.mm v0, v17, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmfle.vv v4, v8, v12 +; CHECK-NEXT: vmnand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -7383,9 +7383,9 @@ define <vscale x 4 x i1> @fcmps_une_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK-LABEL: fcmps_une_vf_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v13, v12 +; CHECK-NEXT: vmfge.vf v0, v8, 
fa0 +; CHECK-NEXT: vmfle.vf v4, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -7397,9 +7397,9 @@ define <vscale x 4 x i1> @fcmps_une_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK-LABEL: fcmps_une_fv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v4, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -7411,10 +7411,10 @@ define <vscale x 4 x i1> @fcmps_uno_vv_nxv4f64(<vscale x 4 x double> %va, <vscal ; CHECK-LABEL: fcmps_uno_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v12 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmnot.m v8, v12 -; CHECK-NEXT: vmorn.mm v0, v8, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v12 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmnot.m v8, v4 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -7425,10 +7425,10 @@ define <vscale x 4 x i1> @fcmps_uno_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmnot.m v8, v12 -; CHECK-NEXT: vmorn.mm v0, v8, v16 +; CHECK-NEXT: vmfle.vf v0, v12, 
fa0 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmnot.m v8, v4 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -7441,10 +7441,10 @@ define <vscale x 4 x i1> @fcmps_uno_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmnot.m v12, v16 -; CHECK-NEXT: vmfle.vv v13, v8, v8 -; CHECK-NEXT: vmorn.mm v0, v12, v13 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmnot.m v12, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v8 +; CHECK-NEXT: vmorn.mm v0, v12, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -7457,9 +7457,9 @@ define <vscale x 8 x i1> @fcmps_oeq_vv_nxv8f64(<vscale x 8 x double> %va, <vscal ; CHECK-LABEL: fcmps_oeq_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v8 -; CHECK-NEXT: vmfle.vv v25, v8, v16 -; CHECK-NEXT: vmand.mm v0, v25, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v8 +; CHECK-NEXT: vmfle.vv v8, v8, v16 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -7469,9 +7469,9 @@ define <vscale x 8 x i1> @fcmps_oeq_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmps_oeq_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfge.vf v16, v8, fa0 -; CHECK-NEXT: vmfle.vf v17, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 
+; CHECK-NEXT: vmfle.vf v8, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7483,9 +7483,9 @@ define <vscale x 8 x i1> @fcmps_oeq_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmps_oeq_fv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vf v16, v8, fa0 -; CHECK-NEXT: vmfge.vf v17, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v8, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7633,9 +7633,9 @@ define <vscale x 8 x i1> @fcmps_one_vv_nxv8f64(<vscale x 8 x double> %va, <vscal ; CHECK-LABEL: fcmps_one_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmflt.vv v25, v16, v8 -; CHECK-NEXT: vmor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: vmflt.vv v8, v16, v8 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -7645,9 +7645,9 @@ define <vscale x 8 x i1> @fcmps_one_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmps_one_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v8, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = 
insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7659,9 +7659,9 @@ define <vscale x 8 x i1> @fcmps_one_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmps_one_fv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v8, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7673,9 +7673,9 @@ define <vscale x 8 x i1> @fcmps_ord_vv_nxv8f64(<vscale x 8 x double> %va, <vscal ; CHECK-LABEL: fcmps_ord_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v16 -; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v16 +; CHECK-NEXT: vmfle.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -7686,9 +7686,9 @@ define <vscale x 8 x i1> @fcmps_ord_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfle.vf v24, v16, fa0 -; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfle.vf v0, v16, fa0 +; CHECK-NEXT: vmfle.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x 
double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7701,9 +7701,9 @@ define <vscale x 8 x i1> @fcmps_ord_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfle.vf v24, v16, fa0 -; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmfle.vf v0, v16, fa0 +; CHECK-NEXT: vmfle.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v8 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7715,9 +7715,9 @@ define <vscale x 8 x i1> @fcmps_ueq_vv_nxv8f64(<vscale x 8 x double> %va, <vscal ; CHECK-LABEL: fcmps_ueq_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmflt.vv v25, v16, v8 -; CHECK-NEXT: vmnor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: vmflt.vv v8, v16, v8 +; CHECK-NEXT: vmnor.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -7727,9 +7727,9 @@ define <vscale x 8 x i1> @fcmps_ueq_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmps_ueq_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v8, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7741,9 +7741,9 
@@ define <vscale x 8 x i1> @fcmps_ueq_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmps_ueq_fv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v8, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7755,8 +7755,8 @@ define <vscale x 8 x i1> @fcmps_ugt_vv_nxv8f64(<vscale x 8 x double> %va, <vscal ; CHECK-LABEL: fcmps_ugt_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v8, v16 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfle.vv v0, v8, v16 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -7766,8 +7766,8 @@ define <vscale x 8 x i1> @fcmps_ugt_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmps_ugt_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7779,8 +7779,8 @@ define <vscale x 8 x i1> @fcmps_ugt_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmps_ugt_fv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfge.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; 
CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7792,8 +7792,8 @@ define <vscale x 8 x i1> @fcmps_uge_vv_nxv8f64(<vscale x 8 x double> %va, <vscal ; CHECK-LABEL: fcmps_uge_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -7803,8 +7803,8 @@ define <vscale x 8 x i1> @fcmps_uge_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmps_uge_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7816,8 +7816,8 @@ define <vscale x 8 x i1> @fcmps_uge_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmps_uge_fv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7829,8 +7829,8 @@ define <vscale x 8 x i1> @fcmps_ult_vv_nxv8f64(<vscale x 
8 x double> %va, <vscal ; CHECK-LABEL: fcmps_ult_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v8 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -7840,8 +7840,8 @@ define <vscale x 8 x i1> @fcmps_ult_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmps_ult_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfge.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7853,8 +7853,8 @@ define <vscale x 8 x i1> @fcmps_ult_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmps_ult_fv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7866,8 +7866,8 @@ define <vscale x 8 x i1> @fcmps_ule_vv_nxv8f64(<vscale x 8 x double> %va, <vscal ; CHECK-LABEL: fcmps_ule_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v16, v8 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmflt.vv v0, v16, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> 
@llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -7877,8 +7877,8 @@ define <vscale x 8 x i1> @fcmps_ule_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmps_ule_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7890,8 +7890,8 @@ define <vscale x 8 x i1> @fcmps_ule_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmps_ule_fv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7903,9 +7903,9 @@ define <vscale x 8 x i1> @fcmps_une_vv_nxv8f64(<vscale x 8 x double> %va, <vscal ; CHECK-LABEL: fcmps_une_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v8 -; CHECK-NEXT: vmfle.vv v25, v8, v16 -; CHECK-NEXT: vmnand.mm v0, v25, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v8 +; CHECK-NEXT: vmfle.vv v8, v8, v16 +; CHECK-NEXT: vmnand.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -7915,9 +7915,9 @@ define <vscale x 8 x i1> @fcmps_une_vf_nxv8f64(<vscale x 8 
x double> %va, double ; CHECK-LABEL: fcmps_une_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfge.vf v16, v8, fa0 -; CHECK-NEXT: vmfle.vf v17, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v17, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v8, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7929,9 +7929,9 @@ define <vscale x 8 x i1> @fcmps_une_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmps_une_fv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vf v16, v8, fa0 -; CHECK-NEXT: vmfge.vf v17, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v17, v16 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v8, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7943,10 +7943,10 @@ define <vscale x 8 x i1> @fcmps_uno_vv_nxv8f64(<vscale x 8 x double> %va, <vscal ; CHECK-LABEL: fcmps_uno_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v16 -; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmnot.m v8, v16 -; CHECK-NEXT: vmorn.mm v0, v8, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v16 +; CHECK-NEXT: vmfle.vv v8, v8, v8 +; CHECK-NEXT: vmnot.m v8, v8 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -7957,10 +7957,10 @@ define <vscale x 8 x i1> @fcmps_uno_vf_nxv8f64(<vscale x 8 x double> %va, double ; 
CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfle.vf v24, v16, fa0 -; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmnot.m v8, v16 -; CHECK-NEXT: vmorn.mm v0, v8, v24 +; CHECK-NEXT: vmfle.vf v0, v16, fa0 +; CHECK-NEXT: vmfle.vv v8, v8, v8 +; CHECK-NEXT: vmnot.m v8, v8 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7973,10 +7973,10 @@ define <vscale x 8 x i1> @fcmps_uno_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfle.vf v24, v16, fa0 -; CHECK-NEXT: vmnot.m v16, v24 -; CHECK-NEXT: vmfle.vv v17, v8, v8 -; CHECK-NEXT: vmorn.mm v0, v16, v17 +; CHECK-NEXT: vmfle.vf v0, v16, fa0 +; CHECK-NEXT: vmnot.m v16, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v8 +; CHECK-NEXT: vmorn.mm v0, v16, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll index 6d24bc2..eadb28f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll @@ -1108,95 +1108,69 @@ define <vscale x 16 x double> @vfma_vv_nxv16f64(<vscale x 16 x double> %va, <vsc ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 50 +; CHECK-NEXT: li a3, 42 ; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x32, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 50 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 
0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 18 +; CHECK-NEXT: li a3, 24 ; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 34 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; CHECK-NEXT: srli a3, a1, 3 +; CHECK-NEXT: vsetvli a5, zero, e8, mf4, ta, ma +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: li a6, 40 +; CHECK-NEXT: mul a5, a5, a6 +; CHECK-NEXT: add a5, sp, a5 +; CHECK-NEXT: addi a5, a5, 16 +; CHECK-NEXT: vs1r.v v0, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: vslidedown.vx v0, v0, a3 ; CHECK-NEXT: slli a3, a1, 3 ; CHECK-NEXT: add a5, a2, a3 ; CHECK-NEXT: vl8re64.v v8, (a5) ; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 26 -; CHECK-NEXT: mul a5, a5, a6 +; CHECK-NEXT: slli a5, a5, 4 ; CHECK-NEXT: add a5, sp, a5 ; CHECK-NEXT: addi a5, a5, 16 ; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: add a3, a0, a3 ; CHECK-NEXT: sub a5, a4, a1 ; CHECK-NEXT: sltu a6, a4, a5 ; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a5, a6, a5 -; CHECK-NEXT: srli a6, a1, 3 -; CHECK-NEXT: add a3, a0, a3 ; CHECK-NEXT: vl8re64.v v8, (a3) -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: li a7, 42 -; CHECK-NEXT: mul a3, a3, a7 -; CHECK-NEXT: add a3, sp, a3 -; CHECK-NEXT: addi a3, a3, 16 -; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: vl8re64.v v8, (a2) +; CHECK-NEXT: vl8re64.v v16, (a2) ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a3, a2, 3 -; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded 
Spill -; CHECK-NEXT: vl8re64.v v8, (a0) +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vl8re64.v v16, (a0) ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a2, a0, 4 -; CHECK-NEXT: add a0, a2, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs1r.v v0, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vslidedown.vx v16, v0, a6 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs1r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, ma -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 26 -; CHECK-NEXT: mul a0, a0, a2 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: and a0, a6, a5 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 18 +; CHECK-NEXT: li a2, 24 ; CHECK-NEXT: mul a0, a0, a2 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 42 -; CHECK-NEXT: mul a0, a0, a2 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfmadd.vv v8, v16, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 42 +; CHECK-NEXT: li a2, 24 ; CHECK-NEXT: mul a0, a0, a2 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 @@ -1207,34 +1181,33 @@ define 
<vscale x 16 x double> @vfma_vv_nxv16f64(<vscale x 16 x double> %va, <vsc ; CHECK-NEXT: .LBB92_2: ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 34 +; CHECK-NEXT: li a1, 40 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 4 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 3 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmadd.vv v8, v16, v24, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmadd.vv v24, v16, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 42 +; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 50 +; CHECK-NEXT: li a1, 42 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll index f4ab947..84c4417 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll @@ -1108,95 +1108,69 @@ define <vscale x 16 x double> @vfma_vv_nxv16f64(<vscale x 16 x double> %va, 
<vsc ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 50 +; CHECK-NEXT: li a3, 42 ; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x32, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 50 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 18 +; CHECK-NEXT: li a3, 24 ; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 34 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; CHECK-NEXT: srli a3, a1, 3 +; CHECK-NEXT: vsetvli a5, zero, e8, mf4, ta, ma +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: li a6, 40 +; CHECK-NEXT: mul a5, a5, a6 +; CHECK-NEXT: add a5, sp, a5 +; CHECK-NEXT: addi a5, a5, 16 +; CHECK-NEXT: vs1r.v v0, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: vslidedown.vx v0, v0, a3 ; CHECK-NEXT: slli a3, a1, 3 ; CHECK-NEXT: add a5, a2, a3 ; CHECK-NEXT: vl8re64.v v8, (a5) ; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 26 -; CHECK-NEXT: mul a5, a5, a6 +; CHECK-NEXT: slli a5, a5, 4 ; CHECK-NEXT: add a5, sp, a5 ; CHECK-NEXT: addi a5, a5, 16 ; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: add a3, a0, a3 ; CHECK-NEXT: sub a5, a4, a1 ; CHECK-NEXT: sltu a6, a4, a5 ; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a5, a6, a5 -; CHECK-NEXT: srli a6, a1, 3 -; CHECK-NEXT: add a3, a0, a3 ; CHECK-NEXT: vl8re64.v v8, (a3) -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: li a7, 42 -; CHECK-NEXT: mul a3, a3, a7 -; 
CHECK-NEXT: add a3, sp, a3 -; CHECK-NEXT: addi a3, a3, 16 -; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: vl8re64.v v8, (a2) +; CHECK-NEXT: vl8re64.v v16, (a2) ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a3, a2, 3 -; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vl8re64.v v8, (a0) +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vl8re64.v v16, (a0) ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a2, a0, 4 -; CHECK-NEXT: add a0, a2, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs1r.v v0, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vslidedown.vx v16, v0, a6 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs1r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, ma -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 26 -; CHECK-NEXT: mul a0, a0, a2 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: and a0, a6, a5 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 18 +; CHECK-NEXT: li a2, 24 ; CHECK-NEXT: mul a0, a0, a2 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 42 -; CHECK-NEXT: mul a0, a0, a2 +; CHECK-NEXT: slli a0, a0, 4 ; 
CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfmadd.vv v8, v16, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 42 +; CHECK-NEXT: li a2, 24 ; CHECK-NEXT: mul a0, a0, a2 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 @@ -1207,34 +1181,33 @@ define <vscale x 16 x double> @vfma_vv_nxv16f64(<vscale x 16 x double> %va, <vsc ; CHECK-NEXT: .LBB92_2: ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 34 +; CHECK-NEXT: li a1, 40 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 4 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 3 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmadd.vv v8, v16, v24, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmadd.vv v24, v16, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 42 +; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 50 +; CHECK-NEXT: li a1, 42 ; 
CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll index b888fde..63fa6e0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll @@ -932,13 +932,13 @@ define <vscale x 32 x i1> @vfptosi_nxv32f16_nxv32i1(<vscale x 32 x half> %va) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 ; ZVFHMIN-NEXT: vfncvt.rtz.x.f.w v12, v16 ; ZVFHMIN-NEXT: vand.vi v12, v12, 1 -; ZVFHMIN-NEXT: vmsne.vi v16, v12, 0 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 -; ZVFHMIN-NEXT: vfncvt.rtz.x.f.w v8, v24 +; ZVFHMIN-NEXT: vmsne.vi v4, v12, 0 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfncvt.rtz.x.f.w v8, v16 ; ZVFHMIN-NEXT: vand.vi v8, v8, 1 ; ZVFHMIN-NEXT: vmsne.vi v0, v8, 0 ; ZVFHMIN-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslideup.vx v0, v16, a0 +; ZVFHMIN-NEXT: vslideup.vx v0, v4, a0 ; ZVFHMIN-NEXT: ret %evec = fptosi <vscale x 32 x half> %va to <vscale x 32 x i1> ret <vscale x 32 x i1> %evec @@ -962,13 +962,13 @@ define <vscale x 32 x i1> @vfptoui_nxv32f16_nxv32i1(<vscale x 32 x half> %va) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 ; ZVFHMIN-NEXT: vfncvt.rtz.xu.f.w v12, v16 ; ZVFHMIN-NEXT: vand.vi v12, v12, 1 -; ZVFHMIN-NEXT: vmsne.vi v16, v12, 0 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 -; ZVFHMIN-NEXT: vfncvt.rtz.xu.f.w v8, v24 +; ZVFHMIN-NEXT: vmsne.vi v4, v12, 0 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfncvt.rtz.xu.f.w v8, v16 ; ZVFHMIN-NEXT: vand.vi v8, v8, 1 ; ZVFHMIN-NEXT: vmsne.vi v0, v8, 0 ; ZVFHMIN-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslideup.vx v0, v16, a0 +; ZVFHMIN-NEXT: vslideup.vx v0, v4, a0 ; ZVFHMIN-NEXT: ret %evec = fptoui <vscale x 32 x half> %va to <vscale x 32 x i1> ret <vscale x 32 x i1> %evec diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp-mask.ll index e5048ea..9fc87b5 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp-mask.ll @@ -76,9 +76,8 @@ define <vscale x 2 x i1> @vfptosi_nxv2i1_nxv2f64(<vscale x 2 x double> %va, <vsc ; CHECK-LABEL: vfptosi_nxv2i1_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; CHECK-NEXT: vmsne.vi v8, v10, 0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8, v0.t +; CHECK-NEXT: vmsne.vi v0, v8, 0, v0.t ; CHECK-NEXT: ret %v = call <vscale x 2 x i1> @llvm.vp.fptosi.nxv2i1.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl) ret <vscale x 2 x i1> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp-mask.ll index 4b609d0..f9ffcdb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp-mask.ll @@ -76,9 +76,8 @@ define <vscale x 2 x i1> @vfptoui_nxv2i1_nxv2f64(<vscale x 2 x double> %va, <vsc ; CHECK-LABEL: vfptoui_nxv2i1_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfcvt.rtz.xu.f.v v10, v8, v0.t -; CHECK-NEXT: vmsne.vi v8, v10, 0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8, v0.t +; CHECK-NEXT: vmsne.vi v0, v8, 0, v0.t ; CHECK-NEXT: ret %v = call <vscale x 2 x i1> @llvm.vp.fptoui.nxv2i1.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl) ret <vscale x 2 x i1> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll index b85197d..685653c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll @@ -139,29 +139,29 @@ define <vscale x 32 x float> @vfptrunc_nxv32f32_nxv32f64(<vscale x 32 x double> ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a5, a1, 3 -; CHECK-NEXT: vsetvli a3, zero, e8, mf4, 
ta, ma -; CHECK-NEXT: vslidedown.vx v6, v0, a5 -; CHECK-NEXT: srli a3, a1, 2 -; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: srli a3, a1, 3 +; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v6, v0, a3 +; CHECK-NEXT: srli a4, a1, 2 +; CHECK-NEXT: vsetvli a5, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v7, v0 -; CHECK-NEXT: vslidedown.vx v25, v0, a3 -; CHECK-NEXT: slli a3, a1, 3 -; CHECK-NEXT: add a3, a0, a3 -; CHECK-NEXT: vl8re64.v v16, (a3) -; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v8, v0, a4 +; CHECK-NEXT: slli a4, a1, 3 +; CHECK-NEXT: add a4, a0, a4 +; CHECK-NEXT: vl8re64.v v16, (a4) +; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v25, v8 +; CHECK-NEXT: vslidedown.vx v0, v8, a3 ; CHECK-NEXT: slli a3, a1, 1 ; CHECK-NEXT: sub a4, a2, a3 -; CHECK-NEXT: sltu a6, a2, a4 -; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a4, a6, a4 -; CHECK-NEXT: sub a6, a4, a1 -; CHECK-NEXT: sltu a7, a4, a6 +; CHECK-NEXT: sltu a5, a2, a4 +; CHECK-NEXT: addi a5, a5, -1 +; CHECK-NEXT: and a4, a5, a4 +; CHECK-NEXT: sub a5, a4, a1 ; CHECK-NEXT: vl8re64.v v8, (a0) -; CHECK-NEXT: addi a7, a7, -1 -; CHECK-NEXT: and a0, a7, a6 -; CHECK-NEXT: vmv1r.v v24, v25 -; CHECK-NEXT: vslidedown.vx v0, v25, a5 +; CHECK-NEXT: sltu a0, a4, a5 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a5 ; CHECK-NEXT: bltu a4, a1, .LBB8_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a4, a1 @@ -169,7 +169,7 @@ define <vscale x 32 x float> @vfptrunc_nxv32f32_nxv32f64(<vscale x 32 x double> ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfncvt.f.f.w v28, v16, v0.t ; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfncvt.f.f.w v24, v8, v0.t ; CHECK-NEXT: bltu a2, a3, .LBB8_4 ; CHECK-NEXT: # %bb.3: diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll b/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll index e718492..dd1547f 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll @@ -187,11 +187,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8f16( define <vscale x 8 x i1> @intrinsic_vmfeq_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmfeq.vv v0, v8, v10 -; CHECK-NEXT: vmfeq.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfeq.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8f16( @@ -238,11 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfeq.mask.nxv16f16( define <vscale x 16 x i1> @intrinsic_vmfeq_mask_vv_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x half> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v0, v8, v12 -; CHECK-NEXT: vmfeq.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfeq.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmfeq.nxv16f16( @@ -391,11 +391,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4f32( define <vscale x 4 x i1> @intrinsic_vmfeq_mask_vv_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x float> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; 
CHECK-NEXT: vmfeq.vv v0, v8, v10 -; CHECK-NEXT: vmfeq.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfeq.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f32( @@ -442,11 +442,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8f32( define <vscale x 8 x i1> @intrinsic_vmfeq_mask_vv_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x float> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v0, v8, v12 -; CHECK-NEXT: vmfeq.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfeq.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8f32( @@ -544,11 +544,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfeq.mask.nxv2f64( define <vscale x 2 x i1> @intrinsic_vmfeq_mask_vv_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x double> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmfeq.vv v0, v8, v10 -; CHECK-NEXT: vmfeq.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfeq.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2f64( @@ -595,11 +595,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4f64( define <vscale x 4 x i1> @intrinsic_vmfeq_mask_vv_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x double> %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vmfeq_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v0, v8, v12 -; CHECK-NEXT: vmfeq.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfeq.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f64( @@ -787,11 +787,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8f16.f16( define <vscale x 8 x i1> @intrinsic_vmfeq_mask_vf_nxv8f16_f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfeq.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8f16.f16( @@ -834,11 +834,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfeq.mask.nxv16f16.f16( define <vscale x 16 x i1> @intrinsic_vmfeq_mask_vf_nxv16f16_f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfeq.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmfeq.mask.nxv16f16.f16( @@ -975,11 +975,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4f32.f32( define <vscale x 4 x i1> 
@intrinsic_vmfeq_mask_vf_nxv4f32_f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfeq.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4f32.f32( @@ -1022,11 +1022,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8f32.f32( define <vscale x 8 x i1> @intrinsic_vmfeq_mask_vf_nxv8f32_f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfeq.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8f32.f32( @@ -1116,11 +1116,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfeq.mask.nxv2f64.f64( define <vscale x 2 x i1> @intrinsic_vmfeq_mask_vf_nxv2f64_f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfeq.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmfeq.mask.nxv2f64.f64( @@ 
-1163,11 +1163,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4f64.f64( define <vscale x 4 x i1> @intrinsic_vmfeq_mask_vf_nxv4f64_f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfeq.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfge.ll b/llvm/test/CodeGen/RISCV/rvv/vmfge.ll index a6dad9e..c78e8d7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfge.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfge.ll @@ -187,11 +187,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfge.mask.nxv8f16( define <vscale x 8 x i1> @intrinsic_vmfge_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmfle.vv v0, v10, v8 -; CHECK-NEXT: vmfle.vv v14, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfle.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8f16( @@ -238,11 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfge.mask.nxv16f16( define <vscale x 16 x i1> @intrinsic_vmfge_mask_vv_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x half> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: 
vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfle.vv v0, v12, v8 -; CHECK-NEXT: vmfle.vv v20, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfle.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmfge.nxv16f16( @@ -391,11 +391,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4f32( define <vscale x 4 x i1> @intrinsic_vmfge_mask_vv_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x float> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmfle.vv v0, v10, v8 -; CHECK-NEXT: vmfle.vv v14, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfle.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4f32( @@ -442,11 +442,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfge.mask.nxv8f32( define <vscale x 8 x i1> @intrinsic_vmfge_mask_vv_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x float> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmfle.vv v0, v12, v8 -; CHECK-NEXT: vmfle.vv v20, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfle.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8f32( @@ -544,11 +544,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfge.mask.nxv2f64( define <vscale x 2 x i1> @intrinsic_vmfge_mask_vv_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, 
<vscale x 2 x double> %2, <vscale x 2 x double> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmfle.vv v0, v10, v8 -; CHECK-NEXT: vmfle.vv v14, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfle.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2f64( @@ -595,11 +595,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4f64( define <vscale x 4 x i1> @intrinsic_vmfge_mask_vv_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x double> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmfle.vv v0, v12, v8 -; CHECK-NEXT: vmfle.vv v20, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfle.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4f64( @@ -787,11 +787,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfge.mask.nxv8f16.f16( define <vscale x 8 x i1> @intrinsic_vmfge_mask_vf_nxv8f16_f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfge.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmfge.mask.nxv8f16.f16( @@ -834,11 +834,11 @@ declare <vscale x 16 x 
i1> @llvm.riscv.vmfge.mask.nxv16f16.f16( define <vscale x 16 x i1> @intrinsic_vmfge_mask_vf_nxv16f16_f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfge.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmfge.mask.nxv16f16.f16( @@ -975,11 +975,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4f32.f32( define <vscale x 4 x i1> @intrinsic_vmfge_mask_vf_nxv4f32_f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfge.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4f32.f32( @@ -1022,11 +1022,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfge.mask.nxv8f32.f32( define <vscale x 8 x i1> @intrinsic_vmfge_mask_vf_nxv8f32_f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfge.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: 
%a = call <vscale x 8 x i1> @llvm.riscv.vmfge.mask.nxv8f32.f32( @@ -1116,11 +1116,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfge.mask.nxv2f64.f64( define <vscale x 2 x i1> @intrinsic_vmfge_mask_vf_nxv2f64_f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfge.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmfge.mask.nxv2f64.f64( @@ -1163,11 +1163,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4f64.f64( define <vscale x 4 x i1> @intrinsic_vmfge_mask_vf_nxv4f64_f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfge.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll b/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll index f643a40..b5299faf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll @@ -187,11 +187,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8f16( define <vscale x 8 x i1> @intrinsic_vmfgt_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv8f16_nxv8f16: ; 
CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmflt.vv v0, v10, v8 -; CHECK-NEXT: vmflt.vv v14, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmflt.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8f16( @@ -238,11 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfgt.mask.nxv16f16( define <vscale x 16 x i1> @intrinsic_vmfgt_mask_vv_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x half> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmflt.vv v0, v12, v8 -; CHECK-NEXT: vmflt.vv v20, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmflt.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmfgt.nxv16f16( @@ -391,11 +391,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4f32( define <vscale x 4 x i1> @intrinsic_vmfgt_mask_vv_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x float> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vv v0, v10, v8 -; CHECK-NEXT: vmflt.vv v14, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmflt.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4f32( @@ -442,11 +442,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8f32( define <vscale x 8 x i1> 
@intrinsic_vmfgt_mask_vv_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x float> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vv v0, v12, v8 -; CHECK-NEXT: vmflt.vv v20, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmflt.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8f32( @@ -544,11 +544,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2f64( define <vscale x 2 x i1> @intrinsic_vmfgt_mask_vv_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x double> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vv v0, v10, v8 -; CHECK-NEXT: vmflt.vv v14, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmflt.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2f64( @@ -595,11 +595,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4f64( define <vscale x 4 x i1> @intrinsic_vmfgt_mask_vv_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x double> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vv v0, v12, v8 -; CHECK-NEXT: vmflt.vv v20, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmflt.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: 
ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4f64( @@ -787,11 +787,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8f16.f16( define <vscale x 8 x i1> @intrinsic_vmfgt_mask_vf_nxv8f16_f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8f16.f16( @@ -834,11 +834,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfgt.mask.nxv16f16.f16( define <vscale x 16 x i1> @intrinsic_vmfgt_mask_vf_nxv16f16_f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmfgt.mask.nxv16f16.f16( @@ -975,11 +975,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4f32.f32( define <vscale x 4 x i1> @intrinsic_vmfgt_mask_vf_nxv4f32_f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: 
vmv1r.v v0, v11 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4f32.f32( @@ -1022,11 +1022,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8f32.f32( define <vscale x 8 x i1> @intrinsic_vmfgt_mask_vf_nxv8f32_f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8f32.f32( @@ -1116,11 +1116,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2f64.f64( define <vscale x 2 x i1> @intrinsic_vmfgt_mask_vf_nxv2f64_f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2f64.f64( @@ -1163,11 +1163,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4f64.f64( define <vscale x 4 x i1> @intrinsic_vmfgt_mask_vf_nxv4f64_f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, 
a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfle.ll b/llvm/test/CodeGen/RISCV/rvv/vmfle.ll index 6c52364..383b175 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfle.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfle.ll @@ -187,11 +187,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfle.mask.nxv8f16( define <vscale x 8 x i1> @intrinsic_vmfle_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmfle.vv v0, v8, v10 -; CHECK-NEXT: vmfle.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfle.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8f16( @@ -238,11 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfle.mask.nxv16f16( define <vscale x 16 x i1> @intrinsic_vmfle_mask_vv_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x half> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfle.vv v0, v8, v12 -; CHECK-NEXT: vmfle.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfle.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmfle.nxv16f16( @@ -391,11 +391,11 @@ declare <vscale x 4 x i1> 
@llvm.riscv.vmfle.mask.nxv4f32( define <vscale x 4 x i1> @intrinsic_vmfle_mask_vv_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x float> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmfle.vv v0, v8, v10 -; CHECK-NEXT: vmfle.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfle.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4f32( @@ -442,11 +442,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfle.mask.nxv8f32( define <vscale x 8 x i1> @intrinsic_vmfle_mask_vv_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x float> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmfle.vv v0, v8, v12 -; CHECK-NEXT: vmfle.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfle.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8f32( @@ -544,11 +544,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfle.mask.nxv2f64( define <vscale x 2 x i1> @intrinsic_vmfle_mask_vv_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x double> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmfle.vv v0, v8, v10 -; CHECK-NEXT: vmfle.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfle.vv v2, v10, 
v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2f64( @@ -595,11 +595,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4f64( define <vscale x 4 x i1> @intrinsic_vmfle_mask_vv_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x double> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmfle.vv v0, v8, v12 -; CHECK-NEXT: vmfle.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfle.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4f64( @@ -787,11 +787,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfle.mask.nxv8f16.f16( define <vscale x 8 x i1> @intrinsic_vmfle_mask_vf_nxv8f16_f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfle.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmfle.mask.nxv8f16.f16( @@ -834,11 +834,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfle.mask.nxv16f16.f16( define <vscale x 16 x i1> @intrinsic_vmfle_mask_vf_nxv16f16_f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: 
vmv1r.v v0, v12 -; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfle.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmfle.mask.nxv16f16.f16( @@ -975,11 +975,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4f32.f32( define <vscale x 4 x i1> @intrinsic_vmfle_mask_vf_nxv4f32_f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfle.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4f32.f32( @@ -1022,11 +1022,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfle.mask.nxv8f32.f32( define <vscale x 8 x i1> @intrinsic_vmfle_mask_vf_nxv8f32_f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfle.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmfle.mask.nxv8f32.f32( @@ -1116,11 +1116,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfle.mask.nxv2f64.f64( define <vscale x 2 x i1> @intrinsic_vmfle_mask_vf_nxv2f64_f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: 
vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfle.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmfle.mask.nxv2f64.f64( @@ -1163,11 +1163,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4f64.f64( define <vscale x 4 x i1> @intrinsic_vmfle_mask_vf_nxv4f64_f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfle.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmflt.ll b/llvm/test/CodeGen/RISCV/rvv/vmflt.ll index 37a9c6b..7d0abe3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmflt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmflt.ll @@ -187,11 +187,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmflt.mask.nxv8f16( define <vscale x 8 x i1> @intrinsic_vmflt_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmflt.vv v0, v8, v10 -; CHECK-NEXT: vmflt.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmflt.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8f16( @@ -238,11 
+238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmflt.mask.nxv16f16( define <vscale x 16 x i1> @intrinsic_vmflt_mask_vv_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x half> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmflt.vv v0, v8, v12 -; CHECK-NEXT: vmflt.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmflt.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmflt.nxv16f16( @@ -391,11 +391,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4f32( define <vscale x 4 x i1> @intrinsic_vmflt_mask_vv_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x float> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vv v0, v8, v10 -; CHECK-NEXT: vmflt.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmflt.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4f32( @@ -442,11 +442,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmflt.mask.nxv8f32( define <vscale x 8 x i1> @intrinsic_vmflt_mask_vv_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x float> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vv v0, v8, v12 -; CHECK-NEXT: vmflt.vv v20, v12, v16, v0.t -; CHECK-NEXT: 
vmv1r.v v0, v20 +; CHECK-NEXT: vmflt.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8f32( @@ -544,11 +544,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmflt.mask.nxv2f64( define <vscale x 2 x i1> @intrinsic_vmflt_mask_vv_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x double> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vv v0, v8, v10 -; CHECK-NEXT: vmflt.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmflt.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2f64( @@ -595,11 +595,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4f64( define <vscale x 4 x i1> @intrinsic_vmflt_mask_vv_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x double> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vv v0, v8, v12 -; CHECK-NEXT: vmflt.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmflt.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4f64( @@ -787,11 +787,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmflt.mask.nxv8f16.f16( define <vscale x 8 x i1> @intrinsic_vmflt_mask_vf_nxv8f16_f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v 
v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmflt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmflt.mask.nxv8f16.f16( @@ -834,11 +834,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmflt.mask.nxv16f16.f16( define <vscale x 16 x i1> @intrinsic_vmflt_mask_vf_nxv16f16_f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmflt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmflt.mask.nxv16f16.f16( @@ -975,11 +975,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4f32.f32( define <vscale x 4 x i1> @intrinsic_vmflt_mask_vf_nxv4f32_f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmflt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4f32.f32( @@ -1022,11 +1022,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmflt.mask.nxv8f32.f32( define <vscale x 8 x i1> @intrinsic_vmflt_mask_vf_nxv8f32_f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vmflt_mask_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmflt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmflt.mask.nxv8f32.f32( @@ -1116,11 +1116,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmflt.mask.nxv2f64.f64( define <vscale x 2 x i1> @intrinsic_vmflt_mask_vf_nxv2f64_f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmflt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmflt.mask.nxv2f64.f64( @@ -1163,11 +1163,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4f64.f64( define <vscale x 4 x i1> @intrinsic_vmflt_mask_vf_nxv4f64_f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmflt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfne.ll b/llvm/test/CodeGen/RISCV/rvv/vmfne.ll index 5defce4..db077b1 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/vmfne.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfne.ll @@ -187,11 +187,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfne.mask.nxv8f16( define <vscale x 8 x i1> @intrinsic_vmfne_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v10 -; CHECK-NEXT: vmfne.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfne.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8f16( @@ -238,11 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfne.mask.nxv16f16( define <vscale x 16 x i1> @intrinsic_vmfne_mask_vv_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x half> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v12 -; CHECK-NEXT: vmfne.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfne.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmfne.nxv16f16( @@ -391,11 +391,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4f32( define <vscale x 4 x i1> @intrinsic_vmfne_mask_vv_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x float> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; 
CHECK-NEXT: vmfne.vv v0, v8, v10 -; CHECK-NEXT: vmfne.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfne.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4f32( @@ -442,11 +442,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfne.mask.nxv8f32( define <vscale x 8 x i1> @intrinsic_vmfne_mask_vv_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x float> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v12 -; CHECK-NEXT: vmfne.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfne.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8f32( @@ -544,11 +544,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfne.mask.nxv2f64( define <vscale x 2 x i1> @intrinsic_vmfne_mask_vv_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x double> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v10 -; CHECK-NEXT: vmfne.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfne.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2f64( @@ -595,11 +595,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4f64( define <vscale x 4 x i1> @intrinsic_vmfne_mask_vv_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x double> %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vmfne_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v12 -; CHECK-NEXT: vmfne.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfne.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4f64( @@ -787,11 +787,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfne.mask.nxv8f16.f16( define <vscale x 8 x i1> @intrinsic_vmfne_mask_vf_nxv8f16_f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfne.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfne.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmfne.mask.nxv8f16.f16( @@ -834,11 +834,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfne.mask.nxv16f16.f16( define <vscale x 16 x i1> @intrinsic_vmfne_mask_vf_nxv16f16_f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfne.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfne.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmfne.mask.nxv16f16.f16( @@ -975,11 +975,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4f32.f32( define <vscale x 4 x i1> 
@intrinsic_vmfne_mask_vf_nxv4f32_f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfne.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfne.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4f32.f32( @@ -1022,11 +1022,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfne.mask.nxv8f32.f32( define <vscale x 8 x i1> @intrinsic_vmfne_mask_vf_nxv8f32_f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfne.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfne.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmfne.mask.nxv8f32.f32( @@ -1116,11 +1116,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfne.mask.nxv2f64.f64( define <vscale x 2 x i1> @intrinsic_vmfne_mask_vf_nxv2f64_f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfne.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfne.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmfne.mask.nxv2f64.f64( @@ 
-1163,11 +1163,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4f64.f64( define <vscale x 4 x i1> @intrinsic_vmfne_mask_vf_nxv4f64_f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfne.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfne.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmseq.ll b/llvm/test/CodeGen/RISCV/rvv/vmseq.ll index e2f5381..f96ab33 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmseq.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmseq.ll @@ -238,11 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i8( define <vscale x 16 x i1> @intrinsic_vmseq_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmseq.vv v0, v8, v10 -; CHECK-NEXT: vmseq.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmseq.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmseq.nxv16i8( @@ -289,11 +289,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmseq.mask.nxv32i8( define <vscale x 32 x i1> @intrinsic_vmseq_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 
+; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmseq.vv v0, v8, v12 -; CHECK-NEXT: vmseq.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmseq.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 32 x i1> @llvm.riscv.vmseq.nxv32i8( @@ -493,11 +493,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i16( define <vscale x 8 x i1> @intrinsic_vmseq_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmseq.vv v0, v8, v10 -; CHECK-NEXT: vmseq.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmseq.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i16( @@ -544,11 +544,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i16( define <vscale x 16 x i1> @intrinsic_vmseq_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmseq.vv v0, v8, v12 -; CHECK-NEXT: vmseq.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmseq.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmseq.nxv16i16( @@ -697,11 +697,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i32( define <vscale x 4 x i1> @intrinsic_vmseq_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> 
%2, <vscale x 4 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmseq.vv v0, v8, v10 -; CHECK-NEXT: vmseq.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmseq.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i32( @@ -748,11 +748,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i32( define <vscale x 8 x i1> @intrinsic_vmseq_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmseq.vv v0, v8, v12 -; CHECK-NEXT: vmseq.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmseq.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i32( @@ -850,11 +850,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i64( define <vscale x 2 x i1> @intrinsic_vmseq_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmseq.vv v0, v8, v10 -; CHECK-NEXT: vmseq.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmseq.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i64( @@ -901,11 +901,11 @@ declare <vscale x 4 x i1> 
@llvm.riscv.vmseq.mask.nxv4i64( define <vscale x 4 x i1> @intrinsic_vmseq_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmseq.vv v0, v8, v12 -; CHECK-NEXT: vmseq.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmseq.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i64( @@ -1140,11 +1140,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i8.i8( define <vscale x 16 x i1> @intrinsic_vmseq_mask_vx_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, i8 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmseq.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmseq.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i8.i8( @@ -1187,11 +1187,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmseq.mask.nxv32i8.i8( define <vscale x 32 x i1> @intrinsic_vmseq_mask_vx_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, i8 %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmseq.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmseq.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = 
call <vscale x 32 x i1> @llvm.riscv.vmseq.mask.nxv32i8.i8( @@ -1375,11 +1375,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i16.i16( define <vscale x 8 x i1> @intrinsic_vmseq_mask_vx_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, i16 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmseq.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmseq.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i16.i16( @@ -1422,11 +1422,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i16.i16( define <vscale x 16 x i1> @intrinsic_vmseq_mask_vx_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, i16 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmseq.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmseq.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i16.i16( @@ -1563,11 +1563,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i32.i32( define <vscale x 4 x i1> @intrinsic_vmseq_mask_vx_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, i32 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmseq.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; 
CHECK-NEXT: vmseq.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i32.i32( @@ -1610,11 +1610,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i32.i32( define <vscale x 8 x i1> @intrinsic_vmseq_mask_vx_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, i32 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmseq.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmseq.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i32.i32( @@ -1749,20 +1749,20 @@ define <vscale x 2 x i1> @intrinsic_vmseq_mask_vx_nxv2i64_i64(<vscale x 2 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmv1r.v v11, v0 +; RV32-NEXT: vmv1r.v v2, v0 ; RV32-NEXT: vmv1r.v v0, v10 -; RV32-NEXT: vmseq.vv v11, v8, v12, v0.t -; RV32-NEXT: vmv1r.v v0, v11 +; RV32-NEXT: vmseq.vv v2, v8, v12, v0.t +; RV32-NEXT: vmv1r.v v0, v2 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmseq_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v11, v0 +; RV64-NEXT: vmv1r.v v2, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 -; RV64-NEXT: vmseq.vx v11, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v11 +; RV64-NEXT: vmseq.vx v2, v8, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v2 ; RV64-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i64.i64( @@ -1823,20 +1823,20 @@ define <vscale x 4 x i1> @intrinsic_vmseq_mask_vx_nxv4i64_i64(<vscale x 4 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), 
zero -; RV32-NEXT: vmv1r.v v13, v0 +; RV32-NEXT: vmv1r.v v4, v0 ; RV32-NEXT: vmv1r.v v0, v12 -; RV32-NEXT: vmseq.vv v13, v8, v16, v0.t -; RV32-NEXT: vmv1r.v v0, v13 +; RV32-NEXT: vmseq.vv v4, v8, v16, v0.t +; RV32-NEXT: vmv1r.v v0, v4 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmseq_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v13, v0 +; RV64-NEXT: vmv1r.v v4, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 -; RV64-NEXT: vmseq.vx v13, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v13 +; RV64-NEXT: vmseq.vx v4, v8, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v4 ; RV64-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i64.i64( @@ -2007,11 +2007,11 @@ entry: define <vscale x 16 x i1> @intrinsic_vmseq_mask_vi_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmseq.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmseq.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i8.i8( @@ -2042,11 +2042,11 @@ entry: define <vscale x 32 x i1> @intrinsic_vmseq_mask_vi_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmseq.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmseq.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> @llvm.riscv.vmseq.mask.nxv32i8.i8( @@ -2182,11 
+2182,11 @@ entry: define <vscale x 8 x i1> @intrinsic_vmseq_mask_vi_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmseq.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmseq.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i16.i16( @@ -2217,11 +2217,11 @@ entry: define <vscale x 16 x i1> @intrinsic_vmseq_mask_vi_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmseq.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmseq.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i16.i16( @@ -2322,11 +2322,11 @@ entry: define <vscale x 4 x i1> @intrinsic_vmseq_mask_vi_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmseq.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmseq.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i32.i32( @@ -2357,11 +2357,11 @@ entry: define <vscale x 8 x i1> @intrinsic_vmseq_mask_vi_nxv8i32_i32(<vscale x 8 x i1> 
%0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmseq.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmseq.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i32.i32( @@ -2427,11 +2427,11 @@ entry: define <vscale x 2 x i1> @intrinsic_vmseq_mask_vi_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmseq.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmseq.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i64.i64( @@ -2462,11 +2462,11 @@ entry: define <vscale x 4 x i1> @intrinsic_vmseq_mask_vi_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmseq.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmseq.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsge.ll b/llvm/test/CodeGen/RISCV/rvv/vmsge.ll index da1c751..133ae98 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsge.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsge.ll @@ -4,19 +4,19 
@@ ; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64 -declare <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i8( +declare <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i8( <vscale x 1 x i8>, <vscale x 1 x i8>, iXLen); -define <vscale x 1 x i1> @intrinsic_vmseq_vv_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vv_nxv1i8_nxv1i8: +define <vscale x 1 x i1> @intrinsic_vmsge_vv_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i8( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i8( <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen %2) @@ -24,29 +24,28 @@ entry: ret <vscale x 1 x i1> %a } -declare <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i8( +declare <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i8( <vscale x 1 x i1>, <vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, iXLen); -define <vscale x 1 x i1> @intrinsic_vmseq_mask_vv_nxv1i8_nxv1i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, <vscale x 1 x i8> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i8_nxv1i8: +define <vscale x 1 x i1> @intrinsic_vmsge_mask_vv_nxv1i8_nxv1i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, <vscale x 1 x i8> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 +; 
CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret entry: - %mask = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i8( + %mask = call <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i8( <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %4) - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i8( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i8( <vscale x 1 x i1> %0, <vscale x 1 x i8> %2, <vscale x 1 x i8> %3, @@ -56,19 +55,19 @@ entry: ret <vscale x 1 x i1> %a } -declare <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i8( +declare <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i8( <vscale x 2 x i8>, <vscale x 2 x i8>, iXLen); -define <vscale x 2 x i1> @intrinsic_vmseq_vv_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i8> %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vv_nxv2i8_nxv2i8: +define <vscale x 2 x i1> @intrinsic_vmsge_vv_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i8( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i8( <vscale x 2 x i8> %0, <vscale x 2 x i8> %1, iXLen %2) @@ -76,29 +75,28 @@ entry: ret <vscale x 2 x i1> %a } -declare <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i8( +declare <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i8( <vscale x 2 x i1>, <vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, iXLen); -define <vscale x 2 x i1> @intrinsic_vmseq_mask_vv_nxv2i8_nxv2i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, <vscale x 2 x i8> %2, <vscale x 2 x i8> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i8_nxv2i8: +define <vscale x 2 x i1> @intrinsic_vmsge_mask_vv_nxv2i8_nxv2i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, <vscale x 2 x i8> %2, <vscale x 2 x i8> %3, iXLen %4) 
nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret entry: - %mask = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i8( + %mask = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i8( <vscale x 2 x i8> %1, <vscale x 2 x i8> %2, iXLen %4) - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i8( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i8( <vscale x 2 x i1> %0, <vscale x 2 x i8> %2, <vscale x 2 x i8> %3, @@ -108,19 +106,19 @@ entry: ret <vscale x 2 x i1> %a } -declare <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i8( +declare <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i8( <vscale x 4 x i8>, <vscale x 4 x i8>, iXLen); -define <vscale x 4 x i1> @intrinsic_vmseq_vv_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i8> %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vv_nxv4i8_nxv4i8: +define <vscale x 4 x i1> @intrinsic_vmsge_vv_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i8( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i8( <vscale x 4 x i8> %0, <vscale x 4 x i8> %1, iXLen %2) @@ -128,29 +126,28 @@ entry: ret <vscale x 4 x i1> %a } -declare <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i8( +declare <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i8( <vscale x 4 x i1>, <vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, iXLen); -define <vscale x 4 x i1> 
@intrinsic_vmseq_mask_vv_nxv4i8_nxv4i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, <vscale x 4 x i8> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i8_nxv4i8: +define <vscale x 4 x i1> @intrinsic_vmsge_mask_vv_nxv4i8_nxv4i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, <vscale x 4 x i8> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret entry: - %mask = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i8( + %mask = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i8( <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, iXLen %4) - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i8( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i8( <vscale x 4 x i1> %0, <vscale x 4 x i8> %2, <vscale x 4 x i8> %3, @@ -160,19 +157,19 @@ entry: ret <vscale x 4 x i1> %a } -declare <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i8( +declare <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i8( <vscale x 8 x i8>, <vscale x 8 x i8>, iXLen); -define <vscale x 8 x i1> @intrinsic_vmseq_vv_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i8> %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vv_nxv8i8_nxv8i8: +define <vscale x 8 x i1> @intrinsic_vmsge_vv_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i8( + %a = call <vscale x 8 x 
i1> @llvm.riscv.vmsge.nxv8i8( <vscale x 8 x i8> %0, <vscale x 8 x i8> %1, iXLen %2) @@ -180,29 +177,28 @@ entry: ret <vscale x 8 x i1> %a } -declare <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i8( +declare <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i8( <vscale x 8 x i1>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, iXLen); -define <vscale x 8 x i1> @intrinsic_vmseq_mask_vv_nxv8i8_nxv8i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, <vscale x 8 x i8> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i8_nxv8i8: +define <vscale x 8 x i1> @intrinsic_vmsge_mask_vv_nxv8i8_nxv8i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, <vscale x 8 x i8> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret entry: - %mask = call <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i8( + %mask = call <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i8( <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, iXLen %4) - %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i8( + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i8( <vscale x 8 x i1> %0, <vscale x 8 x i8> %2, <vscale x 8 x i8> %3, @@ -212,19 +208,19 @@ entry: ret <vscale x 8 x i1> %a } -declare <vscale x 16 x i1> @llvm.riscv.vmseq.nxv16i8( +declare <vscale x 16 x i1> @llvm.riscv.vmsge.nxv16i8( <vscale x 16 x i8>, <vscale x 16 x i8>, iXLen); -define <vscale x 16 x i1> @intrinsic_vmseq_vv_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vv_nxv16i8_nxv16i8: +define <vscale x 16 x i1> 
@intrinsic_vmsge_vv_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vmseq.vv v0, v8, v10 +; CHECK-NEXT: vmsle.vv v0, v10, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 16 x i1> @llvm.riscv.vmseq.nxv16i8( + %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.nxv16i8( <vscale x 16 x i8> %0, <vscale x 16 x i8> %1, iXLen %2) @@ -232,29 +228,28 @@ entry: ret <vscale x 16 x i1> %a } -declare <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i8( +declare <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i8( <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, iXLen); -define <vscale x 16 x i1> @intrinsic_vmseq_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv16i8_nxv16i8: +define <vscale x 16 x i1> @intrinsic_vmsge_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmseq.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmseq.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsle.vv v0, v10, v8 +; CHECK-NEXT: vmsle.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: - %mask = call <vscale x 16 x i1> @llvm.riscv.vmseq.nxv16i8( + %mask = call <vscale x 16 x i1> @llvm.riscv.vmsge.nxv16i8( <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, iXLen %4) - %a = call <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i8( + %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i8( <vscale x 16 x i1> %0, <vscale x 16 x i8> %2, 
<vscale x 16 x i8> %3, @@ -264,19 +259,19 @@ entry: ret <vscale x 16 x i1> %a } -declare <vscale x 32 x i1> @llvm.riscv.vmseq.nxv32i8( +declare <vscale x 32 x i1> @llvm.riscv.vmsge.nxv32i8( <vscale x 32 x i8>, <vscale x 32 x i8>, iXLen); -define <vscale x 32 x i1> @intrinsic_vmseq_vv_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i8> %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vv_nxv32i8_nxv32i8: +define <vscale x 32 x i1> @intrinsic_vmsge_vv_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vmseq.vv v0, v8, v12 +; CHECK-NEXT: vmsle.vv v0, v12, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 32 x i1> @llvm.riscv.vmseq.nxv32i8( + %a = call <vscale x 32 x i1> @llvm.riscv.vmsge.nxv32i8( <vscale x 32 x i8> %0, <vscale x 32 x i8> %1, iXLen %2) @@ -284,29 +279,28 @@ entry: ret <vscale x 32 x i1> %a } -declare <vscale x 32 x i1> @llvm.riscv.vmseq.mask.nxv32i8( +declare <vscale x 32 x i1> @llvm.riscv.vmsge.mask.nxv32i8( <vscale x 32 x i1>, <vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i1>, iXLen); -define <vscale x 32 x i1> @intrinsic_vmseq_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv32i8_nxv32i8: +define <vscale x 32 x i1> @intrinsic_vmsge_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmseq.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmseq.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsle.vv v0, v12, v8 +; 
CHECK-NEXT: vmsle.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: - %mask = call <vscale x 32 x i1> @llvm.riscv.vmseq.nxv32i8( + %mask = call <vscale x 32 x i1> @llvm.riscv.vmsge.nxv32i8( <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, iXLen %4) - %a = call <vscale x 32 x i1> @llvm.riscv.vmseq.mask.nxv32i8( + %a = call <vscale x 32 x i1> @llvm.riscv.vmsge.mask.nxv32i8( <vscale x 32 x i1> %0, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, @@ -316,19 +310,19 @@ entry: ret <vscale x 32 x i1> %a } -declare <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i16( +declare <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i16( <vscale x 1 x i16>, <vscale x 1 x i16>, iXLen); -define <vscale x 1 x i1> @intrinsic_vmseq_vv_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i16> %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vv_nxv1i16_nxv1i16: +define <vscale x 1 x i1> @intrinsic_vmsge_vv_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i16( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i16( <vscale x 1 x i16> %0, <vscale x 1 x i16> %1, iXLen %2) @@ -336,29 +330,28 @@ entry: ret <vscale x 1 x i1> %a } -declare <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i16( +declare <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i16( <vscale x 1 x i1>, <vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, iXLen); -define <vscale x 1 x i1> @intrinsic_vmseq_mask_vv_nxv1i16_nxv1i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, <vscale x 1 x i16> %2, <vscale x 1 x i16> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i16_nxv1i16: +define <vscale x 1 x i1> @intrinsic_vmsge_mask_vv_nxv1i16_nxv1i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, <vscale 
x 1 x i16> %2, <vscale x 1 x i16> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret entry: - %mask = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i16( + %mask = call <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i16( <vscale x 1 x i16> %1, <vscale x 1 x i16> %2, iXLen %4) - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i16( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i16( <vscale x 1 x i1> %0, <vscale x 1 x i16> %2, <vscale x 1 x i16> %3, @@ -368,19 +361,19 @@ entry: ret <vscale x 1 x i1> %a } -declare <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i16( +declare <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i16( <vscale x 2 x i16>, <vscale x 2 x i16>, iXLen); -define <vscale x 2 x i1> @intrinsic_vmseq_vv_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i16> %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vv_nxv2i16_nxv2i16: +define <vscale x 2 x i1> @intrinsic_vmsge_vv_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i16( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i16( <vscale x 2 x i16> %0, <vscale x 2 x i16> %1, iXLen %2) @@ -388,29 +381,28 @@ entry: ret <vscale x 2 x i1> %a } -declare <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i16( +declare <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i16( <vscale x 2 x i1>, <vscale x 
2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, iXLen); -define <vscale x 2 x i1> @intrinsic_vmseq_mask_vv_nxv2i16_nxv2i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, <vscale x 2 x i16> %2, <vscale x 2 x i16> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i16_nxv2i16: +define <vscale x 2 x i1> @intrinsic_vmsge_mask_vv_nxv2i16_nxv2i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, <vscale x 2 x i16> %2, <vscale x 2 x i16> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret entry: - %mask = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i16( + %mask = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i16( <vscale x 2 x i16> %1, <vscale x 2 x i16> %2, iXLen %4) - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i16( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i16( <vscale x 2 x i1> %0, <vscale x 2 x i16> %2, <vscale x 2 x i16> %3, @@ -420,19 +412,19 @@ entry: ret <vscale x 2 x i1> %a } -declare <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i16( +declare <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i16( <vscale x 4 x i16>, <vscale x 4 x i16>, iXLen); -define <vscale x 4 x i1> @intrinsic_vmseq_vv_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i16> %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vv_nxv4i16_nxv4i16: +define <vscale x 4 x i1> @intrinsic_vmsge_vv_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: 
vmsle.vv v0, v9, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i16( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i16( <vscale x 4 x i16> %0, <vscale x 4 x i16> %1, iXLen %2) @@ -440,29 +432,28 @@ entry: ret <vscale x 4 x i1> %a } -declare <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i16( +declare <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i16( <vscale x 4 x i1>, <vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, iXLen); -define <vscale x 4 x i1> @intrinsic_vmseq_mask_vv_nxv4i16_nxv4i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, <vscale x 4 x i16> %2, <vscale x 4 x i16> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i16_nxv4i16: +define <vscale x 4 x i1> @intrinsic_vmsge_mask_vv_nxv4i16_nxv4i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, <vscale x 4 x i16> %2, <vscale x 4 x i16> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret entry: - %mask = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i16( + %mask = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i16( <vscale x 4 x i16> %1, <vscale x 4 x i16> %2, iXLen %4) - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i16( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i16( <vscale x 4 x i1> %0, <vscale x 4 x i16> %2, <vscale x 4 x i16> %3, @@ -472,19 +463,19 @@ entry: ret <vscale x 4 x i1> %a } -declare <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i16( +declare <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i16( <vscale x 8 x i16>, <vscale x 8 x i16>, iXLen); -define <vscale x 8 x i1> @intrinsic_vmseq_vv_nxv8i16_nxv8i16(<vscale x 8 x 
i16> %0, <vscale x 8 x i16> %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vv_nxv8i16_nxv8i16: +define <vscale x 8 x i1> @intrinsic_vmsge_vv_nxv8i16_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vmseq.vv v0, v8, v10 +; CHECK-NEXT: vmsle.vv v0, v10, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i16( + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i16( <vscale x 8 x i16> %0, <vscale x 8 x i16> %1, iXLen %2) @@ -492,29 +483,28 @@ entry: ret <vscale x 8 x i1> %a } -declare <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i16( +declare <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i16( <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, iXLen); -define <vscale x 8 x i1> @intrinsic_vmseq_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i16_nxv8i16: +define <vscale x 8 x i1> @intrinsic_vmsge_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmseq.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmseq.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsle.vv v0, v10, v8 +; CHECK-NEXT: vmsle.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: - %mask = call <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i16( + %mask = call <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i16( <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, iXLen %4) - %a = call <vscale x 8 x i1> 
@llvm.riscv.vmseq.mask.nxv8i16( + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i16( <vscale x 8 x i1> %0, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, @@ -524,19 +514,19 @@ entry: ret <vscale x 8 x i1> %a } -declare <vscale x 16 x i1> @llvm.riscv.vmseq.nxv16i16( +declare <vscale x 16 x i1> @llvm.riscv.vmsge.nxv16i16( <vscale x 16 x i16>, <vscale x 16 x i16>, iXLen); -define <vscale x 16 x i1> @intrinsic_vmseq_vv_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, <vscale x 16 x i16> %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vv_nxv16i16_nxv16i16: +define <vscale x 16 x i1> @intrinsic_vmsge_vv_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, <vscale x 16 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmseq.vv v0, v8, v12 +; CHECK-NEXT: vmsle.vv v0, v12, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 16 x i1> @llvm.riscv.vmseq.nxv16i16( + %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.nxv16i16( <vscale x 16 x i16> %0, <vscale x 16 x i16> %1, iXLen %2) @@ -544,29 +534,28 @@ entry: ret <vscale x 16 x i1> %a } -declare <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i16( +declare <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i16( <vscale x 16 x i1>, <vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i1>, iXLen); -define <vscale x 16 x i1> @intrinsic_vmseq_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv16i16_nxv16i16: +define <vscale x 16 x i1> @intrinsic_vmsge_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmseq.vv v20, 
v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmseq.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsle.vv v0, v12, v8 +; CHECK-NEXT: vmsle.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: - %mask = call <vscale x 16 x i1> @llvm.riscv.vmseq.nxv16i16( + %mask = call <vscale x 16 x i1> @llvm.riscv.vmsge.nxv16i16( <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, iXLen %4) - %a = call <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i16( + %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i16( <vscale x 16 x i1> %0, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, @@ -576,19 +565,19 @@ entry: ret <vscale x 16 x i1> %a } -declare <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i32( +declare <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i32( <vscale x 1 x i32>, <vscale x 1 x i32>, iXLen); -define <vscale x 1 x i1> @intrinsic_vmseq_vv_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i32> %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vv_nxv1i32_nxv1i32: +define <vscale x 1 x i1> @intrinsic_vmsge_vv_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i32( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i32( <vscale x 1 x i32> %0, <vscale x 1 x i32> %1, iXLen %2) @@ -596,29 +585,28 @@ entry: ret <vscale x 1 x i1> %a } -declare <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i32( +declare <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i32( <vscale x 1 x i1>, <vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, iXLen); -define <vscale x 1 x i1> @intrinsic_vmseq_mask_vv_nxv1i32_nxv1i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i32> %2, <vscale x 1 x i32> %3, iXLen 
%4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i32_nxv1i32: +define <vscale x 1 x i1> @intrinsic_vmsge_mask_vv_nxv1i32_nxv1i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i32> %2, <vscale x 1 x i32> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret entry: - %mask = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i32( + %mask = call <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i32( <vscale x 1 x i32> %1, <vscale x 1 x i32> %2, iXLen %4) - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i32( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i32( <vscale x 1 x i1> %0, <vscale x 1 x i32> %2, <vscale x 1 x i32> %3, @@ -628,19 +616,19 @@ entry: ret <vscale x 1 x i1> %a } -declare <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i32( +declare <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i32( <vscale x 2 x i32>, <vscale x 2 x i32>, iXLen); -define <vscale x 2 x i1> @intrinsic_vmseq_vv_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i32> %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vv_nxv2i32_nxv2i32: +define <vscale x 2 x i1> @intrinsic_vmsge_vv_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i32( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i32( <vscale x 2 x i32> %0, <vscale x 2 x i32> %1, iXLen %2) @@ -648,29 
+636,28 @@ entry: ret <vscale x 2 x i1> %a } -declare <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i32( +declare <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i32( <vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, iXLen); -define <vscale x 2 x i1> @intrinsic_vmseq_mask_vv_nxv2i32_nxv2i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i32> %2, <vscale x 2 x i32> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i32_nxv2i32: +define <vscale x 2 x i1> @intrinsic_vmsge_mask_vv_nxv2i32_nxv2i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i32> %2, <vscale x 2 x i32> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret entry: - %mask = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i32( + %mask = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i32( <vscale x 2 x i32> %1, <vscale x 2 x i32> %2, iXLen %4) - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i32( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i32( <vscale x 2 x i1> %0, <vscale x 2 x i32> %2, <vscale x 2 x i32> %3, @@ -680,19 +667,19 @@ entry: ret <vscale x 2 x i1> %a } -declare <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i32( +declare <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i32( <vscale x 4 x i32>, <vscale x 4 x i32>, iXLen); -define <vscale x 4 x i1> @intrinsic_vmseq_vv_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vv_nxv4i32_nxv4i32: +define <vscale x 4 x i1> @intrinsic_vmsge_vv_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, iXLen %2) 
nounwind { +; CHECK-LABEL: intrinsic_vmsge_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmseq.vv v0, v8, v10 +; CHECK-NEXT: vmsle.vv v0, v10, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i32( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i32( <vscale x 4 x i32> %0, <vscale x 4 x i32> %1, iXLen %2) @@ -700,29 +687,28 @@ entry: ret <vscale x 4 x i1> %a } -declare <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i32( +declare <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i32( <vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, iXLen); -define <vscale x 4 x i1> @intrinsic_vmseq_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i32_nxv4i32: +define <vscale x 4 x i1> @intrinsic_vmsge_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmseq.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmseq.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsle.vv v0, v10, v8 +; CHECK-NEXT: vmsle.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: - %mask = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i32( + %mask = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i32( <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, iXLen %4) - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i32( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i32( <vscale x 4 x i1> %0, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, @@ -732,19 +718,19 @@ entry: ret <vscale x 4 x i1> %a } -declare <vscale x 8 x i1> 
@llvm.riscv.vmseq.nxv8i32( +declare <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i32( <vscale x 8 x i32>, <vscale x 8 x i32>, iXLen); -define <vscale x 8 x i1> @intrinsic_vmseq_vv_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i32> %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vv_nxv8i32_nxv8i32: +define <vscale x 8 x i1> @intrinsic_vmsge_vv_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmseq.vv v0, v8, v12 +; CHECK-NEXT: vmsle.vv v0, v12, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i32( + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i32( <vscale x 8 x i32> %0, <vscale x 8 x i32> %1, iXLen %2) @@ -752,29 +738,28 @@ entry: ret <vscale x 8 x i1> %a } -declare <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i32( +declare <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i32( <vscale x 8 x i1>, <vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, iXLen); -define <vscale x 8 x i1> @intrinsic_vmseq_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i32_nxv8i32: +define <vscale x 8 x i1> @intrinsic_vmsge_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmseq.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmseq.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsle.vv v0, v12, v8 +; CHECK-NEXT: vmsle.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: - %mask = call <vscale x 
8 x i1> @llvm.riscv.vmseq.nxv8i32( + %mask = call <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i32( <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, iXLen %4) - %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i32( + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i32( <vscale x 8 x i1> %0, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, @@ -784,19 +769,19 @@ entry: ret <vscale x 8 x i1> %a } -declare <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i64( +declare <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i64( <vscale x 1 x i64>, <vscale x 1 x i64>, iXLen); -define <vscale x 1 x i1> @intrinsic_vmseq_vv_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vv_nxv1i64_nxv1i64: +define <vscale x 1 x i1> @intrinsic_vmsge_vv_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i64( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i64( <vscale x 1 x i64> %0, <vscale x 1 x i64> %1, iXLen %2) @@ -804,29 +789,28 @@ entry: ret <vscale x 1 x i1> %a } -declare <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i64( +declare <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i64( <vscale x 1 x i1>, <vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, iXLen); -define <vscale x 1 x i1> @intrinsic_vmseq_mask_vv_nxv1i64_nxv1i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i64> %2, <vscale x 1 x i64> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i64_nxv1i64: +define <vscale x 1 x i1> @intrinsic_vmsge_mask_vv_nxv1i64_nxv1i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i64> %2, <vscale x 1 x i64> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i64_nxv1i64: ; CHECK: # 
%bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret entry: - %mask = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i64( + %mask = call <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i64( <vscale x 1 x i64> %1, <vscale x 1 x i64> %2, iXLen %4) - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i64( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i64( <vscale x 1 x i1> %0, <vscale x 1 x i64> %2, <vscale x 1 x i64> %3, @@ -836,19 +820,19 @@ entry: ret <vscale x 1 x i1> %a } -declare <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i64( +declare <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i64( <vscale x 2 x i64>, <vscale x 2 x i64>, iXLen); -define <vscale x 2 x i1> @intrinsic_vmseq_vv_nxv2i64_nxv2i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vv_nxv2i64_nxv2i64: +define <vscale x 2 x i1> @intrinsic_vmsge_vv_nxv2i64_nxv2i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmseq.vv v0, v8, v10 +; CHECK-NEXT: vmsle.vv v0, v10, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i64( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i64( <vscale x 2 x i64> %0, <vscale x 2 x i64> %1, iXLen %2) @@ -856,29 +840,28 @@ entry: ret <vscale x 2 x i1> %a } -declare <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i64( +declare <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i64( <vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, iXLen); -define <vscale x 2 x i1> 
@intrinsic_vmseq_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i64_nxv2i64: +define <vscale x 2 x i1> @intrinsic_vmsge_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmseq.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmseq.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsle.vv v0, v10, v8 +; CHECK-NEXT: vmsle.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: - %mask = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i64( + %mask = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i64( <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, iXLen %4) - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i64( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i64( <vscale x 2 x i1> %0, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, @@ -888,19 +871,19 @@ entry: ret <vscale x 2 x i1> %a } -declare <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i64( +declare <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i64( <vscale x 4 x i64>, <vscale x 4 x i64>, iXLen); -define <vscale x 4 x i1> @intrinsic_vmseq_vv_nxv4i64_nxv4i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vv_nxv4i64_nxv4i64: +define <vscale x 4 x i1> @intrinsic_vmsge_vv_nxv4i64_nxv4i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmseq.vv v0, v8, v12 +; CHECK-NEXT: vmsle.vv v0, v12, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 4 x i1> 
@llvm.riscv.vmseq.nxv4i64( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i64( <vscale x 4 x i64> %0, <vscale x 4 x i64> %1, iXLen %2) @@ -908,29 +891,28 @@ entry: ret <vscale x 4 x i1> %a } -declare <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i64( +declare <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i64( <vscale x 4 x i1>, <vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i1>, iXLen); -define <vscale x 4 x i1> @intrinsic_vmseq_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i64_nxv4i64: +define <vscale x 4 x i1> @intrinsic_vmsge_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmseq.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmseq.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsle.vv v0, v12, v8 +; CHECK-NEXT: vmsle.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: - %mask = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i64( + %mask = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i64( <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, iXLen %4) - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i64( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i64( <vscale x 4 x i1> %0, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, @@ -940,19 +922,20 @@ entry: ret <vscale x 4 x i1> %a } -declare <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i8.i8( +declare <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i8.i8( <vscale x 1 x i8>, i8, iXLen); -define <vscale x 1 x i1> @intrinsic_vmseq_vx_nxv1i8_i8(<vscale x 1 x i8> %0, i8 %1, iXLen %2) nounwind { -; CHECK-LABEL: 
intrinsic_vmseq_vx_nxv1i8_i8: +define <vscale x 1 x i1> @intrinsic_vmsge_vx_nxv1i8_i8(<vscale x 1 x i8> %0, i8 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vx_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vmseq.vx v0, v8, a0 +; CHECK-NEXT: vmslt.vx v8, v8, a0 +; CHECK-NEXT: vmnot.m v0, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i8.i8( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i8.i8( <vscale x 1 x i8> %0, i8 %1, iXLen %2) @@ -960,24 +943,24 @@ entry: ret <vscale x 1 x i1> %a } -declare <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i8.i8( +declare <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i8.i8( <vscale x 1 x i1>, <vscale x 1 x i8>, i8, <vscale x 1 x i1>, iXLen); -define <vscale x 1 x i1> @intrinsic_vmseq_mask_vx_nxv1i8_i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, i8 %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv1i8_i8: +define <vscale x 1 x i1> @intrinsic_vmsge_mask_vx_nxv1i8_i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, i8 %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i8.i8( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i8.i8( <vscale x 1 x i1> %0, <vscale x 1 x i8> %1, i8 %2, @@ -987,19 +970,20 @@ entry: ret <vscale x 1 x i1> %a } -declare <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i8.i8( +declare <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i8.i8( <vscale x 2 x i8>, i8, iXLen); -define <vscale x 2 x i1> @intrinsic_vmseq_vx_nxv2i8_i8(<vscale x 2 x i8> %0, i8 %1, iXLen %2) nounwind { -; 
CHECK-LABEL: intrinsic_vmseq_vx_nxv2i8_i8: +define <vscale x 2 x i1> @intrinsic_vmsge_vx_nxv2i8_i8(<vscale x 2 x i8> %0, i8 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vx_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vmseq.vx v0, v8, a0 +; CHECK-NEXT: vmslt.vx v8, v8, a0 +; CHECK-NEXT: vmnot.m v0, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i8.i8( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i8.i8( <vscale x 2 x i8> %0, i8 %1, iXLen %2) @@ -1007,24 +991,24 @@ entry: ret <vscale x 2 x i1> %a } -declare <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i8.i8( +declare <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i8.i8( <vscale x 2 x i1>, <vscale x 2 x i8>, i8, <vscale x 2 x i1>, iXLen); -define <vscale x 2 x i1> @intrinsic_vmseq_mask_vx_nxv2i8_i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, i8 %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv2i8_i8: +define <vscale x 2 x i1> @intrinsic_vmsge_mask_vx_nxv2i8_i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, i8 %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i8.i8( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i8.i8( <vscale x 2 x i1> %0, <vscale x 2 x i8> %1, i8 %2, @@ -1034,19 +1018,20 @@ entry: ret <vscale x 2 x i1> %a } -declare <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i8.i8( +declare <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i8.i8( <vscale x 4 x i8>, i8, iXLen); -define <vscale x 4 x i1> @intrinsic_vmseq_vx_nxv4i8_i8(<vscale x 4 x i8> %0, i8 %1, iXLen %2) 
nounwind { -; CHECK-LABEL: intrinsic_vmseq_vx_nxv4i8_i8: +define <vscale x 4 x i1> @intrinsic_vmsge_vx_nxv4i8_i8(<vscale x 4 x i8> %0, i8 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vx_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vmseq.vx v0, v8, a0 +; CHECK-NEXT: vmslt.vx v8, v8, a0 +; CHECK-NEXT: vmnot.m v0, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i8.i8( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i8.i8( <vscale x 4 x i8> %0, i8 %1, iXLen %2) @@ -1054,24 +1039,24 @@ entry: ret <vscale x 4 x i1> %a } -declare <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i8.i8( +declare <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i8.i8( <vscale x 4 x i1>, <vscale x 4 x i8>, i8, <vscale x 4 x i1>, iXLen); -define <vscale x 4 x i1> @intrinsic_vmseq_mask_vx_nxv4i8_i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, i8 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv4i8_i8: +define <vscale x 4 x i1> @intrinsic_vmsge_mask_vx_nxv4i8_i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, i8 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret entry: - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i8.i8( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i8.i8( <vscale x 4 x i1> %0, <vscale x 4 x i8> %1, i8 %2, @@ -1081,19 +1066,20 @@ entry: ret <vscale x 4 x i1> %a } -declare <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i8.i8( +declare <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i8.i8( <vscale x 8 x i8>, i8, iXLen); -define <vscale x 8 x i1> @intrinsic_vmseq_vx_nxv8i8_i8(<vscale x 8 x i8> %0, i8 
%1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vx_nxv8i8_i8: +define <vscale x 8 x i1> @intrinsic_vmsge_vx_nxv8i8_i8(<vscale x 8 x i8> %0, i8 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vx_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmseq.vx v0, v8, a0 +; CHECK-NEXT: vmslt.vx v8, v8, a0 +; CHECK-NEXT: vmnot.m v0, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i8.i8( + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i8.i8( <vscale x 8 x i8> %0, i8 %1, iXLen %2) @@ -1101,24 +1087,24 @@ entry: ret <vscale x 8 x i1> %a } -declare <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i8.i8( +declare <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i8.i8( <vscale x 8 x i1>, <vscale x 8 x i8>, i8, <vscale x 8 x i1>, iXLen); -define <vscale x 8 x i1> @intrinsic_vmseq_mask_vx_nxv8i8_i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, i8 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv8i8_i8: +define <vscale x 8 x i1> @intrinsic_vmsge_mask_vx_nxv8i8_i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, i8 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret entry: - %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i8.i8( + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i8.i8( <vscale x 8 x i1> %0, <vscale x 8 x i8> %1, i8 %2, @@ -1128,19 +1114,20 @@ entry: ret <vscale x 8 x i1> %a } -declare <vscale x 16 x i1> @llvm.riscv.vmseq.nxv16i8.i8( +declare <vscale x 16 x i1> @llvm.riscv.vmsge.nxv16i8.i8( <vscale x 16 x i8>, i8, iXLen); -define <vscale x 16 x i1> @intrinsic_vmseq_vx_nxv16i8_i8(<vscale 
x 16 x i8> %0, i8 %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vx_nxv16i8_i8: +define <vscale x 16 x i1> @intrinsic_vmsge_vx_nxv16i8_i8(<vscale x 16 x i8> %0, i8 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vmseq.vx v0, v8, a0 +; CHECK-NEXT: vmslt.vx v0, v8, a0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret entry: - %a = call <vscale x 16 x i1> @llvm.riscv.vmseq.nxv16i8.i8( + %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.nxv16i8.i8( <vscale x 16 x i8> %0, i8 %1, iXLen %2) @@ -1148,24 +1135,24 @@ entry: ret <vscale x 16 x i1> %a } -declare <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i8.i8( +declare <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i8.i8( <vscale x 16 x i1>, <vscale x 16 x i8>, i8, <vscale x 16 x i1>, iXLen); -define <vscale x 16 x i1> @intrinsic_vmseq_mask_vx_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, i8 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv16i8_i8: +define <vscale x 16 x i1> @intrinsic_vmsge_mask_vx_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, i8 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmseq.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmslt.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v2, v10 ; CHECK-NEXT: ret entry: - %a = call <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i8.i8( + %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i8.i8( <vscale x 16 x i1> %0, <vscale x 16 x i8> %1, i8 %2, @@ -1175,19 +1162,20 @@ entry: ret <vscale x 16 x i1> %a } -declare <vscale x 32 x i1> @llvm.riscv.vmseq.nxv32i8.i8( +declare <vscale x 32 x i1> @llvm.riscv.vmsge.nxv32i8.i8( <vscale x 
32 x i8>, i8, iXLen); -define <vscale x 32 x i1> @intrinsic_vmseq_vx_nxv32i8_i8(<vscale x 32 x i8> %0, i8 %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vx_nxv32i8_i8: +define <vscale x 32 x i1> @intrinsic_vmsge_vx_nxv32i8_i8(<vscale x 32 x i8> %0, i8 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vmseq.vx v0, v8, a0 +; CHECK-NEXT: vmslt.vx v0, v8, a0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret entry: - %a = call <vscale x 32 x i1> @llvm.riscv.vmseq.nxv32i8.i8( + %a = call <vscale x 32 x i1> @llvm.riscv.vmsge.nxv32i8.i8( <vscale x 32 x i8> %0, i8 %1, iXLen %2) @@ -1195,24 +1183,24 @@ entry: ret <vscale x 32 x i1> %a } -declare <vscale x 32 x i1> @llvm.riscv.vmseq.mask.nxv32i8.i8( +declare <vscale x 32 x i1> @llvm.riscv.vmsge.mask.nxv32i8.i8( <vscale x 32 x i1>, <vscale x 32 x i8>, i8, <vscale x 32 x i1>, iXLen); -define <vscale x 32 x i1> @intrinsic_vmseq_mask_vx_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, i8 %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv32i8_i8: +define <vscale x 32 x i1> @intrinsic_vmsge_mask_vx_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, i8 %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmseq.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmslt.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v4, v12 ; CHECK-NEXT: ret entry: - %a = call <vscale x 32 x i1> @llvm.riscv.vmseq.mask.nxv32i8.i8( + %a = call <vscale x 32 x i1> @llvm.riscv.vmsge.mask.nxv32i8.i8( <vscale x 32 x i1> %0, <vscale x 32 x i8> %1, i8 %2, @@ -1222,19 +1210,20 @@ entry: ret <vscale x 32 x i1> %a } -declare <vscale x 1 x i1> 
@llvm.riscv.vmseq.nxv1i16.i16( +declare <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i16.i16( <vscale x 1 x i16>, i16, iXLen); -define <vscale x 1 x i1> @intrinsic_vmseq_vx_nxv1i16_i16(<vscale x 1 x i16> %0, i16 %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vx_nxv1i16_i16: +define <vscale x 1 x i1> @intrinsic_vmsge_vx_nxv1i16_i16(<vscale x 1 x i16> %0, i16 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vx_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vmseq.vx v0, v8, a0 +; CHECK-NEXT: vmslt.vx v8, v8, a0 +; CHECK-NEXT: vmnot.m v0, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i16.i16( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i16.i16( <vscale x 1 x i16> %0, i16 %1, iXLen %2) @@ -1242,24 +1231,24 @@ entry: ret <vscale x 1 x i1> %a } -declare <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i16.i16( +declare <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i16.i16( <vscale x 1 x i1>, <vscale x 1 x i16>, i16, <vscale x 1 x i1>, iXLen); -define <vscale x 1 x i1> @intrinsic_vmseq_mask_vx_nxv1i16_i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, i16 %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv1i16_i16: +define <vscale x 1 x i1> @intrinsic_vmsge_mask_vx_nxv1i16_i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, i16 %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i16.i16( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i16.i16( <vscale x 1 x i1> %0, <vscale x 1 x i16> %1, i16 %2, @@ -1269,19 +1258,20 
@@ entry: ret <vscale x 1 x i1> %a } -declare <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i16.i16( +declare <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i16.i16( <vscale x 2 x i16>, i16, iXLen); -define <vscale x 2 x i1> @intrinsic_vmseq_vx_nxv2i16_i16(<vscale x 2 x i16> %0, i16 %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vx_nxv2i16_i16: +define <vscale x 2 x i1> @intrinsic_vmsge_vx_nxv2i16_i16(<vscale x 2 x i16> %0, i16 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vx_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vmseq.vx v0, v8, a0 +; CHECK-NEXT: vmslt.vx v8, v8, a0 +; CHECK-NEXT: vmnot.m v0, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i16.i16( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i16.i16( <vscale x 2 x i16> %0, i16 %1, iXLen %2) @@ -1289,24 +1279,24 @@ entry: ret <vscale x 2 x i1> %a } -declare <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i16.i16( +declare <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i16.i16( <vscale x 2 x i1>, <vscale x 2 x i16>, i16, <vscale x 2 x i1>, iXLen); -define <vscale x 2 x i1> @intrinsic_vmseq_mask_vx_nxv2i16_i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, i16 %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv2i16_i16: +define <vscale x 2 x i1> @intrinsic_vmsge_mask_vx_nxv2i16_i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, i16 %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i16.i16( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i16.i16( <vscale x 2 
x i1> %0, <vscale x 2 x i16> %1, i16 %2, @@ -1316,19 +1306,20 @@ entry: ret <vscale x 2 x i1> %a } -declare <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i16.i16( +declare <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i16.i16( <vscale x 4 x i16>, i16, iXLen); -define <vscale x 4 x i1> @intrinsic_vmseq_vx_nxv4i16_i16(<vscale x 4 x i16> %0, i16 %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vx_nxv4i16_i16: +define <vscale x 4 x i1> @intrinsic_vmsge_vx_nxv4i16_i16(<vscale x 4 x i16> %0, i16 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vx_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vmseq.vx v0, v8, a0 +; CHECK-NEXT: vmslt.vx v8, v8, a0 +; CHECK-NEXT: vmnot.m v0, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i16.i16( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i16.i16( <vscale x 4 x i16> %0, i16 %1, iXLen %2) @@ -1336,24 +1327,24 @@ entry: ret <vscale x 4 x i1> %a } -declare <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i16.i16( +declare <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i16.i16( <vscale x 4 x i1>, <vscale x 4 x i16>, i16, <vscale x 4 x i1>, iXLen); -define <vscale x 4 x i1> @intrinsic_vmseq_mask_vx_nxv4i16_i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, i16 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv4i16_i16: +define <vscale x 4 x i1> @intrinsic_vmsge_mask_vx_nxv4i16_i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, i16 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret entry: - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i16.i16( + %a = call 
<vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i16.i16( <vscale x 4 x i1> %0, <vscale x 4 x i16> %1, i16 %2, @@ -1363,19 +1354,20 @@ entry: ret <vscale x 4 x i1> %a } -declare <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i16.i16( +declare <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i16.i16( <vscale x 8 x i16>, i16, iXLen); -define <vscale x 8 x i1> @intrinsic_vmseq_vx_nxv8i16_i16(<vscale x 8 x i16> %0, i16 %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vx_nxv8i16_i16: +define <vscale x 8 x i1> @intrinsic_vmsge_vx_nxv8i16_i16(<vscale x 8 x i16> %0, i16 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vmseq.vx v0, v8, a0 +; CHECK-NEXT: vmslt.vx v0, v8, a0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret entry: - %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i16.i16( + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i16.i16( <vscale x 8 x i16> %0, i16 %1, iXLen %2) @@ -1383,24 +1375,24 @@ entry: ret <vscale x 8 x i1> %a } -declare <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i16.i16( +declare <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i16.i16( <vscale x 8 x i1>, <vscale x 8 x i16>, i16, <vscale x 8 x i1>, iXLen); -define <vscale x 8 x i1> @intrinsic_vmseq_mask_vx_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, i16 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv8i16_i16: +define <vscale x 8 x i1> @intrinsic_vmsge_mask_vx_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, i16 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmseq.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmslt.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v2, v10 ; 
CHECK-NEXT: ret entry: - %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i16.i16( + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i16.i16( <vscale x 8 x i1> %0, <vscale x 8 x i16> %1, i16 %2, @@ -1410,19 +1402,20 @@ entry: ret <vscale x 8 x i1> %a } -declare <vscale x 16 x i1> @llvm.riscv.vmseq.nxv16i16.i16( +declare <vscale x 16 x i1> @llvm.riscv.vmsge.nxv16i16.i16( <vscale x 16 x i16>, i16, iXLen); -define <vscale x 16 x i1> @intrinsic_vmseq_vx_nxv16i16_i16(<vscale x 16 x i16> %0, i16 %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vx_nxv16i16_i16: +define <vscale x 16 x i1> @intrinsic_vmsge_vx_nxv16i16_i16(<vscale x 16 x i16> %0, i16 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vx_nxv16i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vmseq.vx v0, v8, a0 +; CHECK-NEXT: vmslt.vx v0, v8, a0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret entry: - %a = call <vscale x 16 x i1> @llvm.riscv.vmseq.nxv16i16.i16( + %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.nxv16i16.i16( <vscale x 16 x i16> %0, i16 %1, iXLen %2) @@ -1430,24 +1423,24 @@ entry: ret <vscale x 16 x i1> %a } -declare <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i16.i16( +declare <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i16.i16( <vscale x 16 x i1>, <vscale x 16 x i16>, i16, <vscale x 16 x i1>, iXLen); -define <vscale x 16 x i1> @intrinsic_vmseq_mask_vx_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, i16 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv16i16_i16: +define <vscale x 16 x i1> @intrinsic_vmsge_mask_vx_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, i16 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: 
vmseq.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmslt.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v4, v12 ; CHECK-NEXT: ret entry: - %a = call <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i16.i16( + %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i16.i16( <vscale x 16 x i1> %0, <vscale x 16 x i16> %1, i16 %2, @@ -1457,19 +1450,20 @@ entry: ret <vscale x 16 x i1> %a } -declare <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i32.i32( +declare <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i32.i32( <vscale x 1 x i32>, i32, iXLen); -define <vscale x 1 x i1> @intrinsic_vmseq_vx_nxv1i32_i32(<vscale x 1 x i32> %0, i32 %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vx_nxv1i32_i32: +define <vscale x 1 x i1> @intrinsic_vmsge_vx_nxv1i32_i32(<vscale x 1 x i32> %0, i32 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vx_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vmseq.vx v0, v8, a0 +; CHECK-NEXT: vmslt.vx v8, v8, a0 +; CHECK-NEXT: vmnot.m v0, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i32.i32( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i32.i32( <vscale x 1 x i32> %0, i32 %1, iXLen %2) @@ -1477,24 +1471,24 @@ entry: ret <vscale x 1 x i1> %a } -declare <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i32.i32( +declare <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i32.i32( <vscale x 1 x i1>, <vscale x 1 x i32>, i32, <vscale x 1 x i1>, iXLen); -define <vscale x 1 x i1> @intrinsic_vmseq_mask_vx_nxv1i32_i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, i32 %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv1i32_i32: +define <vscale x 1 x i1> @intrinsic_vmsge_mask_vx_nxv1i32_i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, i32 %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli 
zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i32.i32( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i32.i32( <vscale x 1 x i1> %0, <vscale x 1 x i32> %1, i32 %2, @@ -1504,19 +1498,20 @@ entry: ret <vscale x 1 x i1> %a } -declare <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i32.i32( +declare <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i32.i32( <vscale x 2 x i32>, i32, iXLen); -define <vscale x 2 x i1> @intrinsic_vmseq_vx_nxv2i32_i32(<vscale x 2 x i32> %0, i32 %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vx_nxv2i32_i32: +define <vscale x 2 x i1> @intrinsic_vmsge_vx_nxv2i32_i32(<vscale x 2 x i32> %0, i32 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vx_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vmseq.vx v0, v8, a0 +; CHECK-NEXT: vmslt.vx v8, v8, a0 +; CHECK-NEXT: vmnot.m v0, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i32.i32( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i32.i32( <vscale x 2 x i32> %0, i32 %1, iXLen %2) @@ -1524,24 +1519,24 @@ entry: ret <vscale x 2 x i1> %a } -declare <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i32.i32( +declare <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i32.i32( <vscale x 2 x i1>, <vscale x 2 x i32>, i32, <vscale x 2 x i1>, iXLen); -define <vscale x 2 x i1> @intrinsic_vmseq_mask_vx_nxv2i32_i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, i32 %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv2i32_i32: +define <vscale x 2 x i1> @intrinsic_vmsge_mask_vx_nxv2i32_i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, i32 %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv2i32_i32: ; CHECK: # %bb.0: 
# %entry ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i32.i32( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i32.i32( <vscale x 2 x i1> %0, <vscale x 2 x i32> %1, i32 %2, @@ -1551,19 +1546,20 @@ entry: ret <vscale x 2 x i1> %a } -declare <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i32.i32( +declare <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i32.i32( <vscale x 4 x i32>, i32, iXLen); -define <vscale x 4 x i1> @intrinsic_vmseq_vx_nxv4i32_i32(<vscale x 4 x i32> %0, i32 %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vx_nxv4i32_i32: +define <vscale x 4 x i1> @intrinsic_vmsge_vx_nxv4i32_i32(<vscale x 4 x i32> %0, i32 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmseq.vx v0, v8, a0 +; CHECK-NEXT: vmslt.vx v0, v8, a0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret entry: - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i32.i32( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i32.i32( <vscale x 4 x i32> %0, i32 %1, iXLen %2) @@ -1571,24 +1567,24 @@ entry: ret <vscale x 4 x i1> %a } -declare <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i32.i32( +declare <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i32.i32( <vscale x 4 x i1>, <vscale x 4 x i32>, i32, <vscale x 4 x i1>, iXLen); -define <vscale x 4 x i1> @intrinsic_vmseq_mask_vx_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, i32 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv4i32_i32: +define <vscale x 4 x i1> @intrinsic_vmsge_mask_vx_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, i32 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; 
CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmseq.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmslt.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v2, v10 ; CHECK-NEXT: ret entry: - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i32.i32( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i32.i32( <vscale x 4 x i1> %0, <vscale x 4 x i32> %1, i32 %2, @@ -1598,19 +1594,20 @@ entry: ret <vscale x 4 x i1> %a } -declare <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i32.i32( +declare <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i32.i32( <vscale x 8 x i32>, i32, iXLen); -define <vscale x 8 x i1> @intrinsic_vmseq_vx_nxv8i32_i32(<vscale x 8 x i32> %0, i32 %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vx_nxv8i32_i32: +define <vscale x 8 x i1> @intrinsic_vmsge_vx_nxv8i32_i32(<vscale x 8 x i32> %0, i32 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmseq.vx v0, v8, a0 +; CHECK-NEXT: vmslt.vx v0, v8, a0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret entry: - %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i32.i32( + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i32.i32( <vscale x 8 x i32> %0, i32 %1, iXLen %2) @@ -1618,24 +1615,24 @@ entry: ret <vscale x 8 x i1> %a } -declare <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i32.i32( +declare <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i32.i32( <vscale x 8 x i1>, <vscale x 8 x i32>, i32, <vscale x 8 x i1>, iXLen); -define <vscale x 8 x i1> @intrinsic_vmseq_mask_vx_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, i32 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv8i32_i32: +define <vscale x 8 x i1> @intrinsic_vmsge_mask_vx_nxv8i32_i32(<vscale 
x 8 x i1> %0, <vscale x 8 x i32> %1, i32 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmseq.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmslt.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v4, v12 ; CHECK-NEXT: ret entry: - %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i32.i32( + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i32.i32( <vscale x 8 x i1> %0, <vscale x 8 x i32> %1, i32 %2, @@ -1645,13 +1642,13 @@ entry: ret <vscale x 8 x i1> %a } -declare <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i64.i64( +declare <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i64.i64( <vscale x 1 x i64>, i64, iXLen); -define <vscale x 1 x i1> @intrinsic_vmseq_vx_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, iXLen %2) nounwind { -; RV32-LABEL: intrinsic_vmseq_vx_nxv1i64_i64: +define <vscale x 1 x i1> @intrinsic_vmsge_vx_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, iXLen %2) nounwind { +; RV32-LABEL: intrinsic_vmsge_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: sw a1, 12(sp) @@ -1659,17 +1656,18 @@ define <vscale x 1 x i1> @intrinsic_vmseq_vx_nxv1i64_i64(<vscale x 1 x i64> %0, ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vmseq.vv v0, v8, v9 +; RV32-NEXT: vmsle.vv v0, v9, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; -; RV64-LABEL: intrinsic_vmseq_vx_nxv1i64_i64: +; RV64-LABEL: intrinsic_vmsge_vx_nxv1i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; RV64-NEXT: vmseq.vx v0, v8, a0 +; RV64-NEXT: vmslt.vx v8, v8, a0 +; RV64-NEXT: vmnot.m v0, v8 ; RV64-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i64.i64( + %a = call <vscale x 1 x i1> 
@llvm.riscv.vmsge.nxv1i64.i64( <vscale x 1 x i64> %0, i64 %1, iXLen %2) @@ -1677,39 +1675,39 @@ entry: ret <vscale x 1 x i1> %a } -declare <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i64.i64( +declare <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i64.i64( <vscale x 1 x i1>, <vscale x 1 x i64>, i64, <vscale x 1 x i1>, iXLen); -define <vscale x 1 x i1> @intrinsic_vmseq_mask_vx_nxv1i64_i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { -; RV32-LABEL: intrinsic_vmseq_mask_vx_nxv1i64_i64: +define <vscale x 1 x i1> @intrinsic_vmsge_mask_vx_nxv1i64_i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; RV32-LABEL: intrinsic_vmsge_mask_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; RV32-NEXT: vlse64.v v11, (a0), zero -; RV32-NEXT: vmv1r.v v10, v0 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vmv1r.v v11, v0 ; RV32-NEXT: vmv1r.v v0, v9 -; RV32-NEXT: vmseq.vv v10, v8, v11, v0.t -; RV32-NEXT: vmv.v.v v0, v10 +; RV32-NEXT: vmsle.vv v11, v10, v8, v0.t +; RV32-NEXT: vmv.v.v v0, v11 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; -; RV64-LABEL: intrinsic_vmseq_mask_vx_nxv1i64_i64: +; RV64-LABEL: intrinsic_vmsge_mask_vx_nxv1i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v10, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmv1r.v v0, v9 -; RV64-NEXT: vmseq.vx v10, v8, a0, v0.t -; RV64-NEXT: vmv.v.v v0, v10 +; RV64-NEXT: vmslt.vx v10, v8, a0, v0.t +; RV64-NEXT: vmxor.mm v0, v10, v9 ; RV64-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i64.i64( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i64.i64( <vscale x 1 x i1> %0, <vscale x 1 x i64> %1, i64 %2, @@ -1719,13 +1717,13 @@ entry: ret <vscale x 1 x i1> %a } -declare <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i64.i64( 
+declare <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i64.i64( <vscale x 2 x i64>, i64, iXLen); -define <vscale x 2 x i1> @intrinsic_vmseq_vx_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, iXLen %2) nounwind { -; RV32-LABEL: intrinsic_vmseq_vx_nxv2i64_i64: +define <vscale x 2 x i1> @intrinsic_vmsge_vx_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, iXLen %2) nounwind { +; RV32-LABEL: intrinsic_vmsge_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: sw a1, 12(sp) @@ -1733,17 +1731,18 @@ define <vscale x 2 x i1> @intrinsic_vmseq_vx_nxv2i64_i64(<vscale x 2 x i64> %0, ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vmseq.vv v0, v8, v10 +; RV32-NEXT: vmsle.vv v0, v10, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; -; RV64-LABEL: intrinsic_vmseq_vx_nxv2i64_i64: +; RV64-LABEL: intrinsic_vmsge_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma -; RV64-NEXT: vmseq.vx v0, v8, a0 +; RV64-NEXT: vmslt.vx v0, v8, a0 +; RV64-NEXT: vmnot.m v0, v0 ; RV64-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i64.i64( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i64.i64( <vscale x 2 x i64> %0, i64 %1, iXLen %2) @@ -1751,15 +1750,15 @@ entry: ret <vscale x 2 x i1> %a } -declare <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i64.i64( +declare <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i64.i64( <vscale x 2 x i1>, <vscale x 2 x i64>, i64, <vscale x 2 x i1>, iXLen); -define <vscale x 2 x i1> @intrinsic_vmseq_mask_vx_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { -; RV32-LABEL: intrinsic_vmseq_mask_vx_nxv2i64_i64: +define <vscale x 2 x i1> @intrinsic_vmsge_mask_vx_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; RV32-LABEL: intrinsic_vmsge_mask_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, 
sp, -16 ; RV32-NEXT: sw a1, 12(sp) @@ -1767,23 +1766,23 @@ define <vscale x 2 x i1> @intrinsic_vmseq_mask_vx_nxv2i64_i64(<vscale x 2 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmv1r.v v11, v0 +; RV32-NEXT: vmv1r.v v2, v0 ; RV32-NEXT: vmv1r.v v0, v10 -; RV32-NEXT: vmseq.vv v11, v8, v12, v0.t -; RV32-NEXT: vmv1r.v v0, v11 +; RV32-NEXT: vmsle.vv v2, v12, v8, v0.t +; RV32-NEXT: vmv1r.v v0, v2 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; -; RV64-LABEL: intrinsic_vmseq_mask_vx_nxv2i64_i64: +; RV64-LABEL: intrinsic_vmsge_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v11, v0 +; RV64-NEXT: vmv1r.v v2, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 -; RV64-NEXT: vmseq.vx v11, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v11 +; RV64-NEXT: vmslt.vx v2, v8, a0, v0.t +; RV64-NEXT: vmxor.mm v0, v2, v10 ; RV64-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i64.i64( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i64.i64( <vscale x 2 x i1> %0, <vscale x 2 x i64> %1, i64 %2, @@ -1793,13 +1792,13 @@ entry: ret <vscale x 2 x i1> %a } -declare <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i64.i64( +declare <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i64.i64( <vscale x 4 x i64>, i64, iXLen); -define <vscale x 4 x i1> @intrinsic_vmseq_vx_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, iXLen %2) nounwind { -; RV32-LABEL: intrinsic_vmseq_vx_nxv4i64_i64: +define <vscale x 4 x i1> @intrinsic_vmsge_vx_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, iXLen %2) nounwind { +; RV32-LABEL: intrinsic_vmsge_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: sw a1, 12(sp) @@ -1807,17 +1806,18 @@ define <vscale x 4 x i1> @intrinsic_vmseq_vx_nxv4i64_i64(<vscale x 4 x i64> %0, ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmseq.vv v0, 
v8, v12 +; RV32-NEXT: vmsle.vv v0, v12, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; -; RV64-LABEL: intrinsic_vmseq_vx_nxv4i64_i64: +; RV64-LABEL: intrinsic_vmsge_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmseq.vx v0, v8, a0 +; RV64-NEXT: vmslt.vx v0, v8, a0 +; RV64-NEXT: vmnot.m v0, v0 ; RV64-NEXT: ret entry: - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i64.i64( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i64.i64( <vscale x 4 x i64> %0, i64 %1, iXLen %2) @@ -1825,15 +1825,15 @@ entry: ret <vscale x 4 x i1> %a } -declare <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i64.i64( +declare <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i64.i64( <vscale x 4 x i1>, <vscale x 4 x i64>, i64, <vscale x 4 x i1>, iXLen); -define <vscale x 4 x i1> @intrinsic_vmseq_mask_vx_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { -; RV32-LABEL: intrinsic_vmseq_mask_vx_nxv4i64_i64: +define <vscale x 4 x i1> @intrinsic_vmsge_mask_vx_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; RV32-LABEL: intrinsic_vmsge_mask_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: sw a1, 12(sp) @@ -1841,23 +1841,23 @@ define <vscale x 4 x i1> @intrinsic_vmseq_mask_vx_nxv4i64_i64(<vscale x 4 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vmv1r.v v13, v0 +; RV32-NEXT: vmv1r.v v4, v0 ; RV32-NEXT: vmv1r.v v0, v12 -; RV32-NEXT: vmseq.vv v13, v8, v16, v0.t -; RV32-NEXT: vmv1r.v v0, v13 +; RV32-NEXT: vmsle.vv v4, v16, v8, v0.t +; RV32-NEXT: vmv1r.v v0, v4 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; -; RV64-LABEL: intrinsic_vmseq_mask_vx_nxv4i64_i64: +; RV64-LABEL: intrinsic_vmsge_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v13, v0 +; RV64-NEXT: vmv1r.v v4, v0 ; RV64-NEXT: 
vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 -; RV64-NEXT: vmseq.vx v13, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v13 +; RV64-NEXT: vmslt.vx v4, v8, a0, v0.t +; RV64-NEXT: vmxor.mm v0, v4, v12 ; RV64-NEXT: ret entry: - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i64.i64( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i64.i64( <vscale x 4 x i1> %0, <vscale x 4 x i64> %1, i64 %2, @@ -1867,452 +1867,452 @@ entry: ret <vscale x 4 x i1> %a } -define <vscale x 1 x i1> @intrinsic_vmseq_vi_nxv1i8_i8(<vscale x 1 x i8> %0, iXLen %1) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vi_nxv1i8_i8: +define <vscale x 1 x i1> @intrinsic_vmsge_vi_nxv1i8_i8(<vscale x 1 x i8> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vmseq.vi v0, v8, 9 +; CHECK-NEXT: vmsgt.vi v0, v8, -16 ; CHECK-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i8.i8( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i8.i8( <vscale x 1 x i8> %0, - i8 9, + i8 -15, iXLen %1) ret <vscale x 1 x i1> %a } -define <vscale x 1 x i1> @intrinsic_vmseq_mask_vi_nxv1i8_i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv1i8_i8: +define <vscale x 1 x i1> @intrinsic_vmsge_mask_vi_nxv1i8_i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t +; CHECK-NEXT: vmsgt.vi v10, v8, -15, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i8.i8( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i8.i8( <vscale x 1 x i1> %0, <vscale x 1 x i8> %1, - i8 9, + i8 -14, 
<vscale x 1 x i1> %2, iXLen %3) ret <vscale x 1 x i1> %a } -define <vscale x 2 x i1> @intrinsic_vmseq_vi_nxv2i8_i8(<vscale x 2 x i8> %0, iXLen %1) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vi_nxv2i8_i8: +define <vscale x 2 x i1> @intrinsic_vmsge_vi_nxv2i8_i8(<vscale x 2 x i8> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vmseq.vi v0, v8, 9 +; CHECK-NEXT: vmsgt.vi v0, v8, -14 ; CHECK-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i8.i8( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i8.i8( <vscale x 2 x i8> %0, - i8 9, + i8 -13, iXLen %1) ret <vscale x 2 x i1> %a } -define <vscale x 2 x i1> @intrinsic_vmseq_mask_vi_nxv2i8_i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv2i8_i8: +define <vscale x 2 x i1> @intrinsic_vmsge_mask_vi_nxv2i8_i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t +; CHECK-NEXT: vmsgt.vi v10, v8, -13, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i8.i8( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i8.i8( <vscale x 2 x i1> %0, <vscale x 2 x i8> %1, - i8 9, + i8 -12, <vscale x 2 x i1> %2, iXLen %3) ret <vscale x 2 x i1> %a } -define <vscale x 4 x i1> @intrinsic_vmseq_vi_nxv4i8_i8(<vscale x 4 x i8> %0, iXLen %1) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vi_nxv4i8_i8: +define <vscale x 4 x i1> @intrinsic_vmsge_vi_nxv4i8_i8(<vscale x 4 x i8> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma 
-; CHECK-NEXT: vmseq.vi v0, v8, 9 +; CHECK-NEXT: vmsgt.vi v0, v8, -12 ; CHECK-NEXT: ret entry: - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i8.i8( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i8.i8( <vscale x 4 x i8> %0, - i8 9, + i8 -11, iXLen %1) ret <vscale x 4 x i1> %a } -define <vscale x 4 x i1> @intrinsic_vmseq_mask_vi_nxv4i8_i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv4i8_i8: +define <vscale x 4 x i1> @intrinsic_vmsge_mask_vi_nxv4i8_i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t +; CHECK-NEXT: vmsgt.vi v10, v8, -11, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret entry: - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i8.i8( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i8.i8( <vscale x 4 x i1> %0, <vscale x 4 x i8> %1, - i8 9, + i8 -10, <vscale x 4 x i1> %2, iXLen %3) ret <vscale x 4 x i1> %a } -define <vscale x 8 x i1> @intrinsic_vmseq_vi_nxv8i8_i8(<vscale x 8 x i8> %0, iXLen %1) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vi_nxv8i8_i8: +define <vscale x 8 x i1> @intrinsic_vmsge_vi_nxv8i8_i8(<vscale x 8 x i8> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vmseq.vi v0, v8, 9 +; CHECK-NEXT: vmsgt.vi v0, v8, -10 ; CHECK-NEXT: ret entry: - %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i8.i8( + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i8.i8( <vscale x 8 x i8> %0, - i8 9, + i8 -9, iXLen %1) ret <vscale x 8 x i1> %a } -define <vscale x 8 x i1> @intrinsic_vmseq_mask_vi_nxv8i8_i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, <vscale x 8 x i1> %2, iXLen %3) 
nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv8i8_i8: +define <vscale x 8 x i1> @intrinsic_vmsge_mask_vi_nxv8i8_i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t +; CHECK-NEXT: vmsgt.vi v10, v8, -9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret entry: - %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i8.i8( + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i8.i8( <vscale x 8 x i1> %0, <vscale x 8 x i8> %1, - i8 9, + i8 -8, <vscale x 8 x i1> %2, iXLen %3) ret <vscale x 8 x i1> %a } -define <vscale x 16 x i1> @intrinsic_vmseq_vi_nxv16i8_i8(<vscale x 16 x i8> %0, iXLen %1) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vi_nxv16i8_i8: +define <vscale x 16 x i1> @intrinsic_vmsge_vi_nxv16i8_i8(<vscale x 16 x i8> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vmseq.vi v0, v8, 9 +; CHECK-NEXT: vmsgt.vi v0, v8, -8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 16 x i1> @llvm.riscv.vmseq.nxv16i8.i8( + %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.nxv16i8.i8( <vscale x 16 x i8> %0, - i8 9, + i8 -7, iXLen %1) ret <vscale x 16 x i1> %a } -define <vscale x 16 x i1> @intrinsic_vmseq_mask_vi_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv16i8_i8: +define <vscale x 16 x i1> @intrinsic_vmsge_mask_vi_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; 
CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmseq.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgt.vi v2, v8, -7, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: - %a = call <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i8.i8( + %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i8.i8( <vscale x 16 x i1> %0, <vscale x 16 x i8> %1, - i8 9, + i8 -6, <vscale x 16 x i1> %2, iXLen %3) ret <vscale x 16 x i1> %a } -define <vscale x 32 x i1> @intrinsic_vmseq_vi_nxv32i8_i8(<vscale x 32 x i8> %0, iXLen %1) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vi_nxv32i8_i8: +define <vscale x 32 x i1> @intrinsic_vmsge_vi_nxv32i8_i8(<vscale x 32 x i8> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vmseq.vi v0, v8, 9 +; CHECK-NEXT: vmsgt.vi v0, v8, -6 ; CHECK-NEXT: ret entry: - %a = call <vscale x 32 x i1> @llvm.riscv.vmseq.nxv32i8.i8( + %a = call <vscale x 32 x i1> @llvm.riscv.vmsge.nxv32i8.i8( <vscale x 32 x i8> %0, - i8 9, + i8 -5, iXLen %1) ret <vscale x 32 x i1> %a } -define <vscale x 32 x i1> @intrinsic_vmseq_mask_vi_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv32i8_i8: +define <vscale x 32 x i1> @intrinsic_vmsge_mask_vi_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmseq.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgt.vi v4, v8, -5, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: - %a = call <vscale x 32 x i1> @llvm.riscv.vmseq.mask.nxv32i8.i8( + %a = call <vscale x 32 x i1> @llvm.riscv.vmsge.mask.nxv32i8.i8( 
<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, - i8 9, + i8 -4, <vscale x 32 x i1> %2, iXLen %3) ret <vscale x 32 x i1> %a } -define <vscale x 1 x i1> @intrinsic_vmseq_vi_nxv1i16_i16(<vscale x 1 x i16> %0, iXLen %1) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vi_nxv1i16_i16: +define <vscale x 1 x i1> @intrinsic_vmsge_vi_nxv1i16_i16(<vscale x 1 x i16> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vmseq.vi v0, v8, 9 +; CHECK-NEXT: vmsgt.vi v0, v8, -4 ; CHECK-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i16.i16( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i16.i16( <vscale x 1 x i16> %0, - i16 9, + i16 -3, iXLen %1) ret <vscale x 1 x i1> %a } -define <vscale x 1 x i1> @intrinsic_vmseq_mask_vi_nxv1i16_i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv1i16_i16: +define <vscale x 1 x i1> @intrinsic_vmsge_mask_vi_nxv1i16_i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t +; CHECK-NEXT: vmsgt.vi v10, v8, -3, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i16.i16( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i16.i16( <vscale x 1 x i1> %0, <vscale x 1 x i16> %1, - i16 9, + i16 -2, <vscale x 1 x i1> %2, iXLen %3) ret <vscale x 1 x i1> %a } -define <vscale x 2 x i1> @intrinsic_vmseq_vi_nxv2i16_i16(<vscale x 2 x i16> %0, iXLen %1) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vi_nxv2i16_i16: +define <vscale x 2 x i1> @intrinsic_vmsge_vi_nxv2i16_i16(<vscale x 2 x i16> %0, iXLen %1) nounwind { +; CHECK-LABEL: 
intrinsic_vmsge_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vmseq.vi v0, v8, 9 +; CHECK-NEXT: vmsgt.vi v0, v8, -2 ; CHECK-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i16.i16( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i16.i16( <vscale x 2 x i16> %0, - i16 9, + i16 -1, iXLen %1) ret <vscale x 2 x i1> %a } -define <vscale x 2 x i1> @intrinsic_vmseq_mask_vi_nxv2i16_i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv2i16_i16: +define <vscale x 2 x i1> @intrinsic_vmsge_mask_vi_nxv2i16_i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t +; CHECK-NEXT: vmsgt.vi v10, v8, -1, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i16.i16( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i16.i16( <vscale x 2 x i1> %0, <vscale x 2 x i16> %1, - i16 9, + i16 0, <vscale x 2 x i1> %2, iXLen %3) ret <vscale x 2 x i1> %a } -define <vscale x 4 x i1> @intrinsic_vmseq_vi_nxv4i16_i16(<vscale x 4 x i16> %0, iXLen %1) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vi_nxv4i16_i16: +define <vscale x 4 x i1> @intrinsic_vmsge_vi_nxv4i16_i16(<vscale x 4 x i16> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vmseq.vi v0, v8, 9 +; CHECK-NEXT: vmsgt.vi v0, v8, -1 ; CHECK-NEXT: ret entry: - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i16.i16( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i16.i16( <vscale x 4 x i16> %0, - i16 9, + i16 0, iXLen %1) ret <vscale x 4 x 
i1> %a } -define <vscale x 4 x i1> @intrinsic_vmseq_mask_vi_nxv4i16_i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv4i16_i16: +define <vscale x 4 x i1> @intrinsic_vmsge_mask_vi_nxv4i16_i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t +; CHECK-NEXT: vmsgt.vi v10, v8, 0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret entry: - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i16.i16( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i16.i16( <vscale x 4 x i1> %0, <vscale x 4 x i16> %1, - i16 9, + i16 1, <vscale x 4 x i1> %2, iXLen %3) ret <vscale x 4 x i1> %a } -define <vscale x 8 x i1> @intrinsic_vmseq_vi_nxv8i16_i16(<vscale x 8 x i16> %0, iXLen %1) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vi_nxv8i16_i16: +define <vscale x 8 x i1> @intrinsic_vmsge_vi_nxv8i16_i16(<vscale x 8 x i16> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vmseq.vi v0, v8, 9 +; CHECK-NEXT: vmsgt.vi v0, v8, 1 ; CHECK-NEXT: ret entry: - %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i16.i16( + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i16.i16( <vscale x 8 x i16> %0, - i16 9, + i16 2, iXLen %1) ret <vscale x 8 x i1> %a } -define <vscale x 8 x i1> @intrinsic_vmseq_mask_vi_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv8i16_i16: +define <vscale x 8 x i1> @intrinsic_vmsge_mask_vi_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: 
intrinsic_vmsge_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmseq.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgt.vi v2, v8, 2, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: - %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i16.i16( + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i16.i16( <vscale x 8 x i1> %0, <vscale x 8 x i16> %1, - i16 9, + i16 3, <vscale x 8 x i1> %2, iXLen %3) ret <vscale x 8 x i1> %a } -define <vscale x 16 x i1> @intrinsic_vmseq_vi_nxv16i16_i16(<vscale x 16 x i16> %0, iXLen %1) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vi_nxv16i16_i16: +define <vscale x 16 x i1> @intrinsic_vmsge_vi_nxv16i16_i16(<vscale x 16 x i16> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vi_nxv16i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmseq.vi v0, v8, 9 +; CHECK-NEXT: vmsgt.vi v0, v8, 3 ; CHECK-NEXT: ret entry: - %a = call <vscale x 16 x i1> @llvm.riscv.vmseq.nxv16i16.i16( + %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.nxv16i16.i16( <vscale x 16 x i16> %0, - i16 9, + i16 4, iXLen %1) ret <vscale x 16 x i1> %a } -define <vscale x 16 x i1> @intrinsic_vmseq_mask_vi_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv16i16_i16: +define <vscale x 16 x i1> @intrinsic_vmsge_mask_vi_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmseq.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgt.vi v4, v8, 4, 
v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: - %a = call <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i16.i16( + %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i16.i16( <vscale x 16 x i1> %0, <vscale x 16 x i16> %1, - i16 9, + i16 5, <vscale x 16 x i1> %2, iXLen %3) ret <vscale x 16 x i1> %a } -define <vscale x 1 x i1> @intrinsic_vmseq_vi_nxv1i32_i32(<vscale x 1 x i32> %0, iXLen %1) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vi_nxv1i32_i32: +define <vscale x 1 x i1> @intrinsic_vmsge_vi_nxv1i32_i32(<vscale x 1 x i32> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; CHECK-NEXT: vmseq.vi v0, v8, 9 +; CHECK-NEXT: vmsgt.vi v0, v8, 5 ; CHECK-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i32.i32( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i32.i32( <vscale x 1 x i32> %0, - i32 9, + i32 6, iXLen %1) ret <vscale x 1 x i1> %a } -define <vscale x 1 x i1> @intrinsic_vmseq_mask_vi_nxv1i32_i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv1i32_i32: +define <vscale x 1 x i1> @intrinsic_vmsge_mask_vi_nxv1i32_i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t +; CHECK-NEXT: vmsgt.vi v10, v8, 6, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i32.i32( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i32.i32( <vscale x 1 x i1> %0, <vscale x 1 x i32> %1, - i32 9, + i32 7, <vscale x 1 x i1> %2, iXLen %3) ret <vscale x 1 x i1> %a } -define <vscale x 2 x i1> @intrinsic_vmseq_vi_nxv2i32_i32(<vscale x 2 x 
i32> %0, iXLen %1) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vi_nxv2i32_i32: +define <vscale x 2 x i1> @intrinsic_vmsge_vi_nxv2i32_i32(<vscale x 2 x i32> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vmseq.vi v0, v8, 9 +; CHECK-NEXT: vmsgt.vi v0, v8, 7 ; CHECK-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i32.i32( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i32.i32( <vscale x 2 x i32> %0, - i32 9, + i32 8, iXLen %1) ret <vscale x 2 x i1> %a } -define <vscale x 2 x i1> @intrinsic_vmseq_mask_vi_nxv2i32_i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv2i32_i32: +define <vscale x 2 x i1> @intrinsic_vmsge_mask_vi_nxv2i32_i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t +; CHECK-NEXT: vmsgt.vi v10, v8, 8, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i32.i32( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i32.i32( <vscale x 2 x i1> %0, <vscale x 2 x i32> %1, i32 9, @@ -2322,84 +2322,84 @@ entry: ret <vscale x 2 x i1> %a } -define <vscale x 4 x i1> @intrinsic_vmseq_vi_nxv4i32_i32(<vscale x 4 x i32> %0, iXLen %1) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vi_nxv4i32_i32: +define <vscale x 4 x i1> @intrinsic_vmsge_vi_nxv4i32_i32(<vscale x 4 x i32> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmseq.vi v0, v8, 9 +; CHECK-NEXT: vmsgt.vi v0, v8, 9 ; CHECK-NEXT: ret entry: - %a = call 
<vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i32.i32( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i32.i32( <vscale x 4 x i32> %0, - i32 9, + i32 10, iXLen %1) ret <vscale x 4 x i1> %a } -define <vscale x 4 x i1> @intrinsic_vmseq_mask_vi_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv4i32_i32: +define <vscale x 4 x i1> @intrinsic_vmsge_mask_vi_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmseq.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgt.vi v2, v8, 10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i32.i32( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i32.i32( <vscale x 4 x i1> %0, <vscale x 4 x i32> %1, - i32 9, + i32 11, <vscale x 4 x i1> %2, iXLen %3) ret <vscale x 4 x i1> %a } -define <vscale x 8 x i1> @intrinsic_vmseq_vi_nxv8i32_i32(<vscale x 8 x i32> %0, iXLen %1) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vi_nxv8i32_i32: +define <vscale x 8 x i1> @intrinsic_vmsge_vi_nxv8i32_i32(<vscale x 8 x i32> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmseq.vi v0, v8, 9 +; CHECK-NEXT: vmsgt.vi v0, v8, 11 ; CHECK-NEXT: ret entry: - %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i32.i32( + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i32.i32( <vscale x 8 x i32> %0, - i32 9, + i32 12, iXLen %1) ret <vscale x 8 x i1> %a } -define <vscale x 8 x i1> @intrinsic_vmseq_mask_vi_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) 
nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv8i32_i32: +define <vscale x 8 x i1> @intrinsic_vmsge_mask_vi_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmseq.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgt.vi v4, v8, 12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: - %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i32.i32( + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i32.i32( <vscale x 8 x i1> %0, <vscale x 8 x i32> %1, - i32 9, + i32 13, <vscale x 8 x i1> %2, iXLen %3) ret <vscale x 8 x i1> %a } -define <vscale x 1 x i1> @intrinsic_vmseq_vi_nxv1i64_i64(<vscale x 1 x i64> %0, iXLen %1) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vi_nxv1i64_i64: +define <vscale x 1 x i1> @intrinsic_vmsge_vi_nxv1i64_i64(<vscale x 1 x i64> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vi_nxv1i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vmseq.vi v0, v8, 9 +; CHECK-NEXT: vmsgt.vi v0, v8, 8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i64.i64( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i64.i64( <vscale x 1 x i64> %0, i64 9, iXLen %1) @@ -2407,17 +2407,17 @@ entry: ret <vscale x 1 x i1> %a } -define <vscale x 1 x i1> @intrinsic_vmseq_mask_vi_nxv1i64_i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv1i64_i64: +define <vscale x 1 x i1> @intrinsic_vmsge_mask_vi_nxv1i64_i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv1i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: 
vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t +; CHECK-NEXT: vmsgt.vi v10, v8, 8, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i64.i64( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i64.i64( <vscale x 1 x i1> %0, <vscale x 1 x i64> %1, i64 9, @@ -2427,14 +2427,14 @@ entry: ret <vscale x 1 x i1> %a } -define <vscale x 2 x i1> @intrinsic_vmseq_vi_nxv2i64_i64(<vscale x 2 x i64> %0, iXLen %1) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vi_nxv2i64_i64: +define <vscale x 2 x i1> @intrinsic_vmsge_vi_nxv2i64_i64(<vscale x 2 x i64> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmseq.vi v0, v8, 9 +; CHECK-NEXT: vmsgt.vi v0, v8, 8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i64.i64( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i64.i64( <vscale x 2 x i64> %0, i64 9, iXLen %1) @@ -2442,17 +2442,17 @@ entry: ret <vscale x 2 x i1> %a } -define <vscale x 2 x i1> @intrinsic_vmseq_mask_vi_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv2i64_i64: +define <vscale x 2 x i1> @intrinsic_vmsge_mask_vi_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmseq.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgt.vi v2, v8, 8, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i64.i64( + %a = call <vscale x 2 x i1> 
@llvm.riscv.vmsge.mask.nxv2i64.i64( <vscale x 2 x i1> %0, <vscale x 2 x i64> %1, i64 9, @@ -2462,14 +2462,14 @@ entry: ret <vscale x 2 x i1> %a } -define <vscale x 4 x i1> @intrinsic_vmseq_vi_nxv4i64_i64(<vscale x 4 x i64> %0, iXLen %1) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vi_nxv4i64_i64: +define <vscale x 4 x i1> @intrinsic_vmsge_vi_nxv4i64_i64(<vscale x 4 x i64> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmseq.vi v0, v8, 9 +; CHECK-NEXT: vmsgt.vi v0, v8, 8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i64.i64( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i64.i64( <vscale x 4 x i64> %0, i64 9, iXLen %1) @@ -2477,17 +2477,17 @@ entry: ret <vscale x 4 x i1> %a } -define <vscale x 4 x i1> @intrinsic_vmseq_mask_vi_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv4i64_i64: +define <vscale x 4 x i1> @intrinsic_vmsge_mask_vi_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmseq.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgt.vi v4, v8, 8, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i64.i64( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i64.i64( <vscale x 4 x i1> %0, <vscale x 4 x i64> %1, i64 9, @@ -2496,3 +2496,364 @@ entry: ret <vscale x 4 x i1> %a } + +; Test cases where the mask and maskedoff are the same value. 
+define <vscale x 1 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv1i8_i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, i8 %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv1i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmslt.vx v8, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i8.i8( + <vscale x 1 x i1> %0, + <vscale x 1 x i8> %1, + i8 %2, + <vscale x 1 x i1> %0, + iXLen %3) + + ret <vscale x 1 x i1> %a +} + +define <vscale x 2 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv2i8_i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, i8 %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv2i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmslt.vx v8, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i8.i8( + <vscale x 2 x i1> %0, + <vscale x 2 x i8> %1, + i8 %2, + <vscale x 2 x i1> %0, + iXLen %3) + + ret <vscale x 2 x i1> %a +} + +define <vscale x 4 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv4i8_i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, i8 %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv4i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vmslt.vx v8, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i8.i8( + <vscale x 4 x i1> %0, + <vscale x 4 x i8> %1, + i8 %2, + <vscale x 4 x i1> %0, + iXLen %3) + + ret <vscale x 4 x i1> %a +} + +define <vscale x 8 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv8i8_i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, i8 %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv8i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; 
CHECK-NEXT: vmslt.vx v8, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i8.i8( + <vscale x 8 x i1> %0, + <vscale x 8 x i8> %1, + i8 %2, + <vscale x 8 x i1> %0, + iXLen %3) + + ret <vscale x 8 x i1> %a +} + +define <vscale x 16 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, i8 %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv16i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vmslt.vx v2, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v2 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i8.i8( + <vscale x 16 x i1> %0, + <vscale x 16 x i8> %1, + i8 %2, + <vscale x 16 x i1> %0, + iXLen %3) + + ret <vscale x 16 x i1> %a +} + +define <vscale x 32 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, i8 %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv32i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vmslt.vx v4, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v4 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x i1> @llvm.riscv.vmsge.mask.nxv32i8.i8( + <vscale x 32 x i1> %0, + <vscale x 32 x i8> %1, + i8 %2, + <vscale x 32 x i1> %0, + iXLen %3) + + ret <vscale x 32 x i1> %a +} + +define <vscale x 1 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv1i16_i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, i16 %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv1i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vmslt.vx v8, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i16.i16( + <vscale x 1 x i1> %0, + <vscale x 1 x i16> %1, + i16 %2, + <vscale x 1 x i1> %0, + iXLen %3) + + 
ret <vscale x 1 x i1> %a +} + +define <vscale x 2 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv2i16_i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, i16 %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv2i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vmslt.vx v8, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i16.i16( + <vscale x 2 x i1> %0, + <vscale x 2 x i16> %1, + i16 %2, + <vscale x 2 x i1> %0, + iXLen %3) + + ret <vscale x 2 x i1> %a +} + +define <vscale x 4 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv4i16_i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, i16 %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv4i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vmslt.vx v8, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i16.i16( + <vscale x 4 x i1> %0, + <vscale x 4 x i16> %1, + i16 %2, + <vscale x 4 x i1> %0, + iXLen %3) + + ret <vscale x 4 x i1> %a +} + +define <vscale x 8 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, i16 %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv8i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vmslt.vx v2, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v2 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i16.i16( + <vscale x 8 x i1> %0, + <vscale x 8 x i16> %1, + i16 %2, + <vscale x 8 x i1> %0, + iXLen %3) + + ret <vscale x 8 x i1> %a +} + +define <vscale x 16 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, i16 %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv16i16_i16: +; CHECK: # 
%bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vmslt.vx v4, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v4 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i16.i16( + <vscale x 16 x i1> %0, + <vscale x 16 x i16> %1, + i16 %2, + <vscale x 16 x i1> %0, + iXLen %3) + + ret <vscale x 16 x i1> %a +} + +define <vscale x 1 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv1i32_i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, i32 %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv1i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmslt.vx v8, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i32.i32( + <vscale x 1 x i1> %0, + <vscale x 1 x i32> %1, + i32 %2, + <vscale x 1 x i1> %0, + iXLen %3) + + ret <vscale x 1 x i1> %a +} + +define <vscale x 2 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv2i32_i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, i32 %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv2i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vmslt.vx v8, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i32.i32( + <vscale x 2 x i1> %0, + <vscale x 2 x i32> %1, + i32 %2, + <vscale x 2 x i1> %0, + iXLen %3) + + ret <vscale x 2 x i1> %a +} + +define <vscale x 4 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, i32 %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv4i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vmslt.vx v2, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v2 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i32.i32( + <vscale x 4 x i1> %0, 
+ <vscale x 4 x i32> %1, + i32 %2, + <vscale x 4 x i1> %0, + iXLen %3) + + ret <vscale x 4 x i1> %a +} + +define <vscale x 8 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, i32 %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv8i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vmslt.vx v4, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v4 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i32.i32( + <vscale x 8 x i1> %0, + <vscale x 8 x i32> %1, + i32 %2, + <vscale x 8 x i1> %0, + iXLen %3) + + ret <vscale x 8 x i1> %a +} + +define <vscale x 1 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv1i64_i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, i64 %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv1i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vmsle.vv v0, v9, v8, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv1i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmslt.vx v8, v8, a0 +; RV64-NEXT: vmandn.mm v0, v0, v8 +; RV64-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i64.i64( + <vscale x 1 x i1> %0, + <vscale x 1 x i64> %1, + i64 %2, + <vscale x 1 x i1> %0, + iXLen %3) + + ret <vscale x 1 x i1> %a +} + +define <vscale x 2 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, i64 %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv2i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; 
RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vmsle.vv v0, v10, v8, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv2i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vmslt.vx v2, v8, a0 +; RV64-NEXT: vmandn.mm v0, v0, v2 +; RV64-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i64.i64( + <vscale x 2 x i1> %0, + <vscale x 2 x i64> %1, + i64 %2, + <vscale x 2 x i1> %0, + iXLen %3) + + ret <vscale x 2 x i1> %a +} + +define <vscale x 4 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, i64 %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv4i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vmsle.vv v0, v12, v8, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv4i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vmslt.vx v4, v8, a0 +; RV64-NEXT: vmandn.mm v0, v0, v4 +; RV64-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i64.i64( + <vscale x 4 x i1> %0, + <vscale x 4 x i64> %1, + i64 %2, + <vscale x 4 x i1> %0, + iXLen %3) + + ret <vscale x 4 x i1> %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll index 9410a99..c9abf43 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll @@ -34,10 +34,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsgeu.mask.nxv1i8( define <vscale x 1 x i1> @intrinsic_vmsgeu_mask_vv_nxv1i8_nxv1i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, <vscale x 1 x i8> %3, 
iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsgeu.mask.nxv2i8( define <vscale x 2 x i1> @intrinsic_vmsgeu_mask_vv_nxv2i8_nxv2i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, <vscale x 2 x i8> %2, <vscale x 2 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i8( define <vscale x 4 x i1> @intrinsic_vmsgeu_mask_vv_nxv4i8_nxv4i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, <vscale x 4 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -190,10 +187,9 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i8( define <vscale x 8 x i1> @intrinsic_vmsgeu_mask_vv_nxv8i8_nxv8i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, <vscale x 8 x i8> %3, iXLen %4) nounwind { ; 
CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -242,12 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgeu.mask.nxv16i8( define <vscale x 16 x i1> @intrinsic_vmsgeu_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v14, v10, v8 -; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsleu.vv v8, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsleu.vv v0, v10, v8 +; CHECK-NEXT: vmsleu.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.nxv16i8( @@ -294,12 +289,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsgeu.mask.nxv32i8( define <vscale x 32 x i1> @intrinsic_vmsgeu_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v20, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsleu.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsleu.vv v0, v12, v8 +; CHECK-NEXT: vmsleu.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 32 x i1> @llvm.riscv.vmsgeu.nxv32i8( @@ -346,10 
+340,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsgeu.mask.nxv1i16( define <vscale x 1 x i1> @intrinsic_vmsgeu_mask_vv_nxv1i16_nxv1i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, <vscale x 1 x i16> %2, <vscale x 1 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -398,10 +391,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsgeu.mask.nxv2i16( define <vscale x 2 x i1> @intrinsic_vmsgeu_mask_vv_nxv2i16_nxv2i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, <vscale x 2 x i16> %2, <vscale x 2 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -450,10 +442,9 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i16( define <vscale x 4 x i1> @intrinsic_vmsgeu_mask_vv_nxv4i16_nxv4i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, <vscale x 4 x i16> %2, <vscale x 4 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -502,12 
+493,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i16( define <vscale x 8 x i1> @intrinsic_vmsgeu_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v14, v10, v8 -; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsleu.vv v8, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsleu.vv v0, v10, v8 +; CHECK-NEXT: vmsleu.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.nxv8i16( @@ -554,12 +544,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgeu.mask.nxv16i16( define <vscale x 16 x i1> @intrinsic_vmsgeu_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v20, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsleu.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsleu.vv v0, v12, v8 +; CHECK-NEXT: vmsleu.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.nxv16i16( @@ -606,10 +595,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsgeu.mask.nxv1i32( define <vscale x 1 x i1> @intrinsic_vmsgeu_mask_vv_nxv1i32_nxv1i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i32> %2, <vscale x 1 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmsleu.vv 
v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -658,10 +646,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsgeu.mask.nxv2i32( define <vscale x 2 x i1> @intrinsic_vmsgeu_mask_vv_nxv2i32_nxv2i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i32> %2, <vscale x 2 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -710,12 +697,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i32( define <vscale x 4 x i1> @intrinsic_vmsgeu_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v14, v10, v8 -; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsleu.vv v8, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsleu.vv v0, v10, v8 +; CHECK-NEXT: vmsleu.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.nxv4i32( @@ -762,12 +748,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i32( define <vscale x 8 x i1> @intrinsic_vmsgeu_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vmsgeu_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v20, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsleu.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsleu.vv v0, v12, v8 +; CHECK-NEXT: vmsleu.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.nxv8i32( @@ -814,10 +799,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsgeu.mask.nxv1i64( define <vscale x 1 x i1> @intrinsic_vmsgeu_mask_vv_nxv1i64_nxv1i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i64> %2, <vscale x 1 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -866,12 +850,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsgeu.mask.nxv2i64( define <vscale x 2 x i1> @intrinsic_vmsgeu_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v14, v10, v8 -; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsleu.vv v8, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsleu.vv v0, v10, v8 +; CHECK-NEXT: vmsleu.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 2 x i1> @llvm.riscv.vmsgeu.nxv2i64( @@ -918,12 +901,11 @@ declare 
<vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i64( define <vscale x 4 x i1> @intrinsic_vmsgeu_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v20, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsleu.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsleu.vv v0, v12, v8 +; CHECK-NEXT: vmsleu.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.nxv4i64( @@ -1141,8 +1123,8 @@ define <vscale x 16 x i1> @intrinsic_vmsgeu_vx_nxv16i8_i8(<vscale x 16 x i8> %0, ; CHECK-LABEL: intrinsic_vmsgeu_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vmsltu.vx v10, v8, a0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmsltu.vx v0, v8, a0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.nxv16i8.i8( @@ -1163,11 +1145,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgeu.mask.nxv16i8.i8( define <vscale x 16 x i1> @intrinsic_vmsgeu_mask_vx_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, i8 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsltu.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmxor.mm v0, v11, v10 +; CHECK-NEXT: vmsltu.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v2, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.mask.nxv16i8.i8( @@ -1189,8 +1171,8 @@ define <vscale x 32 x i1> 
@intrinsic_vmsgeu_vx_nxv32i8_i8(<vscale x 32 x i8> %0, ; CHECK-LABEL: intrinsic_vmsgeu_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vmsltu.vx v12, v8, a0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmsltu.vx v0, v8, a0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> @llvm.riscv.vmsgeu.nxv32i8.i8( @@ -1211,11 +1193,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsgeu.mask.nxv32i8.i8( define <vscale x 32 x i1> @intrinsic_vmsgeu_mask_vx_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, i8 %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsltu.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmxor.mm v0, v13, v12 +; CHECK-NEXT: vmsltu.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v4, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> @llvm.riscv.vmsgeu.mask.nxv32i8.i8( @@ -1381,8 +1363,8 @@ define <vscale x 8 x i1> @intrinsic_vmsgeu_vx_nxv8i16_i16(<vscale x 8 x i16> %0, ; CHECK-LABEL: intrinsic_vmsgeu_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vmsltu.vx v10, v8, a0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmsltu.vx v0, v8, a0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.nxv8i16.i16( @@ -1403,11 +1385,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i16.i16( define <vscale x 8 x i1> @intrinsic_vmsgeu_mask_vx_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, i16 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; 
CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsltu.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmxor.mm v0, v11, v10 +; CHECK-NEXT: vmsltu.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v2, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i16.i16( @@ -1429,8 +1411,8 @@ define <vscale x 16 x i1> @intrinsic_vmsgeu_vx_nxv16i16_i16(<vscale x 16 x i16> ; CHECK-LABEL: intrinsic_vmsgeu_vx_nxv16i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vmsltu.vx v12, v8, a0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmsltu.vx v0, v8, a0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.nxv16i16.i16( @@ -1451,11 +1433,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgeu.mask.nxv16i16.i16( define <vscale x 16 x i1> @intrinsic_vmsgeu_mask_vx_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, i16 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsltu.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmxor.mm v0, v13, v12 +; CHECK-NEXT: vmsltu.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v4, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.mask.nxv16i16.i16( @@ -1573,8 +1555,8 @@ define <vscale x 4 x i1> @intrinsic_vmsgeu_vx_nxv4i32_i32(<vscale x 4 x i32> %0, ; CHECK-LABEL: intrinsic_vmsgeu_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmsltu.vx v10, v8, a0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmsltu.vx v0, v8, a0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.nxv4i32.i32( @@ -1595,11 +1577,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i32.i32( define <vscale x 4 x 
i1> @intrinsic_vmsgeu_mask_vx_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, i32 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsltu.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmxor.mm v0, v11, v10 +; CHECK-NEXT: vmsltu.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v2, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i32.i32( @@ -1621,8 +1603,8 @@ define <vscale x 8 x i1> @intrinsic_vmsgeu_vx_nxv8i32_i32(<vscale x 8 x i32> %0, ; CHECK-LABEL: intrinsic_vmsgeu_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmsltu.vx v12, v8, a0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmsltu.vx v0, v8, a0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.nxv8i32.i32( @@ -1643,11 +1625,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i32.i32( define <vscale x 8 x i1> @intrinsic_vmsgeu_mask_vx_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, i32 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsltu.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmxor.mm v0, v13, v12 +; CHECK-NEXT: vmsltu.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v4, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i32.i32( @@ -1708,11 +1690,11 @@ define <vscale x 1 x i1> @intrinsic_vmsgeu_mask_vx_nxv1i64_i64(<vscale x 1 x i1> ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; RV32-NEXT: vlse64.v v11, 
(a0), zero -; RV32-NEXT: vmv1r.v v10, v0 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vmv1r.v v11, v0 ; RV32-NEXT: vmv1r.v v0, v9 -; RV32-NEXT: vmsleu.vv v10, v11, v8, v0.t -; RV32-NEXT: vmv.v.v v0, v10 +; RV32-NEXT: vmsleu.vv v11, v10, v8, v0.t +; RV32-NEXT: vmv.v.v v0, v11 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -1756,8 +1738,8 @@ define <vscale x 2 x i1> @intrinsic_vmsgeu_vx_nxv2i64_i64(<vscale x 2 x i64> %0, ; RV64-LABEL: intrinsic_vmsgeu_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma -; RV64-NEXT: vmsltu.vx v10, v8, a0 -; RV64-NEXT: vmnot.m v0, v10 +; RV64-NEXT: vmsltu.vx v0, v8, a0 +; RV64-NEXT: vmnot.m v0, v0 ; RV64-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmsgeu.nxv2i64.i64( @@ -1784,20 +1766,20 @@ define <vscale x 2 x i1> @intrinsic_vmsgeu_mask_vx_nxv2i64_i64(<vscale x 2 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmv1r.v v11, v0 +; RV32-NEXT: vmv1r.v v2, v0 ; RV32-NEXT: vmv1r.v v0, v10 -; RV32-NEXT: vmsleu.vv v11, v12, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v11 +; RV32-NEXT: vmsleu.vv v2, v12, v8, v0.t +; RV32-NEXT: vmv1r.v v0, v2 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmsgeu_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v11, v0 +; RV64-NEXT: vmv1r.v v2, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 -; RV64-NEXT: vmsltu.vx v11, v8, a0, v0.t -; RV64-NEXT: vmxor.mm v0, v11, v10 +; RV64-NEXT: vmsltu.vx v2, v8, a0, v0.t +; RV64-NEXT: vmxor.mm v0, v2, v10 ; RV64-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmsgeu.mask.nxv2i64.i64( @@ -1831,8 +1813,8 @@ define <vscale x 4 x i1> @intrinsic_vmsgeu_vx_nxv4i64_i64(<vscale x 4 x i64> %0, ; RV64-LABEL: intrinsic_vmsgeu_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmsltu.vx v12, v8, a0 -; RV64-NEXT: 
vmnot.m v0, v12 +; RV64-NEXT: vmsltu.vx v0, v8, a0 +; RV64-NEXT: vmnot.m v0, v0 ; RV64-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.nxv4i64.i64( @@ -1859,20 +1841,20 @@ define <vscale x 4 x i1> @intrinsic_vmsgeu_mask_vx_nxv4i64_i64(<vscale x 4 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vmv1r.v v13, v0 +; RV32-NEXT: vmv1r.v v4, v0 ; RV32-NEXT: vmv1r.v v0, v12 -; RV32-NEXT: vmsleu.vv v13, v16, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v13 +; RV32-NEXT: vmsleu.vv v4, v16, v8, v0.t +; RV32-NEXT: vmv1r.v v0, v4 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmsgeu_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v13, v0 +; RV64-NEXT: vmv1r.v v4, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 -; RV64-NEXT: vmsltu.vx v13, v8, a0, v0.t -; RV64-NEXT: vmxor.mm v0, v13, v12 +; RV64-NEXT: vmsltu.vx v4, v8, a0, v0.t +; RV64-NEXT: vmxor.mm v0, v4, v12 ; RV64-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i64.i64( @@ -2043,11 +2025,11 @@ entry: define <vscale x 16 x i1> @intrinsic_vmsgeu_mask_vi_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsgtu.vi v11, v8, -7, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgtu.vi v2, v8, -7, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.mask.nxv16i8.i8( @@ -2078,11 +2060,11 @@ entry: define <vscale x 32 x i1> @intrinsic_vmsgeu_mask_vi_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv32i8_i8: ; CHECK: 
# %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsgtu.vi v13, v8, -5, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgtu.vi v4, v8, -5, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> @llvm.riscv.vmsgeu.mask.nxv32i8.i8( @@ -2230,11 +2212,11 @@ entry: define <vscale x 8 x i1> @intrinsic_vmsgeu_mask_vi_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsgtu.vi v11, v8, 2, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgtu.vi v2, v8, 2, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i16.i16( @@ -2265,11 +2247,11 @@ entry: define <vscale x 16 x i1> @intrinsic_vmsgeu_mask_vi_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsgtu.vi v13, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgtu.vi v4, v8, 4, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.mask.nxv16i16.i16( @@ -2370,11 +2352,11 @@ entry: define <vscale x 4 x i1> @intrinsic_vmsgeu_mask_vi_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; 
CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsgtu.vi v11, v8, 10, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgtu.vi v2, v8, 10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i32.i32( @@ -2405,11 +2387,11 @@ entry: define <vscale x 8 x i1> @intrinsic_vmsgeu_mask_vi_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsgtu.vi v13, v8, 12, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgtu.vi v4, v8, 12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i32.i32( @@ -2475,11 +2457,11 @@ entry: define <vscale x 2 x i1> @intrinsic_vmsgeu_mask_vi_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsgtu.vi v11, v8, -16, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgtu.vi v2, v8, -16, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmsgeu.mask.nxv2i64.i64( @@ -2510,11 +2492,11 @@ entry: define <vscale x 4 x i1> @intrinsic_vmsgeu_mask_vi_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; 
CHECK-NEXT: vmsgtu.vi v13, v8, -14, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgtu.vi v4, v8, -14, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i64.i64( @@ -2604,8 +2586,8 @@ define <vscale x 16 x i1> @intrinsic_vmsgeu_maskedoff_mask_vx_nxv16i8_i8(<vscale ; CHECK-LABEL: intrinsic_vmsgeu_maskedoff_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vmsltu.vx v10, v8, a0 -; CHECK-NEXT: vmandn.mm v0, v0, v10 +; CHECK-NEXT: vmsltu.vx v2, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.mask.nxv16i8.i8( @@ -2622,8 +2604,8 @@ define <vscale x 32 x i1> @intrinsic_vmsgeu_maskedoff_mask_vx_nxv32i8_i8(<vscale ; CHECK-LABEL: intrinsic_vmsgeu_maskedoff_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vmsltu.vx v12, v8, a0 -; CHECK-NEXT: vmandn.mm v0, v0, v12 +; CHECK-NEXT: vmsltu.vx v4, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> @llvm.riscv.vmsgeu.mask.nxv32i8.i8( @@ -2694,8 +2676,8 @@ define <vscale x 8 x i1> @intrinsic_vmsgeu_maskedoff_mask_vx_nxv8i16_i16(<vscale ; CHECK-LABEL: intrinsic_vmsgeu_maskedoff_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vmsltu.vx v10, v8, a0 -; CHECK-NEXT: vmandn.mm v0, v0, v10 +; CHECK-NEXT: vmsltu.vx v2, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i16.i16( @@ -2712,8 +2694,8 @@ define <vscale x 16 x i1> @intrinsic_vmsgeu_maskedoff_mask_vx_nxv16i16_i16(<vsca ; CHECK-LABEL: intrinsic_vmsgeu_maskedoff_mask_vx_nxv16i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vmsltu.vx v12, v8, a0 -; CHECK-NEXT: vmandn.mm v0, v0, v12 +; CHECK-NEXT: 
vmsltu.vx v4, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.mask.nxv16i16.i16( @@ -2766,8 +2748,8 @@ define <vscale x 4 x i1> @intrinsic_vmsgeu_maskedoff_mask_vx_nxv4i32_i32(<vscale ; CHECK-LABEL: intrinsic_vmsgeu_maskedoff_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmsltu.vx v10, v8, a0 -; CHECK-NEXT: vmandn.mm v0, v0, v10 +; CHECK-NEXT: vmsltu.vx v2, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i32.i32( @@ -2784,8 +2766,8 @@ define <vscale x 8 x i1> @intrinsic_vmsgeu_maskedoff_mask_vx_nxv8i32_i32(<vscale ; CHECK-LABEL: intrinsic_vmsgeu_maskedoff_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmsltu.vx v12, v8, a0 -; CHECK-NEXT: vmandn.mm v0, v0, v12 +; CHECK-NEXT: vmsltu.vx v4, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i32.i32( @@ -2836,18 +2818,16 @@ define <vscale x 2 x i1> @intrinsic_vmsgeu_maskedoff_mask_vx_nxv2i64_i64(<vscale ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmv1r.v v10, v0 -; RV32-NEXT: vmsleu.vv v10, v12, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v10 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vmsleu.vv v0, v10, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmsgeu_maskedoff_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma -; RV64-NEXT: vmsltu.vx v10, v8, a0 -; RV64-NEXT: vmandn.mm v0, v0, v10 +; RV64-NEXT: vmsltu.vx v2, v8, a0 +; RV64-NEXT: vmandn.mm v0, v0, v2 ; RV64-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmsgeu.mask.nxv2i64.i64( @@ -2868,18 +2848,16 @@ define <vscale x 4 x i1> 
@intrinsic_vmsgeu_maskedoff_mask_vx_nxv4i64_i64(<vscale ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vmv1r.v v12, v0 -; RV32-NEXT: vmsleu.vv v12, v16, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vmsleu.vv v0, v12, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmsgeu_maskedoff_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmsltu.vx v12, v8, a0 -; RV64-NEXT: vmandn.mm v0, v0, v12 +; RV64-NEXT: vmsltu.vx v4, v8, a0 +; RV64-NEXT: vmandn.mm v0, v0, v4 ; RV64-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll index bd64093..7d544af 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll @@ -238,11 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgt.mask.nxv16i8( define <vscale x 16 x i1> @intrinsic_vmsgt_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmslt.vv v0, v10, v8 -; CHECK-NEXT: vmslt.vv v14, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmslt.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmsgt.nxv16i8( @@ -289,11 +289,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsgt.mask.nxv32i8( define <vscale x 32 x i1> @intrinsic_vmsgt_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vmsgt_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmslt.vv v0, v12, v8 -; CHECK-NEXT: vmslt.vv v20, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmslt.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 32 x i1> @llvm.riscv.vmsgt.nxv32i8( @@ -493,11 +493,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgt.mask.nxv8i16( define <vscale x 8 x i1> @intrinsic_vmsgt_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmslt.vv v0, v10, v8 -; CHECK-NEXT: vmslt.vv v14, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmslt.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmsgt.nxv8i16( @@ -544,11 +544,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgt.mask.nxv16i16( define <vscale x 16 x i1> @intrinsic_vmsgt_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmslt.vv v0, v12, v8 -; CHECK-NEXT: vmslt.vv v20, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmslt.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmsgt.nxv16i16( @@ -697,11 +697,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgt.mask.nxv4i32( define <vscale x 4 x i1> 
@intrinsic_vmsgt_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmslt.vv v0, v10, v8 -; CHECK-NEXT: vmslt.vv v14, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmslt.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmsgt.nxv4i32( @@ -748,11 +748,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgt.mask.nxv8i32( define <vscale x 8 x i1> @intrinsic_vmsgt_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmslt.vv v0, v12, v8 -; CHECK-NEXT: vmslt.vv v20, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmslt.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmsgt.nxv8i32( @@ -850,11 +850,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsgt.mask.nxv2i64( define <vscale x 2 x i1> @intrinsic_vmsgt_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmslt.vv v0, v10, v8 -; CHECK-NEXT: vmslt.vv v14, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmslt.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call 
<vscale x 2 x i1> @llvm.riscv.vmsgt.nxv2i64( @@ -901,11 +901,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgt.mask.nxv4i64( define <vscale x 4 x i1> @intrinsic_vmsgt_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmslt.vv v0, v12, v8 -; CHECK-NEXT: vmslt.vv v20, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmslt.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmsgt.nxv4i64( @@ -1140,11 +1140,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgt.mask.nxv16i8.i8( define <vscale x 16 x i1> @intrinsic_vmsgt_mask_vx_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, i8 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsgt.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgt.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsgt.mask.nxv16i8.i8( @@ -1187,11 +1187,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsgt.mask.nxv32i8.i8( define <vscale x 32 x i1> @intrinsic_vmsgt_mask_vx_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, i8 %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsgt.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; 
CHECK-NEXT: vmsgt.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> @llvm.riscv.vmsgt.mask.nxv32i8.i8( @@ -1375,11 +1375,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgt.mask.nxv8i16.i16( define <vscale x 8 x i1> @intrinsic_vmsgt_mask_vx_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, i16 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsgt.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgt.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsgt.mask.nxv8i16.i16( @@ -1422,11 +1422,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgt.mask.nxv16i16.i16( define <vscale x 16 x i1> @intrinsic_vmsgt_mask_vx_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, i16 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsgt.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgt.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsgt.mask.nxv16i16.i16( @@ -1563,11 +1563,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgt.mask.nxv4i32.i32( define <vscale x 4 x i1> @intrinsic_vmsgt_mask_vx_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, i32 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; 
CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsgt.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgt.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsgt.mask.nxv4i32.i32( @@ -1610,11 +1610,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgt.mask.nxv8i32.i32( define <vscale x 8 x i1> @intrinsic_vmsgt_mask_vx_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, i32 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsgt.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgt.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsgt.mask.nxv8i32.i32( @@ -1749,20 +1749,20 @@ define <vscale x 2 x i1> @intrinsic_vmsgt_mask_vx_nxv2i64_i64(<vscale x 2 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmv1r.v v11, v0 +; RV32-NEXT: vmv1r.v v2, v0 ; RV32-NEXT: vmv1r.v v0, v10 -; RV32-NEXT: vmslt.vv v11, v12, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v11 +; RV32-NEXT: vmslt.vv v2, v12, v8, v0.t +; RV32-NEXT: vmv1r.v v0, v2 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmsgt_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v11, v0 +; RV64-NEXT: vmv1r.v v2, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 -; RV64-NEXT: vmsgt.vx v11, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v11 +; RV64-NEXT: vmsgt.vx v2, v8, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v2 ; RV64-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmsgt.mask.nxv2i64.i64( @@ -1823,20 +1823,20 @@ define <vscale x 4 x i1> @intrinsic_vmsgt_mask_vx_nxv4i64_i64(<vscale x 4 x i1> ; 
RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vmv1r.v v13, v0 +; RV32-NEXT: vmv1r.v v4, v0 ; RV32-NEXT: vmv1r.v v0, v12 -; RV32-NEXT: vmslt.vv v13, v16, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v13 +; RV32-NEXT: vmslt.vv v4, v16, v8, v0.t +; RV32-NEXT: vmv1r.v v0, v4 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmsgt_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v13, v0 +; RV64-NEXT: vmv1r.v v4, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 -; RV64-NEXT: vmsgt.vx v13, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v13 +; RV64-NEXT: vmsgt.vx v4, v8, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v4 ; RV64-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsgt.mask.nxv4i64.i64( @@ -2007,11 +2007,11 @@ entry: define <vscale x 16 x i1> @intrinsic_vmsgt_mask_vi_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsgt.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgt.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsgt.mask.nxv16i8.i8( @@ -2042,11 +2042,11 @@ entry: define <vscale x 32 x i1> @intrinsic_vmsgt_mask_vi_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsgt.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgt.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 
; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> @llvm.riscv.vmsgt.mask.nxv32i8.i8( @@ -2182,11 +2182,11 @@ entry: define <vscale x 8 x i1> @intrinsic_vmsgt_mask_vi_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsgt.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgt.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsgt.mask.nxv8i16.i16( @@ -2217,11 +2217,11 @@ entry: define <vscale x 16 x i1> @intrinsic_vmsgt_mask_vi_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsgt.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgt.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsgt.mask.nxv16i16.i16( @@ -2322,11 +2322,11 @@ entry: define <vscale x 4 x i1> @intrinsic_vmsgt_mask_vi_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsgt.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgt.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsgt.mask.nxv4i32.i32( @@ -2357,11 
+2357,11 @@ entry: define <vscale x 8 x i1> @intrinsic_vmsgt_mask_vi_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsgt.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgt.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsgt.mask.nxv8i32.i32( @@ -2427,11 +2427,11 @@ entry: define <vscale x 2 x i1> @intrinsic_vmsgt_mask_vi_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsgt.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgt.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmsgt.mask.nxv2i64.i64( @@ -2462,11 +2462,11 @@ entry: define <vscale x 4 x i1> @intrinsic_vmsgt_mask_vi_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsgt.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgt.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsgt.mask.nxv4i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll index 37a022e..65a9ab5 100644 
--- a/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll @@ -238,11 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgtu.mask.nxv16i8( define <vscale x 16 x i1> @intrinsic_vmsgtu_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v10, v8 -; CHECK-NEXT: vmsltu.vv v14, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsltu.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmsgtu.nxv16i8( @@ -289,11 +289,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsgtu.mask.nxv32i8( define <vscale x 32 x i1> @intrinsic_vmsgtu_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v12, v8 -; CHECK-NEXT: vmsltu.vv v20, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsltu.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 32 x i1> @llvm.riscv.vmsgtu.nxv32i8( @@ -493,11 +493,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgtu.mask.nxv8i16( define <vscale x 8 x i1> @intrinsic_vmsgtu_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; 
CHECK-NEXT: vmsltu.vv v0, v10, v8 -; CHECK-NEXT: vmsltu.vv v14, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsltu.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmsgtu.nxv8i16( @@ -544,11 +544,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgtu.mask.nxv16i16( define <vscale x 16 x i1> @intrinsic_vmsgtu_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v12, v8 -; CHECK-NEXT: vmsltu.vv v20, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsltu.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmsgtu.nxv16i16( @@ -697,11 +697,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgtu.mask.nxv4i32( define <vscale x 4 x i1> @intrinsic_vmsgtu_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v10, v8 -; CHECK-NEXT: vmsltu.vv v14, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsltu.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmsgtu.nxv4i32( @@ -748,11 +748,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgtu.mask.nxv8i32( define <vscale x 8 x i1> @intrinsic_vmsgtu_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind { ; 
CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v12, v8 -; CHECK-NEXT: vmsltu.vv v20, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsltu.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmsgtu.nxv8i32( @@ -850,11 +850,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsgtu.mask.nxv2i64( define <vscale x 2 x i1> @intrinsic_vmsgtu_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v10, v8 -; CHECK-NEXT: vmsltu.vv v14, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsltu.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 2 x i1> @llvm.riscv.vmsgtu.nxv2i64( @@ -901,11 +901,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgtu.mask.nxv4i64( define <vscale x 4 x i1> @intrinsic_vmsgtu_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v12, v8 -; CHECK-NEXT: vmsltu.vv v20, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsltu.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmsgtu.nxv4i64( @@ -1140,11 +1140,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgtu.mask.nxv16i8.i8( 
define <vscale x 16 x i1> @intrinsic_vmsgtu_mask_vx_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, i8 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsgtu.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgtu.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsgtu.mask.nxv16i8.i8( @@ -1187,11 +1187,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsgtu.mask.nxv32i8.i8( define <vscale x 32 x i1> @intrinsic_vmsgtu_mask_vx_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, i8 %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsgtu.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgtu.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> @llvm.riscv.vmsgtu.mask.nxv32i8.i8( @@ -1375,11 +1375,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgtu.mask.nxv8i16.i16( define <vscale x 8 x i1> @intrinsic_vmsgtu_mask_vx_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, i16 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsgtu.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgtu.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> 
@llvm.riscv.vmsgtu.mask.nxv8i16.i16( @@ -1422,11 +1422,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgtu.mask.nxv16i16.i16( define <vscale x 16 x i1> @intrinsic_vmsgtu_mask_vx_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, i16 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsgtu.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgtu.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsgtu.mask.nxv16i16.i16( @@ -1563,11 +1563,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgtu.mask.nxv4i32.i32( define <vscale x 4 x i1> @intrinsic_vmsgtu_mask_vx_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, i32 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsgtu.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgtu.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsgtu.mask.nxv4i32.i32( @@ -1610,11 +1610,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgtu.mask.nxv8i32.i32( define <vscale x 8 x i1> @intrinsic_vmsgtu_mask_vx_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, i32 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsgtu.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; 
CHECK-NEXT: vmsgtu.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsgtu.mask.nxv8i32.i32( @@ -1749,20 +1749,20 @@ define <vscale x 2 x i1> @intrinsic_vmsgtu_mask_vx_nxv2i64_i64(<vscale x 2 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmv1r.v v11, v0 +; RV32-NEXT: vmv1r.v v2, v0 ; RV32-NEXT: vmv1r.v v0, v10 -; RV32-NEXT: vmsltu.vv v11, v12, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v11 +; RV32-NEXT: vmsltu.vv v2, v12, v8, v0.t +; RV32-NEXT: vmv1r.v v0, v2 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmsgtu_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v11, v0 +; RV64-NEXT: vmv1r.v v2, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 -; RV64-NEXT: vmsgtu.vx v11, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v11 +; RV64-NEXT: vmsgtu.vx v2, v8, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v2 ; RV64-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmsgtu.mask.nxv2i64.i64( @@ -1823,20 +1823,20 @@ define <vscale x 4 x i1> @intrinsic_vmsgtu_mask_vx_nxv4i64_i64(<vscale x 4 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vmv1r.v v13, v0 +; RV32-NEXT: vmv1r.v v4, v0 ; RV32-NEXT: vmv1r.v v0, v12 -; RV32-NEXT: vmsltu.vv v13, v16, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v13 +; RV32-NEXT: vmsltu.vv v4, v16, v8, v0.t +; RV32-NEXT: vmv1r.v v0, v4 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmsgtu_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v13, v0 +; RV64-NEXT: vmv1r.v v4, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 -; RV64-NEXT: vmsgtu.vx v13, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v13 +; RV64-NEXT: vmsgtu.vx v4, v8, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v4 ; RV64-NEXT: ret entry: %a = call <vscale x 4 
x i1> @llvm.riscv.vmsgtu.mask.nxv4i64.i64( @@ -2007,11 +2007,11 @@ entry: define <vscale x 16 x i1> @intrinsic_vmsgtu_mask_vi_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsgtu.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgtu.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsgtu.mask.nxv16i8.i8( @@ -2042,11 +2042,11 @@ entry: define <vscale x 32 x i1> @intrinsic_vmsgtu_mask_vi_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsgtu.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgtu.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> @llvm.riscv.vmsgtu.mask.nxv32i8.i8( @@ -2182,11 +2182,11 @@ entry: define <vscale x 8 x i1> @intrinsic_vmsgtu_mask_vi_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsgtu.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgtu.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsgtu.mask.nxv8i16.i16( @@ -2217,11 +2217,11 @@ entry: define <vscale x 16 
x i1> @intrinsic_vmsgtu_mask_vi_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsgtu.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgtu.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsgtu.mask.nxv16i16.i16( @@ -2322,11 +2322,11 @@ entry: define <vscale x 4 x i1> @intrinsic_vmsgtu_mask_vi_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsgtu.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgtu.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsgtu.mask.nxv4i32.i32( @@ -2357,11 +2357,11 @@ entry: define <vscale x 8 x i1> @intrinsic_vmsgtu_mask_vi_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsgtu.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgtu.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsgtu.mask.nxv8i32.i32( @@ -2427,11 +2427,11 @@ entry: define <vscale x 2 x i1> @intrinsic_vmsgtu_mask_vi_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> 
%1, <vscale x 2 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsgtu.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgtu.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmsgtu.mask.nxv2i64.i64( @@ -2462,11 +2462,11 @@ entry: define <vscale x 4 x i1> @intrinsic_vmsgtu_mask_vi_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsgtu.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgtu.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsgtu.mask.nxv4i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsle.ll b/llvm/test/CodeGen/RISCV/rvv/vmsle.ll index cac4cbe..93d3852 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsle.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsle.ll @@ -238,11 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsle.mask.nxv16i8( define <vscale x 16 x i1> @intrinsic_vmsle_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmsle.vv v0, v8, v10 -; CHECK-NEXT: vmsle.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsle.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: 
%mask = call <vscale x 16 x i1> @llvm.riscv.vmsle.nxv16i8( @@ -289,11 +289,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsle.mask.nxv32i8( define <vscale x 32 x i1> @intrinsic_vmsle_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmsle.vv v0, v8, v12 -; CHECK-NEXT: vmsle.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsle.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 32 x i1> @llvm.riscv.vmsle.nxv32i8( @@ -493,11 +493,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsle.mask.nxv8i16( define <vscale x 8 x i1> @intrinsic_vmsle_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmsle.vv v0, v8, v10 -; CHECK-NEXT: vmsle.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsle.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmsle.nxv8i16( @@ -544,11 +544,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsle.mask.nxv16i16( define <vscale x 16 x i1> @intrinsic_vmsle_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmsle.vv v0, v8, v12 -; 
CHECK-NEXT: vmsle.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsle.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmsle.nxv16i16( @@ -697,11 +697,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsle.mask.nxv4i32( define <vscale x 4 x i1> @intrinsic_vmsle_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmsle.vv v0, v8, v10 -; CHECK-NEXT: vmsle.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsle.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmsle.nxv4i32( @@ -748,11 +748,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsle.mask.nxv8i32( define <vscale x 8 x i1> @intrinsic_vmsle_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmsle.vv v0, v8, v12 -; CHECK-NEXT: vmsle.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsle.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmsle.nxv8i32( @@ -850,11 +850,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsle.mask.nxv2i64( define <vscale x 2 x i1> @intrinsic_vmsle_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # 
%entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmsle.vv v0, v8, v10 -; CHECK-NEXT: vmsle.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsle.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 2 x i1> @llvm.riscv.vmsle.nxv2i64( @@ -901,11 +901,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsle.mask.nxv4i64( define <vscale x 4 x i1> @intrinsic_vmsle_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmsle.vv v0, v8, v12 -; CHECK-NEXT: vmsle.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsle.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmsle.nxv4i64( @@ -1140,11 +1140,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsle.mask.nxv16i8.i8( define <vscale x 16 x i1> @intrinsic_vmsle_mask_vx_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, i8 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsle.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsle.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsle.mask.nxv16i8.i8( @@ -1187,11 +1187,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsle.mask.nxv32i8.i8( define <vscale x 32 x i1> @intrinsic_vmsle_mask_vx_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, i8 %2, 
<vscale x 32 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsle.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsle.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> @llvm.riscv.vmsle.mask.nxv32i8.i8( @@ -1375,11 +1375,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsle.mask.nxv8i16.i16( define <vscale x 8 x i1> @intrinsic_vmsle_mask_vx_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, i16 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsle.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsle.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsle.mask.nxv8i16.i16( @@ -1422,11 +1422,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsle.mask.nxv16i16.i16( define <vscale x 16 x i1> @intrinsic_vmsle_mask_vx_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, i16 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsle.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsle.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsle.mask.nxv16i16.i16( @@ -1563,11 +1563,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsle.mask.nxv4i32.i32( define <vscale x 4 x 
i1> @intrinsic_vmsle_mask_vx_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, i32 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsle.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsle.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsle.mask.nxv4i32.i32( @@ -1610,11 +1610,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsle.mask.nxv8i32.i32( define <vscale x 8 x i1> @intrinsic_vmsle_mask_vx_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, i32 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsle.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsle.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsle.mask.nxv8i32.i32( @@ -1749,20 +1749,20 @@ define <vscale x 2 x i1> @intrinsic_vmsle_mask_vx_nxv2i64_i64(<vscale x 2 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmv1r.v v11, v0 +; RV32-NEXT: vmv1r.v v2, v0 ; RV32-NEXT: vmv1r.v v0, v10 -; RV32-NEXT: vmsle.vv v11, v8, v12, v0.t -; RV32-NEXT: vmv1r.v v0, v11 +; RV32-NEXT: vmsle.vv v2, v8, v12, v0.t +; RV32-NEXT: vmv1r.v v0, v2 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmsle_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v11, v0 +; RV64-NEXT: vmv1r.v v2, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 -; 
RV64-NEXT: vmsle.vx v11, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v11 +; RV64-NEXT: vmsle.vx v2, v8, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v2 ; RV64-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmsle.mask.nxv2i64.i64( @@ -1823,20 +1823,20 @@ define <vscale x 4 x i1> @intrinsic_vmsle_mask_vx_nxv4i64_i64(<vscale x 4 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vmv1r.v v13, v0 +; RV32-NEXT: vmv1r.v v4, v0 ; RV32-NEXT: vmv1r.v v0, v12 -; RV32-NEXT: vmsle.vv v13, v8, v16, v0.t -; RV32-NEXT: vmv1r.v v0, v13 +; RV32-NEXT: vmsle.vv v4, v8, v16, v0.t +; RV32-NEXT: vmv1r.v v0, v4 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmsle_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v13, v0 +; RV64-NEXT: vmv1r.v v4, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 -; RV64-NEXT: vmsle.vx v13, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v13 +; RV64-NEXT: vmsle.vx v4, v8, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v4 ; RV64-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsle.mask.nxv4i64.i64( @@ -2007,11 +2007,11 @@ entry: define <vscale x 16 x i1> @intrinsic_vmsle_mask_vi_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsle.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsle.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsle.mask.nxv16i8.i8( @@ -2042,11 +2042,11 @@ entry: define <vscale x 32 x i1> @intrinsic_vmsle_mask_vi_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: 
intrinsic_vmsle_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsle.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsle.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> @llvm.riscv.vmsle.mask.nxv32i8.i8( @@ -2182,11 +2182,11 @@ entry: define <vscale x 8 x i1> @intrinsic_vmsle_mask_vi_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsle.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsle.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsle.mask.nxv8i16.i16( @@ -2217,11 +2217,11 @@ entry: define <vscale x 16 x i1> @intrinsic_vmsle_mask_vi_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsle.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsle.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsle.mask.nxv16i16.i16( @@ -2322,11 +2322,11 @@ entry: define <vscale x 4 x i1> @intrinsic_vmsle_mask_vi_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; 
CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsle.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsle.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsle.mask.nxv4i32.i32( @@ -2357,11 +2357,11 @@ entry: define <vscale x 8 x i1> @intrinsic_vmsle_mask_vi_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsle.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsle.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsle.mask.nxv8i32.i32( @@ -2427,11 +2427,11 @@ entry: define <vscale x 2 x i1> @intrinsic_vmsle_mask_vi_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsle.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsle.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmsle.mask.nxv2i64.i64( @@ -2462,11 +2462,11 @@ entry: define <vscale x 4 x i1> @intrinsic_vmsle_mask_vi_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; 
CHECK-NEXT: vmsle.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsle.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsle.mask.nxv4i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll index 75dc38f..8345365 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll @@ -238,11 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsleu.mask.nxv16i8( define <vscale x 16 x i1> @intrinsic_vmsleu_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmsleu.vv v0, v8, v10 -; CHECK-NEXT: vmsleu.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsleu.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmsleu.nxv16i8( @@ -289,11 +289,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsleu.mask.nxv32i8( define <vscale x 32 x i1> @intrinsic_vmsleu_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmsleu.vv v0, v8, v12 -; CHECK-NEXT: vmsleu.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsleu.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 32 x i1> @llvm.riscv.vmsleu.nxv32i8( @@ -493,11 +493,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsleu.mask.nxv8i16( define <vscale x 8 x i1> 
@intrinsic_vmsleu_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmsleu.vv v0, v8, v10 -; CHECK-NEXT: vmsleu.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsleu.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmsleu.nxv8i16( @@ -544,11 +544,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsleu.mask.nxv16i16( define <vscale x 16 x i1> @intrinsic_vmsleu_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmsleu.vv v0, v8, v12 -; CHECK-NEXT: vmsleu.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsleu.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmsleu.nxv16i16( @@ -697,11 +697,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsleu.mask.nxv4i32( define <vscale x 4 x i1> @intrinsic_vmsleu_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmsleu.vv v0, v8, v10 -; CHECK-NEXT: vmsleu.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsleu.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; 
CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmsleu.nxv4i32( @@ -748,11 +748,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsleu.mask.nxv8i32( define <vscale x 8 x i1> @intrinsic_vmsleu_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmsleu.vv v0, v8, v12 -; CHECK-NEXT: vmsleu.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsleu.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmsleu.nxv8i32( @@ -850,11 +850,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsleu.mask.nxv2i64( define <vscale x 2 x i1> @intrinsic_vmsleu_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmsleu.vv v0, v8, v10 -; CHECK-NEXT: vmsleu.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsleu.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 2 x i1> @llvm.riscv.vmsleu.nxv2i64( @@ -901,11 +901,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsleu.mask.nxv4i64( define <vscale x 4 x i1> @intrinsic_vmsleu_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: 
vmsleu.vv v0, v8, v12 -; CHECK-NEXT: vmsleu.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsleu.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmsleu.nxv4i64( @@ -1140,11 +1140,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsleu.mask.nxv16i8.i8( define <vscale x 16 x i1> @intrinsic_vmsleu_mask_vx_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, i8 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsleu.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsleu.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsleu.mask.nxv16i8.i8( @@ -1187,11 +1187,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsleu.mask.nxv32i8.i8( define <vscale x 32 x i1> @intrinsic_vmsleu_mask_vx_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, i8 %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsleu.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsleu.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> @llvm.riscv.vmsleu.mask.nxv32i8.i8( @@ -1375,11 +1375,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsleu.mask.nxv8i16.i16( define <vscale x 8 x i1> @intrinsic_vmsleu_mask_vx_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, i16 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: 
vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsleu.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsleu.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsleu.mask.nxv8i16.i16( @@ -1422,11 +1422,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsleu.mask.nxv16i16.i16( define <vscale x 16 x i1> @intrinsic_vmsleu_mask_vx_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, i16 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsleu.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsleu.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsleu.mask.nxv16i16.i16( @@ -1563,11 +1563,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsleu.mask.nxv4i32.i32( define <vscale x 4 x i1> @intrinsic_vmsleu_mask_vx_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, i32 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsleu.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsleu.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsleu.mask.nxv4i32.i32( @@ -1610,11 +1610,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsleu.mask.nxv8i32.i32( define <vscale x 8 x i1> @intrinsic_vmsleu_mask_vx_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, i32 %2, <vscale x 8 x i1> %3, 
iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsleu.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsleu.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsleu.mask.nxv8i32.i32( @@ -1749,20 +1749,20 @@ define <vscale x 2 x i1> @intrinsic_vmsleu_mask_vx_nxv2i64_i64(<vscale x 2 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmv1r.v v11, v0 +; RV32-NEXT: vmv1r.v v2, v0 ; RV32-NEXT: vmv1r.v v0, v10 -; RV32-NEXT: vmsleu.vv v11, v8, v12, v0.t -; RV32-NEXT: vmv1r.v v0, v11 +; RV32-NEXT: vmsleu.vv v2, v8, v12, v0.t +; RV32-NEXT: vmv1r.v v0, v2 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmsleu_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v11, v0 +; RV64-NEXT: vmv1r.v v2, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 -; RV64-NEXT: vmsleu.vx v11, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v11 +; RV64-NEXT: vmsleu.vx v2, v8, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v2 ; RV64-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmsleu.mask.nxv2i64.i64( @@ -1823,20 +1823,20 @@ define <vscale x 4 x i1> @intrinsic_vmsleu_mask_vx_nxv4i64_i64(<vscale x 4 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vmv1r.v v13, v0 +; RV32-NEXT: vmv1r.v v4, v0 ; RV32-NEXT: vmv1r.v v0, v12 -; RV32-NEXT: vmsleu.vv v13, v8, v16, v0.t -; RV32-NEXT: vmv1r.v v0, v13 +; RV32-NEXT: vmsleu.vv v4, v8, v16, v0.t +; RV32-NEXT: vmv1r.v v0, v4 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmsleu_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry -; 
RV64-NEXT: vmv1r.v v13, v0 +; RV64-NEXT: vmv1r.v v4, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 -; RV64-NEXT: vmsleu.vx v13, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v13 +; RV64-NEXT: vmsleu.vx v4, v8, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v4 ; RV64-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsleu.mask.nxv4i64.i64( @@ -2007,11 +2007,11 @@ entry: define <vscale x 16 x i1> @intrinsic_vmsleu_mask_vi_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsleu.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsleu.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsleu.mask.nxv16i8.i8( @@ -2042,11 +2042,11 @@ entry: define <vscale x 32 x i1> @intrinsic_vmsleu_mask_vi_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsleu.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsleu.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> @llvm.riscv.vmsleu.mask.nxv32i8.i8( @@ -2182,11 +2182,11 @@ entry: define <vscale x 8 x i1> @intrinsic_vmsleu_mask_vi_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, 
ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsleu.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsleu.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsleu.mask.nxv8i16.i16( @@ -2217,11 +2217,11 @@ entry: define <vscale x 16 x i1> @intrinsic_vmsleu_mask_vi_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsleu.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsleu.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsleu.mask.nxv16i16.i16( @@ -2322,11 +2322,11 @@ entry: define <vscale x 4 x i1> @intrinsic_vmsleu_mask_vi_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsleu.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsleu.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsleu.mask.nxv4i32.i32( @@ -2357,11 +2357,11 @@ entry: define <vscale x 8 x i1> @intrinsic_vmsleu_mask_vi_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsleu.vi v13, v8, 9, v0.t -; 
CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsleu.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsleu.mask.nxv8i32.i32( @@ -2427,11 +2427,11 @@ entry: define <vscale x 2 x i1> @intrinsic_vmsleu_mask_vi_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsleu.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsleu.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmsleu.mask.nxv2i64.i64( @@ -2462,11 +2462,11 @@ entry: define <vscale x 4 x i1> @intrinsic_vmsleu_mask_vi_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsleu.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsleu.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsleu.mask.nxv4i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmslt.ll b/llvm/test/CodeGen/RISCV/rvv/vmslt.ll index 2efbe46..5bf07a0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmslt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmslt.ll @@ -238,11 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmslt.mask.nxv16i8( define <vscale x 16 x i1> @intrinsic_vmslt_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: 
# %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmslt.vv v0, v8, v10 -; CHECK-NEXT: vmslt.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmslt.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmslt.nxv16i8( @@ -289,11 +289,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmslt.mask.nxv32i8( define <vscale x 32 x i1> @intrinsic_vmslt_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmslt.vv v0, v8, v12 -; CHECK-NEXT: vmslt.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmslt.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 32 x i1> @llvm.riscv.vmslt.nxv32i8( @@ -493,11 +493,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmslt.mask.nxv8i16( define <vscale x 8 x i1> @intrinsic_vmslt_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmslt.vv v0, v8, v10 -; CHECK-NEXT: vmslt.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmslt.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmslt.nxv8i16( @@ -544,11 +544,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmslt.mask.nxv16i16( define <vscale x 16 x i1> @intrinsic_vmslt_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, 
<vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmslt.vv v0, v8, v12 -; CHECK-NEXT: vmslt.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmslt.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmslt.nxv16i16( @@ -697,11 +697,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmslt.mask.nxv4i32( define <vscale x 4 x i1> @intrinsic_vmslt_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmslt.vv v0, v8, v10 -; CHECK-NEXT: vmslt.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmslt.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmslt.nxv4i32( @@ -748,11 +748,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmslt.mask.nxv8i32( define <vscale x 8 x i1> @intrinsic_vmslt_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmslt.vv v0, v8, v12 -; CHECK-NEXT: vmslt.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmslt.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmslt.nxv8i32( @@ -850,11 
+850,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmslt.mask.nxv2i64( define <vscale x 2 x i1> @intrinsic_vmslt_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmslt.vv v0, v8, v10 -; CHECK-NEXT: vmslt.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmslt.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 2 x i1> @llvm.riscv.vmslt.nxv2i64( @@ -901,11 +901,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmslt.mask.nxv4i64( define <vscale x 4 x i1> @intrinsic_vmslt_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmslt.vv v0, v8, v12 -; CHECK-NEXT: vmslt.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmslt.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmslt.nxv4i64( @@ -1140,11 +1140,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmslt.mask.nxv16i8.i8( define <vscale x 16 x i1> @intrinsic_vmslt_mask_vx_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, i8 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmslt.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmslt.vx v2, v8, a0, v0.t +; 
CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmslt.mask.nxv16i8.i8( @@ -1187,11 +1187,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmslt.mask.nxv32i8.i8( define <vscale x 32 x i1> @intrinsic_vmslt_mask_vx_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, i8 %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmslt.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmslt.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> @llvm.riscv.vmslt.mask.nxv32i8.i8( @@ -1375,11 +1375,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmslt.mask.nxv8i16.i16( define <vscale x 8 x i1> @intrinsic_vmslt_mask_vx_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, i16 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmslt.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmslt.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmslt.mask.nxv8i16.i16( @@ -1422,11 +1422,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmslt.mask.nxv16i16.i16( define <vscale x 16 x i1> @intrinsic_vmslt_mask_vx_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, i16 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmslt.vx 
v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmslt.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmslt.mask.nxv16i16.i16( @@ -1563,11 +1563,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmslt.mask.nxv4i32.i32( define <vscale x 4 x i1> @intrinsic_vmslt_mask_vx_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, i32 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmslt.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmslt.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmslt.mask.nxv4i32.i32( @@ -1610,11 +1610,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmslt.mask.nxv8i32.i32( define <vscale x 8 x i1> @intrinsic_vmslt_mask_vx_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, i32 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmslt.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmslt.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmslt.mask.nxv8i32.i32( @@ -1749,20 +1749,20 @@ define <vscale x 2 x i1> @intrinsic_vmslt_mask_vx_nxv2i64_i64(<vscale x 2 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmv1r.v v11, v0 +; RV32-NEXT: vmv1r.v v2, v0 ; RV32-NEXT: vmv1r.v v0, v10 -; RV32-NEXT: vmslt.vv v11, v8, v12, v0.t -; RV32-NEXT: vmv1r.v v0, v11 +; RV32-NEXT: 
vmslt.vv v2, v8, v12, v0.t +; RV32-NEXT: vmv1r.v v0, v2 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmslt_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v11, v0 +; RV64-NEXT: vmv1r.v v2, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 -; RV64-NEXT: vmslt.vx v11, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v11 +; RV64-NEXT: vmslt.vx v2, v8, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v2 ; RV64-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmslt.mask.nxv2i64.i64( @@ -1823,20 +1823,20 @@ define <vscale x 4 x i1> @intrinsic_vmslt_mask_vx_nxv4i64_i64(<vscale x 4 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vmv1r.v v13, v0 +; RV32-NEXT: vmv1r.v v4, v0 ; RV32-NEXT: vmv1r.v v0, v12 -; RV32-NEXT: vmslt.vv v13, v8, v16, v0.t -; RV32-NEXT: vmv1r.v v0, v13 +; RV32-NEXT: vmslt.vv v4, v8, v16, v0.t +; RV32-NEXT: vmv1r.v v0, v4 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmslt_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v13, v0 +; RV64-NEXT: vmv1r.v v4, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 -; RV64-NEXT: vmslt.vx v13, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v13 +; RV64-NEXT: vmslt.vx v4, v8, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v4 ; RV64-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmslt.mask.nxv4i64.i64( @@ -2007,11 +2007,11 @@ entry: define <vscale x 16 x i1> @intrinsic_vmslt_mask_vi_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsle.vi v11, v8, -7, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsle.vi v2, v8, -7, v0.t +; 
CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmslt.mask.nxv16i8.i8( @@ -2042,11 +2042,11 @@ entry: define <vscale x 32 x i1> @intrinsic_vmslt_mask_vi_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsle.vi v13, v8, -5, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsle.vi v4, v8, -5, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> @llvm.riscv.vmslt.mask.nxv32i8.i8( @@ -2182,11 +2182,11 @@ entry: define <vscale x 8 x i1> @intrinsic_vmslt_mask_vi_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsle.vi v11, v8, 2, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsle.vi v2, v8, 2, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmslt.mask.nxv8i16.i16( @@ -2217,11 +2217,11 @@ entry: define <vscale x 16 x i1> @intrinsic_vmslt_mask_vi_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsle.vi v13, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsle.vi v4, v8, 4, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> 
@llvm.riscv.vmslt.mask.nxv16i16.i16( @@ -2322,11 +2322,11 @@ entry: define <vscale x 4 x i1> @intrinsic_vmslt_mask_vi_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsle.vi v11, v8, 10, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsle.vi v2, v8, 10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmslt.mask.nxv4i32.i32( @@ -2357,11 +2357,11 @@ entry: define <vscale x 8 x i1> @intrinsic_vmslt_mask_vi_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsle.vi v13, v8, 12, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsle.vi v4, v8, 12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmslt.mask.nxv8i32.i32( @@ -2427,11 +2427,11 @@ entry: define <vscale x 2 x i1> @intrinsic_vmslt_mask_vi_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsle.vi v11, v8, 8, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsle.vi v2, v8, 8, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmslt.mask.nxv2i64.i64( @@ -2462,11 +2462,11 @@ entry: define <vscale x 4 x i1> 
@intrinsic_vmslt_mask_vi_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsle.vi v13, v8, 8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsle.vi v4, v8, 8, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmslt.mask.nxv4i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll index c344dff..2068ec2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll @@ -238,11 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsltu.mask.nxv16i8( define <vscale x 16 x i1> @intrinsic_vmsltu_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v8, v10 -; CHECK-NEXT: vmsltu.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsltu.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmsltu.nxv16i8( @@ -289,11 +289,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsltu.mask.nxv32i8( define <vscale x 32 x i1> @intrinsic_vmsltu_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v8, v12 
-; CHECK-NEXT: vmsltu.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsltu.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 32 x i1> @llvm.riscv.vmsltu.nxv32i8( @@ -493,11 +493,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsltu.mask.nxv8i16( define <vscale x 8 x i1> @intrinsic_vmsltu_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v8, v10 -; CHECK-NEXT: vmsltu.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsltu.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmsltu.nxv8i16( @@ -544,11 +544,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsltu.mask.nxv16i16( define <vscale x 16 x i1> @intrinsic_vmsltu_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v8, v12 -; CHECK-NEXT: vmsltu.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsltu.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmsltu.nxv16i16( @@ -697,11 +697,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsltu.mask.nxv4i32( define <vscale x 4 x i1> @intrinsic_vmsltu_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vmsltu_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v8, v10 -; CHECK-NEXT: vmsltu.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsltu.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmsltu.nxv4i32( @@ -748,11 +748,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsltu.mask.nxv8i32( define <vscale x 8 x i1> @intrinsic_vmsltu_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v8, v12 -; CHECK-NEXT: vmsltu.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsltu.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmsltu.nxv8i32( @@ -850,11 +850,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsltu.mask.nxv2i64( define <vscale x 2 x i1> @intrinsic_vmsltu_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v8, v10 -; CHECK-NEXT: vmsltu.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsltu.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 2 x i1> @llvm.riscv.vmsltu.nxv2i64( @@ -901,11 +901,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsltu.mask.nxv4i64( define <vscale x 4 x i1> 
@intrinsic_vmsltu_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v8, v12 -; CHECK-NEXT: vmsltu.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsltu.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmsltu.nxv4i64( @@ -1140,11 +1140,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsltu.mask.nxv16i8.i8( define <vscale x 16 x i1> @intrinsic_vmsltu_mask_vx_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, i8 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsltu.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsltu.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsltu.mask.nxv16i8.i8( @@ -1187,11 +1187,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsltu.mask.nxv32i8.i8( define <vscale x 32 x i1> @intrinsic_vmsltu_mask_vx_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, i8 %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsltu.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsltu.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> 
@llvm.riscv.vmsltu.mask.nxv32i8.i8( @@ -1375,11 +1375,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsltu.mask.nxv8i16.i16( define <vscale x 8 x i1> @intrinsic_vmsltu_mask_vx_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, i16 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsltu.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsltu.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsltu.mask.nxv8i16.i16( @@ -1422,11 +1422,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsltu.mask.nxv16i16.i16( define <vscale x 16 x i1> @intrinsic_vmsltu_mask_vx_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, i16 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsltu.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsltu.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsltu.mask.nxv16i16.i16( @@ -1563,11 +1563,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsltu.mask.nxv4i32.i32( define <vscale x 4 x i1> @intrinsic_vmsltu_mask_vx_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, i32 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsltu.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; 
CHECK-NEXT: vmsltu.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsltu.mask.nxv4i32.i32( @@ -1610,11 +1610,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsltu.mask.nxv8i32.i32( define <vscale x 8 x i1> @intrinsic_vmsltu_mask_vx_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, i32 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsltu.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsltu.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsltu.mask.nxv8i32.i32( @@ -1749,20 +1749,20 @@ define <vscale x 2 x i1> @intrinsic_vmsltu_mask_vx_nxv2i64_i64(<vscale x 2 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmv1r.v v11, v0 +; RV32-NEXT: vmv1r.v v2, v0 ; RV32-NEXT: vmv1r.v v0, v10 -; RV32-NEXT: vmsltu.vv v11, v8, v12, v0.t -; RV32-NEXT: vmv1r.v v0, v11 +; RV32-NEXT: vmsltu.vv v2, v8, v12, v0.t +; RV32-NEXT: vmv1r.v v0, v2 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmsltu_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v11, v0 +; RV64-NEXT: vmv1r.v v2, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 -; RV64-NEXT: vmsltu.vx v11, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v11 +; RV64-NEXT: vmsltu.vx v2, v8, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v2 ; RV64-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmsltu.mask.nxv2i64.i64( @@ -1823,20 +1823,20 @@ define <vscale x 4 x i1> @intrinsic_vmsltu_mask_vx_nxv4i64_i64(<vscale x 4 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: 
vlse64.v v16, (a0), zero -; RV32-NEXT: vmv1r.v v13, v0 +; RV32-NEXT: vmv1r.v v4, v0 ; RV32-NEXT: vmv1r.v v0, v12 -; RV32-NEXT: vmsltu.vv v13, v8, v16, v0.t -; RV32-NEXT: vmv1r.v v0, v13 +; RV32-NEXT: vmsltu.vv v4, v8, v16, v0.t +; RV32-NEXT: vmv1r.v v0, v4 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmsltu_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v13, v0 +; RV64-NEXT: vmv1r.v v4, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 -; RV64-NEXT: vmsltu.vx v13, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v13 +; RV64-NEXT: vmsltu.vx v4, v8, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v4 ; RV64-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsltu.mask.nxv4i64.i64( @@ -2007,11 +2007,11 @@ entry: define <vscale x 16 x i1> @intrinsic_vmsltu_mask_vi_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsleu.vi v11, v8, -7, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsleu.vi v2, v8, -7, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsltu.mask.nxv16i8.i8( @@ -2042,11 +2042,11 @@ entry: define <vscale x 32 x i1> @intrinsic_vmsltu_mask_vi_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsleu.vi v13, v8, -5, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsleu.vi v4, v8, -5, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> 
@llvm.riscv.vmsltu.mask.nxv32i8.i8( @@ -2182,11 +2182,11 @@ entry: define <vscale x 8 x i1> @intrinsic_vmsltu_mask_vi_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsleu.vi v11, v8, 2, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsleu.vi v2, v8, 2, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsltu.mask.nxv8i16.i16( @@ -2217,11 +2217,11 @@ entry: define <vscale x 16 x i1> @intrinsic_vmsltu_mask_vi_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsleu.vi v13, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsleu.vi v4, v8, 4, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsltu.mask.nxv16i16.i16( @@ -2322,11 +2322,11 @@ entry: define <vscale x 4 x i1> @intrinsic_vmsltu_mask_vi_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsleu.vi v11, v8, 10, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsleu.vi v2, v8, 10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsltu.mask.nxv4i32.i32( @@ -2357,11 +2357,11 @@ entry: define <vscale x 
8 x i1> @intrinsic_vmsltu_mask_vi_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsleu.vi v13, v8, 12, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsleu.vi v4, v8, 12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsltu.mask.nxv8i32.i32( @@ -2427,11 +2427,11 @@ entry: define <vscale x 2 x i1> @intrinsic_vmsltu_mask_vi_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsleu.vi v11, v8, -16, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsleu.vi v2, v8, -16, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmsltu.mask.nxv2i64.i64( @@ -2462,11 +2462,11 @@ entry: define <vscale x 4 x i1> @intrinsic_vmsltu_mask_vi_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsleu.vi v13, v8, -14, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsleu.vi v4, v8, -14, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsltu.mask.nxv4i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsne.ll b/llvm/test/CodeGen/RISCV/rvv/vmsne.ll index 0c34dd7..88a09e0 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/vmsne.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsne.ll @@ -238,11 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsne.mask.nxv16i8( define <vscale x 16 x i1> @intrinsic_vmsne_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmsne.vv v0, v8, v10 -; CHECK-NEXT: vmsne.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsne.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmsne.nxv16i8( @@ -289,11 +289,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsne.mask.nxv32i8( define <vscale x 32 x i1> @intrinsic_vmsne_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmsne.vv v0, v8, v12 -; CHECK-NEXT: vmsne.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsne.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 32 x i1> @llvm.riscv.vmsne.nxv32i8( @@ -493,11 +493,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsne.mask.nxv8i16( define <vscale x 8 x i1> @intrinsic_vmsne_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmsne.vv v0, 
v8, v10 -; CHECK-NEXT: vmsne.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsne.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmsne.nxv8i16( @@ -544,11 +544,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsne.mask.nxv16i16( define <vscale x 16 x i1> @intrinsic_vmsne_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmsne.vv v0, v8, v12 -; CHECK-NEXT: vmsne.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsne.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmsne.nxv16i16( @@ -697,11 +697,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsne.mask.nxv4i32( define <vscale x 4 x i1> @intrinsic_vmsne_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmsne.vv v0, v8, v10 -; CHECK-NEXT: vmsne.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsne.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmsne.nxv4i32( @@ -748,11 +748,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsne.mask.nxv8i32( define <vscale x 8 x i1> @intrinsic_vmsne_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv8i32_nxv8i32: ; 
CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmsne.vv v0, v8, v12 -; CHECK-NEXT: vmsne.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsne.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmsne.nxv8i32( @@ -850,11 +850,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsne.mask.nxv2i64( define <vscale x 2 x i1> @intrinsic_vmsne_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmsne.vv v0, v8, v10 -; CHECK-NEXT: vmsne.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsne.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 2 x i1> @llvm.riscv.vmsne.nxv2i64( @@ -901,11 +901,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsne.mask.nxv4i64( define <vscale x 4 x i1> @intrinsic_vmsne_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmsne.vv v0, v8, v12 -; CHECK-NEXT: vmsne.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsne.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmsne.nxv4i64( @@ -1140,11 +1140,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsne.mask.nxv16i8.i8( define <vscale x 16 x i1> @intrinsic_vmsne_mask_vx_nxv16i8_i8(<vscale x 16 x i1> 
%0, <vscale x 16 x i8> %1, i8 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsne.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsne.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsne.mask.nxv16i8.i8( @@ -1187,11 +1187,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsne.mask.nxv32i8.i8( define <vscale x 32 x i1> @intrinsic_vmsne_mask_vx_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, i8 %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsne.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsne.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> @llvm.riscv.vmsne.mask.nxv32i8.i8( @@ -1375,11 +1375,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsne.mask.nxv8i16.i16( define <vscale x 8 x i1> @intrinsic_vmsne_mask_vx_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, i16 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsne.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsne.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsne.mask.nxv8i16.i16( @@ -1422,11 +1422,11 @@ declare <vscale x 16 x i1> 
@llvm.riscv.vmsne.mask.nxv16i16.i16( define <vscale x 16 x i1> @intrinsic_vmsne_mask_vx_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, i16 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsne.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsne.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsne.mask.nxv16i16.i16( @@ -1563,11 +1563,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsne.mask.nxv4i32.i32( define <vscale x 4 x i1> @intrinsic_vmsne_mask_vx_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, i32 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsne.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsne.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsne.mask.nxv4i32.i32( @@ -1610,11 +1610,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsne.mask.nxv8i32.i32( define <vscale x 8 x i1> @intrinsic_vmsne_mask_vx_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, i32 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsne.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsne.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 
8 x i1> @llvm.riscv.vmsne.mask.nxv8i32.i32( @@ -1749,20 +1749,20 @@ define <vscale x 2 x i1> @intrinsic_vmsne_mask_vx_nxv2i64_i64(<vscale x 2 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmv1r.v v11, v0 +; RV32-NEXT: vmv1r.v v2, v0 ; RV32-NEXT: vmv1r.v v0, v10 -; RV32-NEXT: vmsne.vv v11, v8, v12, v0.t -; RV32-NEXT: vmv1r.v v0, v11 +; RV32-NEXT: vmsne.vv v2, v8, v12, v0.t +; RV32-NEXT: vmv1r.v v0, v2 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmsne_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v11, v0 +; RV64-NEXT: vmv1r.v v2, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 -; RV64-NEXT: vmsne.vx v11, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v11 +; RV64-NEXT: vmsne.vx v2, v8, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v2 ; RV64-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmsne.mask.nxv2i64.i64( @@ -1823,20 +1823,20 @@ define <vscale x 4 x i1> @intrinsic_vmsne_mask_vx_nxv4i64_i64(<vscale x 4 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vmv1r.v v13, v0 +; RV32-NEXT: vmv1r.v v4, v0 ; RV32-NEXT: vmv1r.v v0, v12 -; RV32-NEXT: vmsne.vv v13, v8, v16, v0.t -; RV32-NEXT: vmv1r.v v0, v13 +; RV32-NEXT: vmsne.vv v4, v8, v16, v0.t +; RV32-NEXT: vmv1r.v v0, v4 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmsne_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v13, v0 +; RV64-NEXT: vmv1r.v v4, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 -; RV64-NEXT: vmsne.vx v13, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v13 +; RV64-NEXT: vmsne.vx v4, v8, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v4 ; RV64-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsne.mask.nxv4i64.i64( @@ -2007,11 +2007,11 @@ entry: define <vscale x 16 x i1> 
@intrinsic_vmsne_mask_vi_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsne.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsne.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsne.mask.nxv16i8.i8( @@ -2042,11 +2042,11 @@ entry: define <vscale x 32 x i1> @intrinsic_vmsne_mask_vi_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsne.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsne.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> @llvm.riscv.vmsne.mask.nxv32i8.i8( @@ -2182,11 +2182,11 @@ entry: define <vscale x 8 x i1> @intrinsic_vmsne_mask_vi_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsne.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsne.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsne.mask.nxv8i16.i16( @@ -2217,11 +2217,11 @@ entry: define <vscale x 16 x i1> @intrinsic_vmsne_mask_vi_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, 
iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsne.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsne.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsne.mask.nxv16i16.i16( @@ -2322,11 +2322,11 @@ entry: define <vscale x 4 x i1> @intrinsic_vmsne_mask_vi_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsne.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsne.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsne.mask.nxv4i32.i32( @@ -2357,11 +2357,11 @@ entry: define <vscale x 8 x i1> @intrinsic_vmsne_mask_vi_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsne.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsne.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsne.mask.nxv8i32.i32( @@ -2427,11 +2427,11 @@ entry: define <vscale x 2 x i1> @intrinsic_vmsne_mask_vi_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsne.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsne.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmsne.mask.nxv2i64.i64( @@ -2462,11 +2462,11 @@ entry: define <vscale x 4 x i1> @intrinsic_vmsne_mask_vi_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsne.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsne.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsne.mask.nxv4i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll index c0bba31..b8d6fa4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll @@ -162,8 +162,7 @@ define <vscale x 16 x i1> @test_vp_reverse_nxv16i1_masked(<vscale x 16 x i1> %sr ; CHECK-NEXT: vrsub.vx v12, v12, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; CHECK-NEXT: vrgatherei16.vv v10, v16, v12, v0.t -; CHECK-NEXT: vmsne.vi v8, v10, 0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsne.vi v0, v10, 0, v0.t ; CHECK-NEXT: ret %dst = call <vscale x 16 x i1> @llvm.experimental.vp.reverse.nxv16i1(<vscale x 16 x i1> %src, <vscale x 16 x i1> %mask, i32 %evl) ret <vscale x 16 x i1> %dst @@ -200,8 +199,7 @@ define <vscale x 32 x i1> @test_vp_reverse_nxv32i1_masked(<vscale x 32 x i1> %sr ; CHECK-NEXT: vrsub.vx v16, v16, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma ; CHECK-NEXT: vrgatherei16.vv v12, 
v24, v16, v0.t -; CHECK-NEXT: vmsne.vi v8, v12, 0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsne.vi v0, v12, 0, v0.t ; CHECK-NEXT: ret %dst = call <vscale x 32 x i1> @llvm.experimental.vp.reverse.nxv32i1(<vscale x 32 x i1> %src, <vscale x 32 x i1> %mask, i32 %evl) ret <vscale x 32 x i1> %dst @@ -246,8 +244,7 @@ define <vscale x 64 x i1> @test_vp_reverse_nxv64i1_masked(<vscale x 64 x i1> %sr ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vslidedown.vx v8, v16, a1, v0.t -; CHECK-NEXT: vmsne.vi v16, v8, 0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsne.vi v0, v8, 0, v0.t ; CHECK-NEXT: ret %dst = call <vscale x 64 x i1> @llvm.experimental.vp.reverse.nxv64i1(<vscale x 64 x i1> %src, <vscale x 64 x i1> %mask, i32 %evl) ret <vscale x 64 x i1> %dst diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll index 0576255..52e4f11 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll @@ -369,8 +369,7 @@ define <vscale x 16 x i1> @test_vp_splice_nxv16i1_masked(<vscale x 16 x i1> %va, ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vslideup.vx v10, v12, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma -; CHECK-NEXT: vmsne.vi v8, v10, 0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsne.vi v0, v10, 0, v0.t ; CHECK-NEXT: ret %v = call <vscale x 16 x i1> @llvm.experimental.vp.splice.nxv16i1(<vscale x 16 x i1> %va, <vscale x 16 x i1> %vb, i32 5, <vscale x 16 x i1> %mask, i32 %evla, i32 %evlb) ret <vscale x 16 x i1> %v @@ -443,8 +442,7 @@ define <vscale x 32 x i1> @test_vp_splice_nxv32i1_masked(<vscale x 32 x i1> %va, ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vslideup.vx v12, v16, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma -; CHECK-NEXT: vmsne.vi v8, v12, 0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsne.vi v0, v12, 
0, v0.t ; CHECK-NEXT: ret %v = call <vscale x 32 x i1> @llvm.experimental.vp.splice.nxv32i1(<vscale x 32 x i1> %va, <vscale x 32 x i1> %vb, i32 5, <vscale x 32 x i1> %mask, i32 %evla, i32 %evlb) ret <vscale x 32 x i1> %v @@ -517,8 +515,7 @@ define <vscale x 64 x i1> @test_vp_splice_nxv64i1_masked(<vscale x 64 x i1> %va, ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu ; CHECK-NEXT: vslideup.vx v16, v24, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m8, ta, ma -; CHECK-NEXT: vmsne.vi v8, v16, 0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsne.vi v0, v16, 0, v0.t ; CHECK-NEXT: ret %v = call <vscale x 64 x i1> @llvm.experimental.vp.splice.nxv64i1(<vscale x 64 x i1> %va, <vscale x 64 x i1> %vb, i32 5, <vscale x 64 x i1> %mask, i32 %evla, i32 %evlb) ret <vscale x 64 x i1> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll index 5ca62be..afa7931 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll @@ -375,11 +375,11 @@ define <vscale x 128 x i8> @vpmerge_vv_nxv128i8(<vscale x 128 x i8> %va, <vscale ; CHECK-NEXT: vl8r.v v16, (a4) ; CHECK-NEXT: vl8r.v v8, (a0) ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: sub a0, a3, a1 -; CHECK-NEXT: sltu a4, a3, a0 ; CHECK-NEXT: vlm.v v0, (a2) -; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: and a0, a4, a0 +; CHECK-NEXT: sub a0, a3, a1 +; CHECK-NEXT: sltu a2, a3, a0 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: and a0, a2, a0 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, ma ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll index eb70f18..b663e3b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll @@ -451,21 +451,22 @@ define <vscale x 16 x double> @vselect_combine_regression(<vscale x 16 x i64> %v ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub sp, 
sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv8r.v v0, v8 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, a0, a1 -; CHECK-NEXT: vl8re64.v v24, (a1) +; CHECK-NEXT: vl8re64.v v8, (a1) +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v8, (a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmseq.vi v7, v16, 0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmseq.vi v0, v16, 0 +; CHECK-NEXT: vmseq.vi v24, v16, 0 +; CHECK-NEXT: vmseq.vi v0, v0, 0 ; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 diff --git a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp-mask.ll index ad80976..cbb7cdec 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp-mask.ll @@ -56,9 +56,8 @@ define <vscale x 2 x i1> @vtrunc_nxv2i1_nxv2i64(<vscale x 2 x i64> %a, <vscale x ; CHECK-LABEL: vtrunc_nxv2i1_nxv2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vand.vi v10, v8, 1, v0.t -; CHECK-NEXT: vmsne.vi v8, v10, 0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vand.vi v8, v8, 1, v0.t +; CHECK-NEXT: vmsne.vi v0, v8, 0, v0.t ; CHECK-NEXT: ret %v = call <vscale x 2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %m, i32 %vl) ret <vscale x 2 x i1> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll 
b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll index 7007b40..f848d9b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll @@ -296,29 +296,29 @@ define <vscale x 32 x i32> @vtrunc_nxv32i64_nxv32i32(<vscale x 32 x i64> %a, <vs ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a5, a1, 3 -; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v6, v0, a5 -; CHECK-NEXT: srli a3, a1, 2 -; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: srli a3, a1, 3 +; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v6, v0, a3 +; CHECK-NEXT: srli a4, a1, 2 +; CHECK-NEXT: vsetvli a5, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v7, v0 -; CHECK-NEXT: vslidedown.vx v25, v0, a3 -; CHECK-NEXT: slli a3, a1, 3 -; CHECK-NEXT: add a3, a0, a3 -; CHECK-NEXT: vl8re64.v v16, (a3) -; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v8, v0, a4 +; CHECK-NEXT: slli a4, a1, 3 +; CHECK-NEXT: add a4, a0, a4 +; CHECK-NEXT: vl8re64.v v16, (a4) +; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v25, v8 +; CHECK-NEXT: vslidedown.vx v0, v8, a3 ; CHECK-NEXT: slli a3, a1, 1 ; CHECK-NEXT: sub a4, a2, a3 -; CHECK-NEXT: sltu a6, a2, a4 -; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a4, a6, a4 -; CHECK-NEXT: sub a6, a4, a1 -; CHECK-NEXT: sltu a7, a4, a6 +; CHECK-NEXT: sltu a5, a2, a4 +; CHECK-NEXT: addi a5, a5, -1 +; CHECK-NEXT: and a4, a5, a4 +; CHECK-NEXT: sub a5, a4, a1 ; CHECK-NEXT: vl8re64.v v8, (a0) -; CHECK-NEXT: addi a7, a7, -1 -; CHECK-NEXT: and a0, a7, a6 -; CHECK-NEXT: vmv1r.v v24, v25 -; CHECK-NEXT: vslidedown.vx v0, v25, a5 +; CHECK-NEXT: sltu a0, a4, a5 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a5 ; CHECK-NEXT: bltu a4, a1, .LBB17_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a4, a1 @@ -326,7 +326,7 @@ define <vscale x 32 x i32> 
@vtrunc_nxv32i64_nxv32i32(<vscale x 32 x i64> %a, <vs ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vnsrl.wi v28, v16, 0, v0.t ; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vnsrl.wi v24, v8, 0, v0.t ; CHECK-NEXT: bltu a2, a3, .LBB17_4 ; CHECK-NEXT: # %bb.3: diff --git a/llvm/test/CodeGen/RISCV/rvv/vxrm.mir b/llvm/test/CodeGen/RISCV/rvv/vxrm.mir index 64e19188..a588677 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vxrm.mir +++ b/llvm/test/CodeGen/RISCV/rvv/vxrm.mir @@ -11,9 +11,9 @@ body: | ; MIR-LABEL: name: verify_vxrm ; MIR: liveins: $v8, $v9, $x10 ; MIR-NEXT: {{ $}} - ; MIR-NEXT: dead $x0 = PseudoVSETVLI renamable $x10, 197 /* e8, mf8, ta, ma */, implicit-def $vl, implicit-def $vtype + ; MIR-NEXT: dead $x0 = PseudoVSETVLI killed renamable $x10, 197 /* e8, mf8, ta, ma */, implicit-def $vl, implicit-def $vtype ; MIR-NEXT: WriteVXRMImm 0, implicit-def $vxrm - ; MIR-NEXT: renamable $v8 = PseudoVAADD_VV_MF8 undef $v8, renamable $v8, renamable $v9, 0, $noreg, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype, implicit $vxrm + ; MIR-NEXT: renamable $v8 = PseudoVAADD_VV_MF8 undef $v8, killed renamable $v8, killed renamable $v9, 0, $noreg, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype, implicit $vxrm ; MIR-NEXT: PseudoRET implicit $v8 ; ASM-LABEL: verify_vxrm: ; ASM: # %bb.0: @@ -23,8 +23,8 @@ body: | ; ASM-NEXT: ret %0:vr = COPY $v8 %1:vr = COPY $v9 - dead $x0 = PseudoVSETVLI killed renamable $x10, 197 /* e8, mf8, ta, ma */, implicit-def $vl, implicit-def $vtype + %2:gprnox0 = COPY $x10 %pt:vr = IMPLICIT_DEF - renamable $v8 = PseudoVAADD_VV_MF8 %pt, killed renamable $v8, killed renamable $v9, 0, $noreg, 3 /* e8 */, 0 + renamable $v8 = PseudoVAADD_VV_MF8 %pt, %0, %1, 0, %2, 3 /* e8 */, 0 PseudoRET implicit $v8 ... |