author     Piotr Fusik <p.fusik@samsung.com>          2025-07-17 16:37:59 +0200
committer  GitHub <noreply@github.com>                2025-07-17 16:37:59 +0200
commit     9fa3971fac27fbe0a6e3b9745d201c16f5f98bc2
tree       8d50b22cc7a942e255f38d68da36100e52e9f2b3 /llvm
parent     ba5f31cfaa2452a4a94a482b53d899d6f2ee0e66
[DAGCombiner] Fold vector subtraction if above threshold to `umin` (#148834)
This extends the scalar folds from #134235 and #135194 to vector types.
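For reference, the combine relies on the subtraction wrapping in the "not taken" case; the minimal standalone C++ sketch below (illustrative only, not part of the patch) exhaustively checks the underlying scalar identity at 8 bits. The vector fold added here applies the same identity lane-wise.

// Illustrative only: checks the identity the DAGCombiner fold relies on,
//   x - (x < y ? 0 : y)  ==  umin(x, x - y)    (unsigned, wrapping subtraction)
// together with the swapped-predicate form (sub x, (select (uge x, y), y, 0)).
#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned xi = 0; xi < 256; ++xi) {
    for (unsigned yi = 0; yi < 256; ++yi) {
      uint8_t x = static_cast<uint8_t>(xi), y = static_cast<uint8_t>(yi);
      uint8_t sub = static_cast<uint8_t>(x - y);                  // wraps when x < y
      uint8_t sel = static_cast<uint8_t>(x - (x < y ? 0 : y));    // (sub x, (select (ult x, y), 0, y))
      assert(sel == std::min(x, sub));
      uint8_t sel2 = static_cast<uint8_t>(x - (x >= y ? y : 0));  // (sub x, (select (uge x, y), y, 0))
      assert(sel2 == std::min(x, sub));
    }
  }
  return 0;
}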
Diffstat (limited to 'llvm')
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp      | 87
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll   | 88
llvm/test/CodeGen/RISCV/rvv/vminu-sdnode.ll        | 88
3 files changed, 122 insertions, 141 deletions
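The constant-threshold variant that the patch factors into the new foldSelectToUMin helper rests on the same wrapping-arithmetic argument. The following sketch (illustrative only, not part of the change) exhaustively checks both constant forms at 8 bits, matching the comments in the diff below:
  (select (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x)
  (select (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C))

// Illustrative only: the add wraps exactly when the select would pick x,
// so taking the unsigned minimum selects the right operand.
#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned xi = 0; xi < 256; ++xi) {
    for (unsigned ci = 0; ci < 256; ++ci) {
      uint8_t x = static_cast<uint8_t>(xi), c = static_cast<uint8_t>(ci);
      uint8_t addNotC = static_cast<uint8_t>(x + static_cast<uint8_t>(~c));  // x + ~C, i.e. x - (C + 1)
      assert(static_cast<uint8_t>(x > c ? addNotC : x) == std::min(addNotC, x));
      uint8_t subC = static_cast<uint8_t>(x - c);                            // x + (-C), i.e. x - C
      assert(static_cast<uint8_t>(x < c ? x : subC) == std::min(x, subC));
    }
  }
  return 0;
}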
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 0e8e4c9..40464e9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -609,6 +609,8 @@ namespace { SDValue foldABSToABD(SDNode *N, const SDLoc &DL); SDValue foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const SDLoc &DL); + SDValue foldSelectToUMin(SDValue LHS, SDValue RHS, SDValue True, + SDValue False, ISD::CondCode CC, const SDLoc &DL); SDValue unfoldMaskedMerge(SDNode *N); SDValue unfoldExtremeBitClearingToShifts(SDNode *N); SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, @@ -859,7 +861,7 @@ namespace { auto LK = TLI.getTypeConversion(*DAG.getContext(), VT); return (LK.first == TargetLoweringBase::TypeLegal || LK.first == TargetLoweringBase::TypePromoteInteger) && - TLI.isOperationLegal(ISD::UMIN, LK.second); + TLI.isOperationLegalOrCustom(ISD::UMIN, LK.second); } public: @@ -4093,6 +4095,26 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { return N0; } + // (sub x, ([v]select (ult x, y), 0, y)) -> (umin x, (sub x, y)) + // (sub x, ([v]select (uge x, y), y, 0)) -> (umin x, (sub x, y)) + if (N1.hasOneUse() && hasUMin(VT)) { + SDValue Y; + if (sd_match(N1, m_Select(m_SetCC(m_Specific(N0), m_Value(Y), + m_SpecificCondCode(ISD::SETULT)), + m_Zero(), m_Deferred(Y))) || + sd_match(N1, m_Select(m_SetCC(m_Specific(N0), m_Value(Y), + m_SpecificCondCode(ISD::SETUGE)), + m_Deferred(Y), m_Zero())) || + sd_match(N1, m_VSelect(m_SetCC(m_Specific(N0), m_Value(Y), + m_SpecificCondCode(ISD::SETULT)), + m_Zero(), m_Deferred(Y))) || + sd_match(N1, m_VSelect(m_SetCC(m_Specific(N0), m_Value(Y), + m_SpecificCondCode(ISD::SETUGE)), + m_Deferred(Y), m_Zero()))) + return DAG.getNode(ISD::UMIN, DL, VT, N0, + DAG.getNode(ISD::SUB, DL, VT, N0, Y)); + } + if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -4442,20 +4464,6 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { sd_match(N1, m_UMaxLike(m_Specific(A), m_Specific(B)))) return DAG.getNegative(DAG.getNode(ISD::ABDU, DL, VT, A, B), DL, VT); - // (sub x, (select (ult x, y), 0, y)) -> (umin x, (sub x, y)) - // (sub x, (select (uge x, y), y, 0)) -> (umin x, (sub x, y)) - if (hasUMin(VT)) { - SDValue Y; - if (sd_match(N1, m_OneUse(m_Select(m_SetCC(m_Specific(N0), m_Value(Y), - m_SpecificCondCode(ISD::SETULT)), - m_Zero(), m_Deferred(Y)))) || - sd_match(N1, m_OneUse(m_Select(m_SetCC(m_Specific(N0), m_Value(Y), - m_SpecificCondCode(ISD::SETUGE)), - m_Deferred(Y), m_Zero())))) - return DAG.getNode(ISD::UMIN, DL, VT, N0, - DAG.getNode(ISD::SUB, DL, VT, N0, Y)); - } - return SDValue(); } @@ -12173,6 +12181,30 @@ SDValue DAGCombiner::foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True, return SDValue(); } +// ([v]select (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x) +// ([v]select (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C)) +SDValue DAGCombiner::foldSelectToUMin(SDValue LHS, SDValue RHS, SDValue True, + SDValue False, ISD::CondCode CC, + const SDLoc &DL) { + APInt C; + EVT VT = True.getValueType(); + if (sd_match(RHS, m_ConstInt(C)) && hasUMin(VT)) { + if (CC == ISD::SETUGT && LHS == False && + sd_match(True, m_Add(m_Specific(False), m_SpecificInt(~C)))) { + SDValue AddC = DAG.getConstant(~C, DL, VT); + SDValue Add = DAG.getNode(ISD::ADD, DL, VT, False, AddC); + return DAG.getNode(ISD::UMIN, DL, VT, Add, False); + } + if (CC == ISD::SETULT && LHS == True && + sd_match(False, m_Add(m_Specific(True), 
m_SpecificInt(-C)))) { + SDValue AddC = DAG.getConstant(-C, DL, VT); + SDValue Add = DAG.getNode(ISD::ADD, DL, VT, True, AddC); + return DAG.getNode(ISD::UMIN, DL, VT, True, Add); + } + } + return SDValue(); +} + SDValue DAGCombiner::visitSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -12358,24 +12390,8 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // (select (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x) // (select (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C)) - APInt C; - if (sd_match(Cond1, m_ConstInt(C)) && hasUMin(VT)) { - if (CC == ISD::SETUGT && Cond0 == N2 && - sd_match(N1, m_Add(m_Specific(N2), m_SpecificInt(~C)))) { - // The resulting code relies on an unsigned wrap in ADD. - // Recreating ADD to drop possible nuw/nsw flags. - SDValue AddC = DAG.getConstant(~C, DL, VT); - SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N2, AddC); - return DAG.getNode(ISD::UMIN, DL, VT, Add, N2); - } - if (CC == ISD::SETULT && Cond0 == N1 && - sd_match(N2, m_Add(m_Specific(N1), m_SpecificInt(-C)))) { - // Ditto. - SDValue AddC = DAG.getConstant(-C, DL, VT); - SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, AddC); - return DAG.getNode(ISD::UMIN, DL, VT, N1, Add); - } - } + if (SDValue UMin = foldSelectToUMin(Cond0, Cond1, N1, N2, CC, DL)) + return UMin; } if (!VT.isVector()) @@ -13412,6 +13428,11 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { } } } + + // (vselect (ugt x, C), (add x, ~C), x) -> (umin (add x, ~C), x) + // (vselect (ult x, C), x, (add x, -C)) -> (umin x, (add x, -C)) + if (SDValue UMin = foldSelectToUMin(LHS, RHS, N1, N2, CC, DL)) + return UMin; } if (SimplifySelectOps(N, N1, N2)) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll index 35b9457..9df71cf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -5712,9 +5712,8 @@ define <8 x i8> @vsub_if_uge_v8i8(<8 x i8> %va, <8 x i8> %vb) { ; CHECK-LABEL: vsub_if_uge_v8i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmsltu.vv v0, v8, v9 ; CHECK-NEXT: vsub.vv v9, v8, v9 -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 +; CHECK-NEXT: vminu.vv v8, v8, v9 ; CHECK-NEXT: ret %cmp = icmp ult <8 x i8> %va, %vb %select = select <8 x i1> %cmp, <8 x i8> zeroinitializer, <8 x i8> %vb @@ -5725,9 +5724,9 @@ define <8 x i8> @vsub_if_uge_v8i8(<8 x i8> %va, <8 x i8> %vb) { define <8 x i8> @vsub_if_uge_swapped_v8i8(<8 x i8> %va, <8 x i8> %vb) { ; CHECK-LABEL: vsub_if_uge_swapped_v8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v9, v8 -; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vsub.vv v9, v8, v9 +; CHECK-NEXT: vminu.vv v8, v8, v9 ; CHECK-NEXT: ret %cmp = icmp uge <8 x i8> %va, %vb %select = select <8 x i1> %cmp, <8 x i8> %vb, <8 x i8> zeroinitializer @@ -5739,9 +5738,8 @@ define <8 x i16> @vsub_if_uge_v8i16(<8 x i16> %va, <8 x i16> %vb) { ; CHECK-LABEL: vsub_if_uge_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vmsltu.vv v0, v8, v9 ; CHECK-NEXT: vsub.vv v9, v8, v9 -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 +; CHECK-NEXT: vminu.vv v8, v8, v9 ; CHECK-NEXT: ret %cmp = icmp ult <8 x i16> %va, %vb %select = select <8 x i1> %cmp, <8 x i16> zeroinitializer, <8 x i16> %vb @@ -5752,9 +5750,9 @@ define <8 x i16> @vsub_if_uge_v8i16(<8 x i16> %va, <8 x i16> %vb) { define <8 x i16> @vsub_if_uge_swapped_v8i16(<8 
x i16> %va, <8 x i16> %vb) { ; CHECK-LABEL: vsub_if_uge_swapped_v8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v9, v8 -; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsub.vv v9, v8, v9 +; CHECK-NEXT: vminu.vv v8, v8, v9 ; CHECK-NEXT: ret %cmp = icmp uge <8 x i16> %va, %vb %select = select <8 x i1> %cmp, <8 x i16> %vb, <8 x i16> zeroinitializer @@ -5766,9 +5764,8 @@ define <4 x i32> @vsub_if_uge_v4i32(<4 x i32> %va, <4 x i32> %vb) { ; CHECK-LABEL: vsub_if_uge_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmsltu.vv v0, v8, v9 ; CHECK-NEXT: vsub.vv v9, v8, v9 -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 +; CHECK-NEXT: vminu.vv v8, v8, v9 ; CHECK-NEXT: ret %cmp = icmp ult <4 x i32> %va, %vb %select = select <4 x i1> %cmp, <4 x i32> zeroinitializer, <4 x i32> %vb @@ -5779,9 +5776,9 @@ define <4 x i32> @vsub_if_uge_v4i32(<4 x i32> %va, <4 x i32> %vb) { define <4 x i32> @vsub_if_uge_swapped_v4i32(<4 x i32> %va, <4 x i32> %vb) { ; CHECK-LABEL: vsub_if_uge_swapped_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v9, v8 -; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsub.vv v9, v8, v9 +; CHECK-NEXT: vminu.vv v8, v8, v9 ; CHECK-NEXT: ret %cmp = icmp uge <4 x i32> %va, %vb %select = select <4 x i1> %cmp, <4 x i32> %vb, <4 x i32> zeroinitializer @@ -5793,9 +5790,8 @@ define <2 x i64> @vsub_if_uge_v2i64(<2 x i64> %va, <2 x i64> %vb) { ; CHECK-LABEL: vsub_if_uge_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vmsltu.vv v0, v8, v9 ; CHECK-NEXT: vsub.vv v9, v8, v9 -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 +; CHECK-NEXT: vminu.vv v8, v8, v9 ; CHECK-NEXT: ret %cmp = icmp ult <2 x i64> %va, %vb %select = select <2 x i1> %cmp, <2 x i64> zeroinitializer, <2 x i64> %vb @@ -5806,9 +5802,9 @@ define <2 x i64> @vsub_if_uge_v2i64(<2 x i64> %va, <2 x i64> %vb) { define <2 x i64> @vsub_if_uge_swapped_v2i64(<2 x i64> %va, <2 x i64> %vb) { ; CHECK-LABEL: vsub_if_uge_swapped_v2i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v9, v8 -; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsub.vv v9, v8, v9 +; CHECK-NEXT: vminu.vv v8, v8, v9 ; CHECK-NEXT: ret %cmp = icmp uge <2 x i64> %va, %vb %select = select <2 x i1> %cmp, <2 x i64> %vb, <2 x i64> zeroinitializer @@ -5819,9 +5815,9 @@ define <2 x i64> @vsub_if_uge_swapped_v2i64(<2 x i64> %va, <2 x i64> %vb) { define <8 x i8> @sub_if_uge_C_v8i8(<8 x i8> %x) { ; CHECK-LABEL: sub_if_uge_C_v8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vmsgtu.vi v0, v8, 12 -; CHECK-NEXT: vadd.vi v8, v8, -13, v0.t +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vadd.vi v9, v8, -13 +; CHECK-NEXT: vminu.vv v8, v9, v8 ; CHECK-NEXT: ret %cmp = icmp ugt <8 x i8> %x, splat (i8 12) %sub = add <8 x i8> %x, splat (i8 -13) @@ -5832,11 +5828,10 @@ define <8 x i8> @sub_if_uge_C_v8i8(<8 x i8> %x) { define <8 x i16> @sub_if_uge_C_v8i16(<8 x i16> %x) { ; CHECK-LABEL: sub_if_uge_C_v8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 2000 -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; CHECK-NEXT: vmsgtu.vx v0, v8, a0 ; CHECK-NEXT: li a0, -2001 -; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vadd.vx v9, v8, 
a0 +; CHECK-NEXT: vminu.vv v8, v9, v8 ; CHECK-NEXT: ret %cmp = icmp ugt <8 x i16> %x, splat (i16 2000) %sub = add <8 x i16> %x, splat (i16 -2001) @@ -5847,13 +5842,11 @@ define <8 x i16> @sub_if_uge_C_v8i16(<8 x i16> %x) { define <4 x i32> @sub_if_uge_C_v4i32(<4 x i32> %x) { ; CHECK-LABEL: sub_if_uge_C_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 16 -; CHECK-NEXT: addi a0, a0, -16 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; CHECK-NEXT: vmsgtu.vx v0, v8, a0 ; CHECK-NEXT: lui a0, 1048560 ; CHECK-NEXT: addi a0, a0, 15 -; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vadd.vx v9, v8, a0 +; CHECK-NEXT: vminu.vv v8, v9, v8 ; CHECK-NEXT: ret %cmp = icmp ugt <4 x i32> %x, splat (i32 65520) %sub = add <4 x i32> %x, splat (i32 -65521) @@ -5864,14 +5857,11 @@ define <4 x i32> @sub_if_uge_C_v4i32(<4 x i32> %x) { define <4 x i32> @sub_if_uge_C_swapped_v4i32(<4 x i32> %x) { ; CHECK-LABEL: sub_if_uge_C_swapped_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 16 -; CHECK-NEXT: addi a0, a0, -15 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmsltu.vx v0, v8, a0 ; CHECK-NEXT: lui a0, 1048560 ; CHECK-NEXT: addi a0, a0, 15 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vadd.vx v9, v8, a0 -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 +; CHECK-NEXT: vminu.vv v8, v8, v9 ; CHECK-NEXT: ret %cmp = icmp ult <4 x i32> %x, splat (i32 65521) %sub = add <4 x i32> %x, splat (i32 -65521) @@ -5883,38 +5873,28 @@ define <2 x i64> @sub_if_uge_C_v2i64(<2 x i64> %x) nounwind { ; RV32-LABEL: sub_if_uge_C_v2i64: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: li a0, 1 -; RV32-NEXT: lui a1, 172127 -; RV32-NEXT: mv a2, sp -; RV32-NEXT: addi a1, a1, 512 -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: sw a0, 4(sp) ; RV32-NEXT: li a0, -2 -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; RV32-NEXT: vlse64.v v9, (a2), zero ; RV32-NEXT: lui a1, 876449 ; RV32-NEXT: addi a1, a1, -513 ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vmsltu.vv v0, v9, v8 -; RV32-NEXT: vadd.vv v8, v8, v10, v0.t +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vadd.vv v9, v8, v9 +; RV32-NEXT: vminu.vv v8, v9, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: sub_if_uge_C_v2i64: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, 2384 -; RV64-NEXT: addi a0, a0, 761 -; RV64-NEXT: slli a0, a0, 9 -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; RV64-NEXT: vmsgtu.vx v0, v8, a0 ; RV64-NEXT: lui a0, 1048278 ; RV64-NEXT: addi a0, a0, -95 ; RV64-NEXT: slli a0, a0, 12 ; RV64-NEXT: addi a0, a0, -513 -; RV64-NEXT: vadd.vx v8, v8, a0, v0.t +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vadd.vx v9, v8, a0 +; RV64-NEXT: vminu.vv v8, v9, v8 ; RV64-NEXT: ret %cmp = icmp ugt <2 x i64> %x, splat (i64 5000000000) %sub = add <2 x i64> %x, splat (i64 -5000000001) diff --git a/llvm/test/CodeGen/RISCV/rvv/vminu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vminu-sdnode.ll index a21a526..9b58cb3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vminu-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vminu-sdnode.ll @@ -898,9 +898,8 @@ define <vscale x 2 x i8> @vsub_if_uge_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 ; CHECK-LABEL: vsub_if_uge_nxv2i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmsltu.vv v0, v8, v9 ; CHECK-NEXT: vsub.vv v9, v8, v9 -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 +; CHECK-NEXT: vminu.vv v8, v8, v9 
; CHECK-NEXT: ret %cmp = icmp ult <vscale x 2 x i8> %va, %vb %select = select <vscale x 2 x i1> %cmp, <vscale x 2 x i8> zeroinitializer, <vscale x 2 x i8> %vb @@ -911,9 +910,9 @@ define <vscale x 2 x i8> @vsub_if_uge_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 define <vscale x 2 x i8> @vsub_if_uge_swapped_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb) { ; CHECK-LABEL: vsub_if_uge_swapped_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v9, v8 -; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vsub.vv v9, v8, v9 +; CHECK-NEXT: vminu.vv v8, v8, v9 ; CHECK-NEXT: ret %cmp = icmp uge <vscale x 2 x i8> %va, %vb %select = select <vscale x 2 x i1> %cmp, <vscale x 2 x i8> %vb, <vscale x 2 x i8> zeroinitializer @@ -925,9 +924,8 @@ define <vscale x 2 x i16> @vsub_if_uge_nxv2i16(<vscale x 2 x i16> %va, <vscale x ; CHECK-LABEL: vsub_if_uge_nxv2i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vmsltu.vv v0, v8, v9 ; CHECK-NEXT: vsub.vv v9, v8, v9 -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 +; CHECK-NEXT: vminu.vv v8, v8, v9 ; CHECK-NEXT: ret %cmp = icmp ult <vscale x 2 x i16> %va, %vb %select = select <vscale x 2 x i1> %cmp, <vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> %vb @@ -938,9 +936,9 @@ define <vscale x 2 x i16> @vsub_if_uge_nxv2i16(<vscale x 2 x i16> %va, <vscale x define <vscale x 2 x i16> @vsub_if_uge_swapped_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb) { ; CHECK-LABEL: vsub_if_uge_swapped_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v9, v8 -; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vsub.vv v9, v8, v9 +; CHECK-NEXT: vminu.vv v8, v8, v9 ; CHECK-NEXT: ret %cmp = icmp uge <vscale x 2 x i16> %va, %vb %select = select <vscale x 2 x i1> %cmp, <vscale x 2 x i16> %vb, <vscale x 2 x i16> zeroinitializer @@ -952,9 +950,8 @@ define <vscale x 2 x i32> @vsub_if_uge_nxv2i32(<vscale x 2 x i32> %va, <vscale x ; CHECK-LABEL: vsub_if_uge_nxv2i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vmsltu.vv v0, v8, v9 ; CHECK-NEXT: vsub.vv v9, v8, v9 -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 +; CHECK-NEXT: vminu.vv v8, v8, v9 ; CHECK-NEXT: ret %cmp = icmp ult <vscale x 2 x i32> %va, %vb %select = select <vscale x 2 x i1> %cmp, <vscale x 2 x i32> zeroinitializer, <vscale x 2 x i32> %vb @@ -965,9 +962,9 @@ define <vscale x 2 x i32> @vsub_if_uge_nxv2i32(<vscale x 2 x i32> %va, <vscale x define <vscale x 2 x i32> @vsub_if_uge_swapped_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb) { ; CHECK-LABEL: vsub_if_uge_swapped_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v9, v8 -; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vsub.vv v9, v8, v9 +; CHECK-NEXT: vminu.vv v8, v8, v9 ; CHECK-NEXT: ret %cmp = icmp uge <vscale x 2 x i32> %va, %vb %select = select <vscale x 2 x i1> %cmp, <vscale x 2 x i32> %vb, <vscale x 2 x i32> zeroinitializer @@ -979,9 +976,8 @@ define <vscale x 2 x i64> @vsub_if_uge_nxv2i64(<vscale x 2 x i64> %va, <vscale x ; CHECK-LABEL: vsub_if_uge_nxv2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmsltu.vv v0, v8, v10 ; CHECK-NEXT: vsub.vv v10, v8, v10 -; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: vminu.vv v8, v8, v10 ; 
CHECK-NEXT: ret %cmp = icmp ult <vscale x 2 x i64> %va, %vb %select = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> %vb @@ -992,9 +988,9 @@ define <vscale x 2 x i64> @vsub_if_uge_nxv2i64(<vscale x 2 x i64> %va, <vscale x define <vscale x 2 x i64> @vsub_if_uge_swapped_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb) { ; CHECK-LABEL: vsub_if_uge_swapped_nxv2i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v0, v10, v8 -; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vsub.vv v10, v8, v10 +; CHECK-NEXT: vminu.vv v8, v8, v10 ; CHECK-NEXT: ret %cmp = icmp uge <vscale x 2 x i64> %va, %vb %select = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> %vb, <vscale x 2 x i64> zeroinitializer @@ -1005,9 +1001,9 @@ define <vscale x 2 x i64> @vsub_if_uge_swapped_nxv2i64(<vscale x 2 x i64> %va, < define <vscale x 2 x i8> @sub_if_uge_C_nxv2i8(<vscale x 2 x i8> %x) { ; CHECK-LABEL: sub_if_uge_C_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu -; CHECK-NEXT: vmsgtu.vi v0, v8, 12 -; CHECK-NEXT: vadd.vi v8, v8, -13, v0.t +; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vadd.vi v9, v8, -13 +; CHECK-NEXT: vminu.vv v8, v9, v8 ; CHECK-NEXT: ret %cmp = icmp ugt <vscale x 2 x i8> %x, splat (i8 12) %sub = add <vscale x 2 x i8> %x, splat (i8 -13) @@ -1018,11 +1014,10 @@ define <vscale x 2 x i8> @sub_if_uge_C_nxv2i8(<vscale x 2 x i8> %x) { define <vscale x 2 x i16> @sub_if_uge_C_nxv2i16(<vscale x 2 x i16> %x) { ; CHECK-LABEL: sub_if_uge_C_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 2000 -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, mu -; CHECK-NEXT: vmsgtu.vx v0, v8, a0 ; CHECK-NEXT: li a0, -2001 -; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vadd.vx v9, v8, a0 +; CHECK-NEXT: vminu.vv v8, v9, v8 ; CHECK-NEXT: ret %cmp = icmp ugt <vscale x 2 x i16> %x, splat (i16 2000) %sub = add <vscale x 2 x i16> %x, splat (i16 -2001) @@ -1033,13 +1028,11 @@ define <vscale x 2 x i16> @sub_if_uge_C_nxv2i16(<vscale x 2 x i16> %x) { define <vscale x 2 x i32> @sub_if_uge_C_nxv2i32(<vscale x 2 x i32> %x) { ; CHECK-LABEL: sub_if_uge_C_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 16 -; CHECK-NEXT: addi a0, a0, -16 -; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu -; CHECK-NEXT: vmsgtu.vx v0, v8, a0 ; CHECK-NEXT: lui a0, 1048560 ; CHECK-NEXT: addi a0, a0, 15 -; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-NEXT: vadd.vx v9, v8, a0 +; CHECK-NEXT: vminu.vv v8, v9, v8 ; CHECK-NEXT: ret %cmp = icmp ugt <vscale x 2 x i32> %x, splat (i32 65520) %sub = add <vscale x 2 x i32> %x, splat (i32 -65521) @@ -1050,14 +1043,11 @@ define <vscale x 2 x i32> @sub_if_uge_C_nxv2i32(<vscale x 2 x i32> %x) { define <vscale x 2 x i32> @sub_if_uge_C_swapped_nxv2i32(<vscale x 2 x i32> %x) { ; CHECK-LABEL: sub_if_uge_C_swapped_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 16 -; CHECK-NEXT: addi a0, a0, -15 -; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; CHECK-NEXT: vmsltu.vx v0, v8, a0 ; CHECK-NEXT: lui a0, 1048560 ; CHECK-NEXT: addi a0, a0, 15 +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vadd.vx v9, v8, a0 -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 +; CHECK-NEXT: vminu.vv v8, v8, v9 ; CHECK-NEXT: ret %cmp = icmp ult <vscale x 2 x i32> %x, splat (i32 65521) %sub = add <vscale x 2 x i32> %x, splat (i32 -65521) @@ -1069,38 +1059,28 @@ define 
<vscale x 2 x i64> @sub_if_uge_C_nxv2i64(<vscale x 2 x i64> %x) nounwind ; RV32-LABEL: sub_if_uge_C_nxv2i64: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: li a0, 1 -; RV32-NEXT: lui a1, 172127 -; RV32-NEXT: mv a2, sp -; RV32-NEXT: addi a1, a1, 512 -; RV32-NEXT: sw a1, 0(sp) -; RV32-NEXT: sw a0, 4(sp) ; RV32-NEXT: li a0, -2 -; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, mu -; RV32-NEXT: vlse64.v v10, (a2), zero ; RV32-NEXT: lui a1, 876449 ; RV32-NEXT: addi a1, a1, -513 ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmsltu.vv v0, v10, v8 -; RV32-NEXT: vadd.vv v8, v8, v12, v0.t +; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vadd.vv v10, v8, v10 +; RV32-NEXT: vminu.vv v8, v10, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: sub_if_uge_C_nxv2i64: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, 2384 -; RV64-NEXT: addi a0, a0, 761 -; RV64-NEXT: slli a0, a0, 9 -; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, mu -; RV64-NEXT: vmsgtu.vx v0, v8, a0 ; RV64-NEXT: lui a0, 1048278 ; RV64-NEXT: addi a0, a0, -95 ; RV64-NEXT: slli a0, a0, 12 ; RV64-NEXT: addi a0, a0, -513 -; RV64-NEXT: vadd.vx v8, v8, a0, v0.t +; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; RV64-NEXT: vadd.vx v10, v8, a0 +; RV64-NEXT: vminu.vv v8, v10, v8 ; RV64-NEXT: ret %cmp = icmp ugt <vscale x 2 x i64> %x, splat (i64 5000000000) %sub = add <vscale x 2 x i64> %x, splat (i64 -5000000001) |