diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 60 |
1 files changed, 52 insertions, 8 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 1a686a9..730be69 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -6073,9 +6073,6 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineOperand &Src0 = MI.getOperand(2); MachineOperand &Src1 = MI.getOperand(3); MachineOperand &Src2 = MI.getOperand(4); - unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO) - ? AMDGPU::S_ADDC_U32 - : AMDGPU::S_SUBB_U32; if (Src0.isReg() && TRI->isVectorRegister(MRI, Src0.getReg())) { Register RegOp0 = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); BuildMI(*BB, MII, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), RegOp0) @@ -6124,11 +6121,11 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, .addImm(0); } - // clang-format off - BuildMI(*BB, MII, DL, TII->get(Opc), Dest.getReg()) - .add(Src0) - .add(Src1); - // clang-format on + unsigned Opc = MI.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO + ? AMDGPU::S_ADDC_U32 + : AMDGPU::S_SUBB_U32; + + BuildMI(*BB, MII, DL, TII->get(Opc), Dest.getReg()).add(Src0).add(Src1); unsigned SelOpc = ST.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32; @@ -16571,6 +16568,53 @@ SDValue SITargetLowering::performSetCCCombine(SDNode *N, } } + // Eliminate setcc by using carryout from add/sub instruction + + // LHS = ADD i64 RHS, Z LHSlo = UADDO i32 RHSlo, Zlo + // setcc LHS ult RHS -> LHSHi = UADDO_CARRY i32 RHShi, Zhi + // similarly for subtraction + + // LHS = ADD i64 Y, 1 LHSlo = UADDO i32 Ylo, 1 + // setcc LHS eq 0 -> LHSHi = UADDO_CARRY i32 Yhi, 0 + + if (VT == MVT::i64 && ((CC == ISD::SETULT && + sd_match(LHS, m_Add(m_Specific(RHS), m_Value()))) || + (CC == ISD::SETUGT && + sd_match(LHS, m_Sub(m_Specific(RHS), m_Value()))) || + (CC == ISD::SETEQ && CRHS && CRHS->isZero() && + sd_match(LHS, m_Add(m_Value(), m_One()))))) { + bool IsAdd = LHS.getOpcode() == ISD::ADD; + + SDValue Op0 = LHS.getOperand(0); + SDValue Op1 = LHS.getOperand(1); + + SDValue Op0Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, Op0); + SDValue Op1Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, Op1); + + SDValue Op0Hi = getHiHalf64(Op0, DAG); + SDValue Op1Hi = getHiHalf64(Op1, DAG); + + SDValue NodeLo = + DAG.getNode(IsAdd ? ISD::UADDO : ISD::USUBO, SL, + DAG.getVTList(MVT::i32, MVT::i1), {Op0Lo, Op1Lo}); + + SDValue CarryInHi = NodeLo.getValue(1); + SDValue NodeHi = DAG.getNode(IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY, + SL, DAG.getVTList(MVT::i32, MVT::i1), + {Op0Hi, Op1Hi, CarryInHi}); + + SDValue ResultLo = NodeLo.getValue(0); + SDValue ResultHi = NodeHi.getValue(0); + + SDValue JoinedResult = + DAG.getBuildVector(MVT::v2i32, SL, {ResultLo, ResultHi}); + + SDValue Result = DAG.getNode(ISD::BITCAST, SL, VT, JoinedResult); + SDValue Overflow = NodeHi.getValue(1); + DCI.CombineTo(LHS.getNode(), Result); + return Overflow; + } + if (VT != MVT::f32 && VT != MVT::f64 && (!Subtarget->has16BitInsts() || VT != MVT::f16)) return SDValue(); |