aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIISelLowering.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp60
1 files changed, 52 insertions, 8 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 1a686a9..730be69 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6073,9 +6073,6 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineOperand &Src0 = MI.getOperand(2);
MachineOperand &Src1 = MI.getOperand(3);
MachineOperand &Src2 = MI.getOperand(4);
- unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
- ? AMDGPU::S_ADDC_U32
- : AMDGPU::S_SUBB_U32;
if (Src0.isReg() && TRI->isVectorRegister(MRI, Src0.getReg())) {
Register RegOp0 = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
BuildMI(*BB, MII, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), RegOp0)
@@ -6124,11 +6121,11 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
.addImm(0);
}
- // clang-format off
- BuildMI(*BB, MII, DL, TII->get(Opc), Dest.getReg())
- .add(Src0)
- .add(Src1);
- // clang-format on
+ unsigned Opc = MI.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO
+ ? AMDGPU::S_ADDC_U32
+ : AMDGPU::S_SUBB_U32;
+
+ BuildMI(*BB, MII, DL, TII->get(Opc), Dest.getReg()).add(Src0).add(Src1);
unsigned SelOpc =
ST.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
@@ -16571,6 +16568,53 @@ SDValue SITargetLowering::performSetCCCombine(SDNode *N,
}
}
+ // Eliminate the setcc by reusing the carry-out of the add/sub instruction.
+
+ // LHS = ADD i64 RHS, Z          LHSlo = UADDO       i32 RHSlo, Zlo
+ // setcc LHS ult RHS     ->      LHShi = UADDO_CARRY i32 RHShi, Zhi
+ // (the setcc becomes the carry-out of LHShi); similarly for subtraction
+
+ // LHS = ADD i64 Y, 1            LHSlo = UADDO       i32 Ylo, 1
+ // setcc LHS eq 0        ->      LHShi = UADDO_CARRY i32 Yhi, 0
+
+ if (VT == MVT::i64 && ((CC == ISD::SETULT &&
+ sd_match(LHS, m_Add(m_Specific(RHS), m_Value()))) ||
+ (CC == ISD::SETUGT &&
+ sd_match(LHS, m_Sub(m_Specific(RHS), m_Value()))) ||
+ (CC == ISD::SETEQ && CRHS && CRHS->isZero() &&
+ sd_match(LHS, m_Add(m_Value(), m_One()))))) {
+ bool IsAdd = LHS.getOpcode() == ISD::ADD;
+
+ SDValue Op0 = LHS.getOperand(0);
+ SDValue Op1 = LHS.getOperand(1);
+
+ SDValue Op0Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, Op0);
+ SDValue Op1Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, Op1);
+
+ SDValue Op0Hi = getHiHalf64(Op0, DAG);
+ SDValue Op1Hi = getHiHalf64(Op1, DAG);
+
+ SDValue NodeLo =
+ DAG.getNode(IsAdd ? ISD::UADDO : ISD::USUBO, SL,
+ DAG.getVTList(MVT::i32, MVT::i1), {Op0Lo, Op1Lo});
+
+ SDValue CarryInHi = NodeLo.getValue(1);
+ SDValue NodeHi = DAG.getNode(IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY,
+ SL, DAG.getVTList(MVT::i32, MVT::i1),
+ {Op0Hi, Op1Hi, CarryInHi});
+
+ SDValue ResultLo = NodeLo.getValue(0);
+ SDValue ResultHi = NodeHi.getValue(0);
+
+ SDValue JoinedResult =
+ DAG.getBuildVector(MVT::v2i32, SL, {ResultLo, ResultHi});
+
+ SDValue Result = DAG.getNode(ISD::BITCAST, SL, VT, JoinedResult);
+ SDValue Overflow = NodeHi.getValue(1);
+ DCI.CombineTo(LHS.getNode(), Result);
+ return Overflow;
+ }
+
if (VT != MVT::f32 && VT != MVT::f64 &&
(!Subtarget->has16BitInsts() || VT != MVT::f16))
return SDValue();