diff options
author | Leon Clark <PeddleSpam@users.noreply.github.com> | 2024-05-20 16:00:15 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-20 16:00:15 +0100 |
commit | 6f236cdc42601d96f06781e75d0112bdb8d4a4ce (patch) | |
tree | 3cf0897f218b3939f836ce54d4c2536252085568 /llvm/lib/Target | |
parent | 2a90d59fc3905d3d56dac99fa25640a6d6a7bad2 (diff) | |
download | llvm-revert-88512-ctlz_zu.zip llvm-revert-88512-ctlz_zu.tar.gz llvm-revert-88512-ctlz_zu.tar.bz2 |
Revert "[AMDGPU] Use LSH for lowering ctlz_zero_undef.i8/i16 (#88512)" (revert-88512-ctlz_zu)
This reverts commit fb2c6597e39e9e1a775525ea0236b2f89e46acff.
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 22 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 44 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 2 |
3 files changed, 13 insertions, 55 deletions
```diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 980e585..d35a022 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3117,30 +3117,20 @@ static bool isCttzOpc(unsigned Opc) {
 SDValue AMDGPUTargetLowering::lowerCTLZResults(SDValue Op,
                                                SelectionDAG &DAG) const {
   auto SL = SDLoc(Op);
-  auto Opc = Op.getOpcode();
   auto Arg = Op.getOperand(0u);
   auto ResultVT = Op.getValueType();
 
   if (ResultVT != MVT::i8 && ResultVT != MVT::i16)
     return {};
 
-  assert(isCtlzOpc(Opc));
+  assert(isCtlzOpc(Op.getOpcode()));
   assert(ResultVT == Arg.getValueType());
 
-  const uint64_t NumBits = ResultVT.getFixedSizeInBits();
-  SDValue NumExtBits = DAG.getConstant(32u - NumBits, SL, MVT::i32);
-  SDValue NewOp;
-
-  if (Opc == ISD::CTLZ_ZERO_UNDEF) {
-    NewOp = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, Arg);
-    NewOp = DAG.getNode(ISD::SHL, SL, MVT::i32, NewOp, NumExtBits);
-    NewOp = DAG.getNode(Opc, SL, MVT::i32, NewOp);
-  } else {
-    NewOp = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Arg);
-    NewOp = DAG.getNode(Opc, SL, MVT::i32, NewOp);
-    NewOp = DAG.getNode(ISD::SUB, SL, MVT::i32, NewOp, NumExtBits);
-  }
-
+  auto const LeadingZeroes = 32u - ResultVT.getFixedSizeInBits();
+  auto SubVal = DAG.getConstant(LeadingZeroes, SL, MVT::i32);
+  auto NewOp = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Arg);
+  NewOp = DAG.getNode(Op.getOpcode(), SL, MVT::i32, NewOp);
+  NewOp = DAG.getNode(ISD::SUB, SL, MVT::i32, NewOp, SubVal);
   return DAG.getNode(ISD::TRUNCATE, SL, ResultVT, NewOp);
 }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 15a4b67..bd7bf78 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1270,22 +1270,13 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
     .custom();
 
   // The 64-bit versions produce 32-bit results, but only on the SALU.
-  getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF)
-      .legalFor({{S32, S32}, {S32, S64}})
-      .customIf(scalarNarrowerThan(1, 32))
-      .clampScalar(0, S32, S32)
-      .clampScalar(1, S32, S64)
-      .scalarize(0)
-      .widenScalarToNextPow2(0, 32)
-      .widenScalarToNextPow2(1, 32);
-
-  getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF)
-      .legalFor({{S32, S32}, {S32, S64}})
-      .clampScalar(0, S32, S32)
-      .clampScalar(1, S32, S64)
-      .scalarize(0)
-      .widenScalarToNextPow2(0, 32)
-      .widenScalarToNextPow2(1, 32);
+  getActionDefinitionsBuilder({G_CTLZ_ZERO_UNDEF, G_CTTZ_ZERO_UNDEF})
+    .legalFor({{S32, S32}, {S32, S64}})
+    .clampScalar(0, S32, S32)
+    .clampScalar(1, S32, S64)
+    .scalarize(0)
+    .widenScalarToNextPow2(0, 32)
+    .widenScalarToNextPow2(1, 32);
 
   // S64 is only legal on SALU, and needs to be broken into 32-bit elements in
   // RegBankSelect.
@@ -2137,8 +2128,6 @@ bool AMDGPULegalizerInfo::legalizeCustom(
   case TargetOpcode::G_CTLZ:
   case TargetOpcode::G_CTTZ:
     return legalizeCTLZ_CTTZ(MI, MRI, B);
-  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
-    return legalizeCTLZ_ZERO_UNDEF(MI, MRI, B);
   case TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND:
     return legalizeFPTruncRound(MI, B);
   case TargetOpcode::G_STACKSAVE:
@@ -4156,25 +4145,6 @@ bool AMDGPULegalizerInfo::legalizeCTLZ_CTTZ(MachineInstr &MI,
   return true;
 }
 
-bool AMDGPULegalizerInfo::legalizeCTLZ_ZERO_UNDEF(MachineInstr &MI,
-                                                  MachineRegisterInfo &MRI,
-                                                  MachineIRBuilder &B) const {
-  Register Dst = MI.getOperand(0).getReg();
-  Register Src = MI.getOperand(1).getReg();
-  LLT SrcTy = MRI.getType(Src);
-  TypeSize NumBits = SrcTy.getSizeInBits();
-
-  assert(NumBits < 32u);
-
-  auto ShiftAmt = B.buildConstant(S32, 32u - NumBits);
-  auto Extend = B.buildAnyExt(S32, {Src}).getReg(0u);
-  auto Shift = B.buildLShr(S32, {Extend}, ShiftAmt);
-  auto Ctlz = B.buildInstr(AMDGPU::G_AMDGPU_FFBH_U32, {S32}, {Shift});
-  B.buildTrunc(Dst, Ctlz);
-  MI.eraseFromParent();
-  return true;
-}
-
 // Check that this is a G_XOR x, -1
 static bool isNot(const MachineRegisterInfo &MRI, const MachineInstr &MI) {
   if (MI.getOpcode() != TargetOpcode::G_XOR)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 4b1d821..e5ba84a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -108,8 +108,6 @@ public:
   bool legalizeMul(LegalizerHelper &Helper, MachineInstr &MI) const;
   bool legalizeCTLZ_CTTZ(MachineInstr &MI, MachineRegisterInfo &MRI,
                          MachineIRBuilder &B) const;
-  bool legalizeCTLZ_ZERO_UNDEF(MachineInstr &MI, MachineRegisterInfo &MRI,
-                               MachineIRBuilder &B) const;
 
   bool loadInputValue(Register DstReg, MachineIRBuilder &B,
                       const ArgDescriptor *Arg,
```