about | summary | refs | log | tree | commit | diff
path: root/llvm/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 22
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 44
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 2
3 files changed, 13 insertions, 55 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 980e585..d35a022 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3117,30 +3117,20 @@ static bool isCttzOpc(unsigned Opc) {
SDValue AMDGPUTargetLowering::lowerCTLZResults(SDValue Op,
SelectionDAG &DAG) const {
auto SL = SDLoc(Op);
- auto Opc = Op.getOpcode();
auto Arg = Op.getOperand(0u);
auto ResultVT = Op.getValueType();
if (ResultVT != MVT::i8 && ResultVT != MVT::i16)
return {};
- assert(isCtlzOpc(Opc));
+ assert(isCtlzOpc(Op.getOpcode()));
assert(ResultVT == Arg.getValueType());
- const uint64_t NumBits = ResultVT.getFixedSizeInBits();
- SDValue NumExtBits = DAG.getConstant(32u - NumBits, SL, MVT::i32);
- SDValue NewOp;
-
- if (Opc == ISD::CTLZ_ZERO_UNDEF) {
- NewOp = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, Arg);
- NewOp = DAG.getNode(ISD::SHL, SL, MVT::i32, NewOp, NumExtBits);
- NewOp = DAG.getNode(Opc, SL, MVT::i32, NewOp);
- } else {
- NewOp = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Arg);
- NewOp = DAG.getNode(Opc, SL, MVT::i32, NewOp);
- NewOp = DAG.getNode(ISD::SUB, SL, MVT::i32, NewOp, NumExtBits);
- }
-
+ auto const LeadingZeroes = 32u - ResultVT.getFixedSizeInBits();
+ auto SubVal = DAG.getConstant(LeadingZeroes, SL, MVT::i32);
+ auto NewOp = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Arg);
+ NewOp = DAG.getNode(Op.getOpcode(), SL, MVT::i32, NewOp);
+ NewOp = DAG.getNode(ISD::SUB, SL, MVT::i32, NewOp, SubVal);
return DAG.getNode(ISD::TRUNCATE, SL, ResultVT, NewOp);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 15a4b67..bd7bf78 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1270,22 +1270,13 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.custom();
// The 64-bit versions produce 32-bit results, but only on the SALU.
- getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF)
- .legalFor({{S32, S32}, {S32, S64}})
- .customIf(scalarNarrowerThan(1, 32))
- .clampScalar(0, S32, S32)
- .clampScalar(1, S32, S64)
- .scalarize(0)
- .widenScalarToNextPow2(0, 32)
- .widenScalarToNextPow2(1, 32);
-
- getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF)
- .legalFor({{S32, S32}, {S32, S64}})
- .clampScalar(0, S32, S32)
- .clampScalar(1, S32, S64)
- .scalarize(0)
- .widenScalarToNextPow2(0, 32)
- .widenScalarToNextPow2(1, 32);
+ getActionDefinitionsBuilder({G_CTLZ_ZERO_UNDEF, G_CTTZ_ZERO_UNDEF})
+ .legalFor({{S32, S32}, {S32, S64}})
+ .clampScalar(0, S32, S32)
+ .clampScalar(1, S32, S64)
+ .scalarize(0)
+ .widenScalarToNextPow2(0, 32)
+ .widenScalarToNextPow2(1, 32);
// S64 is only legal on SALU, and needs to be broken into 32-bit elements in
// RegBankSelect.
@@ -2137,8 +2128,6 @@ bool AMDGPULegalizerInfo::legalizeCustom(
case TargetOpcode::G_CTLZ:
case TargetOpcode::G_CTTZ:
return legalizeCTLZ_CTTZ(MI, MRI, B);
- case TargetOpcode::G_CTLZ_ZERO_UNDEF:
- return legalizeCTLZ_ZERO_UNDEF(MI, MRI, B);
case TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND:
return legalizeFPTruncRound(MI, B);
case TargetOpcode::G_STACKSAVE:
@@ -4156,25 +4145,6 @@ bool AMDGPULegalizerInfo::legalizeCTLZ_CTTZ(MachineInstr &MI,
return true;
}
-bool AMDGPULegalizerInfo::legalizeCTLZ_ZERO_UNDEF(MachineInstr &MI,
- MachineRegisterInfo &MRI,
- MachineIRBuilder &B) const {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- LLT SrcTy = MRI.getType(Src);
- TypeSize NumBits = SrcTy.getSizeInBits();
-
- assert(NumBits < 32u);
-
- auto ShiftAmt = B.buildConstant(S32, 32u - NumBits);
- auto Extend = B.buildAnyExt(S32, {Src}).getReg(0u);
- auto Shift = B.buildLShr(S32, {Extend}, ShiftAmt);
- auto Ctlz = B.buildInstr(AMDGPU::G_AMDGPU_FFBH_U32, {S32}, {Shift});
- B.buildTrunc(Dst, Ctlz);
- MI.eraseFromParent();
- return true;
-}
-
// Check that this is a G_XOR x, -1
static bool isNot(const MachineRegisterInfo &MRI, const MachineInstr &MI) {
if (MI.getOpcode() != TargetOpcode::G_XOR)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 4b1d821..e5ba84a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -108,8 +108,6 @@ public:
bool legalizeMul(LegalizerHelper &Helper, MachineInstr &MI) const;
bool legalizeCTLZ_CTTZ(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
- bool legalizeCTLZ_ZERO_UNDEF(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &B) const;
bool loadInputValue(Register DstReg, MachineIRBuilder &B,
const ArgDescriptor *Arg,