aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2022-01-09 19:33:57 -0500
committerMatt Arsenault <Matthew.Arsenault@amd.com>2022-01-13 12:08:18 -0500
commit59994c25f9df9db598c48fd33d8c3089b45184cd (patch)
treec94c3f34704d4527a58a65f9c126daab308d25a3 /llvm/lib
parenta6f49423c1ecad4b414c204822d26d9025da2599 (diff)
downloadllvm-59994c25f9df9db598c48fd33d8c3089b45184cd.zip
llvm-59994c25f9df9db598c48fd33d8c3089b45184cd.tar.gz
llvm-59994c25f9df9db598c48fd33d8c3089b45184cd.tar.bz2
AMDGPU: Select workitem ID intrinsics to 0 with req_work_group_size
Shockingly we weren't doing this already. We should probably have this be done earlier in the IR too, but it's still helpful to have the lowering guarantee it so that we can modify the ABI implicit inputs based on it.
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp16
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp9
2 files changed, 25 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 139f60b..800bd03 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -2888,6 +2888,8 @@ bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B,
Register AndMaskSrc = LiveIn;
+ // TODO: Avoid clearing the high bits if we know workitem id y/z are always
+ // 0.
if (Shift != 0) {
auto ShiftAmt = B.buildConstant(S32, Shift);
AndMaskSrc = B.buildLShr(S32, LiveIn, ShiftAmt).getReg(0);
@@ -4966,6 +4968,12 @@ bool AMDGPULegalizerInfo::legalizeBVHIntrinsic(MachineInstr &MI,
return true;
}
+static bool replaceWithConstant(MachineIRBuilder &B, MachineInstr &MI, int64_t C) {
+ B.buildConstant(MI.getOperand(0).getReg(), C);
+ MI.eraseFromParent();
+ return true;
+}
+
bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MachineInstr &MI) const {
MachineIRBuilder &B = Helper.MIRBuilder;
@@ -5069,12 +5077,20 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::amdgcn_implicitarg_ptr:
return legalizeImplicitArgPtr(MI, MRI, B);
case Intrinsic::amdgcn_workitem_id_x:
+ if (ST.getMaxWorkitemID(B.getMF().getFunction(), 0) == 0)
+ return replaceWithConstant(B, MI, 0);
return legalizePreloadedArgIntrin(MI, MRI, B,
AMDGPUFunctionArgInfo::WORKITEM_ID_X);
case Intrinsic::amdgcn_workitem_id_y:
+ if (ST.getMaxWorkitemID(B.getMF().getFunction(), 1) == 0)
+ return replaceWithConstant(B, MI, 0);
+
return legalizePreloadedArgIntrin(MI, MRI, B,
AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
case Intrinsic::amdgcn_workitem_id_z:
+ if (ST.getMaxWorkitemID(B.getMF().getFunction(), 2) == 0)
+ return replaceWithConstant(B, MI, 0);
+
return legalizePreloadedArgIntrin(MI, MRI, B,
AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
case Intrinsic::amdgcn_workgroup_id_x:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index b48a380..af96b28 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6756,14 +6756,23 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
case Intrinsic::amdgcn_workitem_id_x:
+ if (Subtarget->getMaxWorkitemID(MF.getFunction(), 0) == 0)
+ return DAG.getConstant(0, DL, MVT::i32);
+
return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
SDLoc(DAG.getEntryNode()),
MFI->getArgInfo().WorkItemIDX);
case Intrinsic::amdgcn_workitem_id_y:
+ if (Subtarget->getMaxWorkitemID(MF.getFunction(), 1) == 0)
+ return DAG.getConstant(0, DL, MVT::i32);
+
return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
SDLoc(DAG.getEntryNode()),
MFI->getArgInfo().WorkItemIDY);
case Intrinsic::amdgcn_workitem_id_z:
+ if (Subtarget->getMaxWorkitemID(MF.getFunction(), 2) == 0)
+ return DAG.getConstant(0, DL, MVT::i32);
+
return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
SDLoc(DAG.getEntryNode()),
MFI->getArgInfo().WorkItemIDZ);