diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2022-01-09 19:33:57 -0500 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2022-01-13 12:08:18 -0500 |
| commit | 59994c25f9df9db598c48fd33d8c3089b45184cd (patch) | |
| tree | c94c3f34704d4527a58a65f9c126daab308d25a3 /llvm/lib | |
| parent | a6f49423c1ecad4b414c204822d26d9025da2599 (diff) | |
| download | llvm-59994c25f9df9db598c48fd33d8c3089b45184cd.zip llvm-59994c25f9df9db598c48fd33d8c3089b45184cd.tar.gz llvm-59994c25f9df9db598c48fd33d8c3089b45184cd.tar.bz2 | |
AMDGPU: Select workitem ID intrinsics to 0 with reqd_work_group_size
Shockingly, we weren't doing this already. We should probably also do
this earlier, at the IR level, but it's still helpful to have the
lowering guarantee it so that we can modify the ABI implicit inputs
based on it.
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 16 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 9 |
2 files changed, 25 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 139f60b..800bd03 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -2888,6 +2888,8 @@ bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B, Register AndMaskSrc = LiveIn; + // TODO: Avoid clearing the high bits if we know workitem id y/z are always + // 0. if (Shift != 0) { auto ShiftAmt = B.buildConstant(S32, Shift); AndMaskSrc = B.buildLShr(S32, LiveIn, ShiftAmt).getReg(0); @@ -4966,6 +4968,12 @@ bool AMDGPULegalizerInfo::legalizeBVHIntrinsic(MachineInstr &MI, return true; } +static bool replaceWithConstant(MachineIRBuilder &B, MachineInstr &MI, int64_t C) { + B.buildConstant(MI.getOperand(0).getReg(), C); + MI.eraseFromParent(); + return true; +} + bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const { MachineIRBuilder &B = Helper.MIRBuilder; @@ -5069,12 +5077,20 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, case Intrinsic::amdgcn_implicitarg_ptr: return legalizeImplicitArgPtr(MI, MRI, B); case Intrinsic::amdgcn_workitem_id_x: + if (ST.getMaxWorkitemID(B.getMF().getFunction(), 0) == 0) + return replaceWithConstant(B, MI, 0); return legalizePreloadedArgIntrin(MI, MRI, B, AMDGPUFunctionArgInfo::WORKITEM_ID_X); case Intrinsic::amdgcn_workitem_id_y: + if (ST.getMaxWorkitemID(B.getMF().getFunction(), 1) == 0) + return replaceWithConstant(B, MI, 0); + return legalizePreloadedArgIntrin(MI, MRI, B, AMDGPUFunctionArgInfo::WORKITEM_ID_Y); case Intrinsic::amdgcn_workitem_id_z: + if (ST.getMaxWorkitemID(B.getMF().getFunction(), 2) == 0) + return replaceWithConstant(B, MI, 0); + return legalizePreloadedArgIntrin(MI, MRI, B, AMDGPUFunctionArgInfo::WORKITEM_ID_Z); case Intrinsic::amdgcn_workgroup_id_x: diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 
b48a380..af96b28 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -6756,14 +6756,23 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return getPreloadedValue(DAG, *MFI, VT, AMDGPUFunctionArgInfo::WORKGROUP_ID_Z); case Intrinsic::amdgcn_workitem_id_x: + if (Subtarget->getMaxWorkitemID(MF.getFunction(), 0) == 0) + return DAG.getConstant(0, DL, MVT::i32); + return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32, SDLoc(DAG.getEntryNode()), MFI->getArgInfo().WorkItemIDX); case Intrinsic::amdgcn_workitem_id_y: + if (Subtarget->getMaxWorkitemID(MF.getFunction(), 1) == 0) + return DAG.getConstant(0, DL, MVT::i32); + return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32, SDLoc(DAG.getEntryNode()), MFI->getArgInfo().WorkItemIDY); case Intrinsic::amdgcn_workitem_id_z: + if (Subtarget->getMaxWorkitemID(MF.getFunction(), 2) == 0) + return DAG.getConstant(0, DL, MVT::i32); + return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32, SDLoc(DAG.getEntryNode()), MFI->getArgInfo().WorkItemIDZ); |
