diff options
author | Diana Picus <Diana-Magda.Picus@amd.com> | 2025-08-15 10:12:47 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-08-15 10:12:47 +0200 |
commit | ac005e16f617451ad2dc0c794661159cb8111f72 (patch) | |
tree | e9f8ad6b910ca90dacbb7aaf2b3c170b7303f1cb /llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | |
parent | fdd2d4df1212ef6b7c8e0dfbba8f2a24343d2d9d (diff) | |
download | llvm-ac005e16f617451ad2dc0c794661159cb8111f72.zip llvm-ac005e16f617451ad2dc0c794661159cb8111f72.tar.gz llvm-ac005e16f617451ad2dc0c794661159cb8111f72.tar.bz2 |
Reapply "[AMDGPU] Intrinsic for launching whole wave functions" (#153584)
This reverts commit 14cd1339318b16e08c1363ec6896bd7d1e4ae281. The
buildbot failure seems to have been a cmake issue which has been
discussed in more detail in this Discourse post:
https://discourse.llvm.org/t/cmake-doesnt-regenerate-all-tablegen-target-files/87901
If any buildbots fail to select arbitrary intrinsics with this patch,
it's worth considering using clean builds with ccache instead of
incremental builds, as recommended here:
https://llvm.org/docs/HowToAddABuilder.html#:~:text=Use%20CCache%20and%20NOT%20incremental%20builds
The original commit message for this patch:
Add the llvm.amdgcn.call.whole.wave intrinsic for calling whole wave
functions. This will take as its first argument the callee with the
amdgpu_gfx_whole_wave calling convention, followed by the call
parameters which must match the signature of the callee except for the
first function argument (the i1 original EXEC mask, which doesn't need
to be passed in). Indirect calls are not allowed.
Make direct calls to amdgpu_gfx_whole_wave functions a verifier error.
Tail calls will be handled in a future patch.
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 37 |
1 file changed, 37 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 7669498..2eaab02 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -7990,6 +7990,43 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, HasTailCall = true; return; } + case Intrinsic::amdgcn_call_whole_wave: { + TargetLowering::ArgListTy Args; + + // The first argument is the callee. Skip it when assembling the call args. + TargetLowering::ArgListEntry Arg; + for (unsigned Idx = 1; Idx < I.arg_size(); ++Idx) { + Arg.Node = getValue(I.getArgOperand(Idx)); + Arg.Ty = I.getArgOperand(Idx)->getType(); + Arg.setAttributes(&I, Idx); + Args.push_back(Arg); + } + + SDValue ConvControlToken; + if (auto Bundle = I.getOperandBundle(LLVMContext::OB_convergencectrl)) { + auto *Token = Bundle->Inputs[0].get(); + ConvControlToken = getValue(Token); + } + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(getCurSDLoc()) + .setChain(getRoot()) + .setCallee(CallingConv::AMDGPU_Gfx_WholeWave, I.getType(), + getValue(I.getArgOperand(0)), std::move(Args)) + .setTailCall(false) + .setIsPreallocated( + I.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0) + .setConvergent(I.isConvergent()) + .setConvergenceControlToken(ConvControlToken); + CLI.CB = &I; + + std::pair<SDValue, SDValue> Result = + lowerInvokable(CLI, /*EHPadBB=*/nullptr); + + if (Result.first.getNode()) + setValue(&I, Result.first); + return; + } case Intrinsic::ptrmask: { SDValue Ptr = getValue(I.getOperand(0)); SDValue Mask = getValue(I.getOperand(1)); |