aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
diff options
context:
space:
mode:
author: Diana Picus <Diana-Magda.Picus@amd.com> 2025-08-15 10:12:47 +0200
committer: GitHub <noreply@github.com> 2025-08-15 10:12:47 +0200
commit: ac005e16f617451ad2dc0c794661159cb8111f72 (patch)
tree: e9f8ad6b910ca90dacbb7aaf2b3c170b7303f1cb /llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
parent: fdd2d4df1212ef6b7c8e0dfbba8f2a24343d2d9d (diff)
download: llvm-ac005e16f617451ad2dc0c794661159cb8111f72.zip
llvm-ac005e16f617451ad2dc0c794661159cb8111f72.tar.gz
llvm-ac005e16f617451ad2dc0c794661159cb8111f72.tar.bz2
Reapply "[AMDGPU] Intrinsic for launching whole wave functions" (#153584)
This reverts commit 14cd1339318b16e08c1363ec6896bd7d1e4ae281.

The buildbot failure seems to have been a cmake issue which has been discussed in more detail in this Discourse post: https://discourse.llvm.org/t/cmake-doesnt-regenerate-all-tablegen-target-files/87901

If any buildbots fail to select arbitrary intrinsics with this patch, it's worth considering using clean builds with ccache instead of incremental builds, as recommended here: https://llvm.org/docs/HowToAddABuilder.html#:~:text=Use%20CCache%20and%20NOT%20incremental%20builds

The original commit message for this patch:

Add the llvm.amdgcn.call.whole.wave intrinsic for calling whole wave functions. This will take as its first argument the callee with the amdgpu_gfx_whole_wave calling convention, followed by the call parameters which must match the signature of the callee except for the first function argument (the i1 original EXEC mask, which doesn't need to be passed in). Indirect calls are not allowed. Make direct calls to amdgpu_gfx_whole_wave functions a verifier error.

Tail calls are handled in a future patch.
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp')
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp 37
1 files changed, 37 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 7669498..2eaab02 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7990,6 +7990,43 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
HasTailCall = true;
return;
}
+ case Intrinsic::amdgcn_call_whole_wave: {
+ TargetLowering::ArgListTy Args;
+
+ // The first argument is the callee. Skip it when assembling the call args.
+ TargetLowering::ArgListEntry Arg;
+ for (unsigned Idx = 1; Idx < I.arg_size(); ++Idx) {
+ Arg.Node = getValue(I.getArgOperand(Idx));
+ Arg.Ty = I.getArgOperand(Idx)->getType();
+ Arg.setAttributes(&I, Idx);
+ Args.push_back(Arg);
+ }
+
+ SDValue ConvControlToken;
+ if (auto Bundle = I.getOperandBundle(LLVMContext::OB_convergencectrl)) {
+ auto *Token = Bundle->Inputs[0].get();
+ ConvControlToken = getValue(Token);
+ }
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(getCurSDLoc())
+ .setChain(getRoot())
+ .setCallee(CallingConv::AMDGPU_Gfx_WholeWave, I.getType(),
+ getValue(I.getArgOperand(0)), std::move(Args))
+ .setTailCall(false)
+ .setIsPreallocated(
+ I.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0)
+ .setConvergent(I.isConvergent())
+ .setConvergenceControlToken(ConvControlToken);
+ CLI.CB = &I;
+
+ std::pair<SDValue, SDValue> Result =
+ lowerInvokable(CLI, /*EHPadBB=*/nullptr);
+
+ if (Result.first.getNode())
+ setValue(&I, Result.first);
+ return;
+ }
case Intrinsic::ptrmask: {
SDValue Ptr = getValue(I.getOperand(0));
SDValue Mask = getValue(I.getOperand(1));