aboutsummaryrefslogtreecommitdiff
path: root/clang/lib/CodeGen/CodeGenFunction.cpp
diff options
context:
space:
mode:
authorAniket Lal <lalaniket8@gmail.com>2025-04-08 10:29:30 +0530
committerGitHub <noreply@github.com>2025-04-08 10:29:30 +0530
commit642481a4286c9006958274531ee173b347866c50 (patch)
treec9806d0edc1d8b8d8819982730761daa3ad55ad8 /clang/lib/CodeGen/CodeGenFunction.cpp
parent65cede26a6b06ba02c08284fada06c46c0289704 (diff)
downloadllvm-642481a4286c9006958274531ee173b347866c50.zip
llvm-642481a4286c9006958274531ee173b347866c50.tar.gz
llvm-642481a4286c9006958274531ee173b347866c50.tar.bz2
[Clang][OpenCL][AMDGPU] Allow a kernel to call another kernel (#115821)
This feature is currently not supported in the compiler. To facilitate this, we emit a stub version of each kernel function body with a different name mangling scheme, and replace the respective kernel call-sites appropriately. Fixes https://github.com/llvm/llvm-project/issues/60313 D120566 was an earlier attempt made to upstream a solution for this issue. --------- Co-authored-by: anikelal <anikelal@amd.com>
Diffstat (limited to 'clang/lib/CodeGen/CodeGenFunction.cpp')
-rw-r--r--clang/lib/CodeGen/CodeGenFunction.cpp20
1 files changed, 20 insertions, 0 deletions
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index b55003b..232d481 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -1595,6 +1595,26 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
// Implicit copy-assignment gets the same special treatment as implicit
// copy-constructors.
emitImplicitAssignmentOperatorBody(Args);
+ } else if (FD->hasAttr<OpenCLKernelAttr>() &&
+ GD.getKernelReferenceKind() == KernelReferenceKind::Kernel) {
+ CallArgList CallArgs;
+ for (unsigned i = 0; i < Args.size(); ++i) {
+ Address ArgAddr = GetAddrOfLocalVar(Args[i]);
+ QualType ArgQualType = Args[i]->getType();
+ RValue ArgRValue = convertTempToRValue(ArgAddr, ArgQualType, Loc);
+ CallArgs.add(ArgRValue, ArgQualType);
+ }
+ GlobalDecl GDStub = GlobalDecl(FD, KernelReferenceKind::Stub);
+ const FunctionType *FT = cast<FunctionType>(FD->getType());
+ CGM.getTargetCodeGenInfo().setOCLKernelStubCallingConvention(FT);
+ const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeFreeFunctionCall(
+ CallArgs, FT, /*ChainCall=*/false);
+ llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FnInfo);
+ llvm::Constant *GDStubFunctionPointer =
+ CGM.getRawFunctionPointer(GDStub, FTy);
+ CGCallee GDStubCallee = CGCallee::forDirect(GDStubFunctionPointer, GDStub);
+ EmitCall(FnInfo, GDStubCallee, ReturnValueSlot(), CallArgs, nullptr, false,
+ Loc);
} else if (Body) {
EmitFunctionBody(Body);
} else