diff options
author | Aniket Lal <lalaniket8@gmail.com> | 2025-04-08 10:29:30 +0530 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-04-08 10:29:30 +0530 |
commit | 642481a4286c9006958274531ee173b347866c50 (patch) | |
tree | c9806d0edc1d8b8d8819982730761daa3ad55ad8 /clang/lib/CodeGen/CodeGenFunction.cpp | |
parent | 65cede26a6b06ba02c08284fada06c46c0289704 (diff) | |
download | llvm-642481a4286c9006958274531ee173b347866c50.zip llvm-642481a4286c9006958274531ee173b347866c50.tar.gz llvm-642481a4286c9006958274531ee173b347866c50.tar.bz2 |
[Clang][OpenCL][AMDGPU] Allow a kernel to call another kernel (#115821)
This feature is currently not supported in the compiler.
To facilitate this, we emit a stub version of each kernel
function body with a different name mangling scheme, and
replace the respective kernel call-sites appropriately.
Fixes https://github.com/llvm/llvm-project/issues/60313
D120566 was an earlier attempt made to upstream a solution
for this issue.
---------
Co-authored-by: anikelal <anikelal@amd.com>
Diffstat (limited to 'clang/lib/CodeGen/CodeGenFunction.cpp')
-rw-r--r-- | clang/lib/CodeGen/CodeGenFunction.cpp | 20 |
1 files changed, 20 insertions, 0 deletions
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index b55003b..232d481 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -1595,6 +1595,26 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, // Implicit copy-assignment gets the same special treatment as implicit // copy-constructors. emitImplicitAssignmentOperatorBody(Args); + } else if (FD->hasAttr<OpenCLKernelAttr>() && + GD.getKernelReferenceKind() == KernelReferenceKind::Kernel) { + CallArgList CallArgs; + for (unsigned i = 0; i < Args.size(); ++i) { + Address ArgAddr = GetAddrOfLocalVar(Args[i]); + QualType ArgQualType = Args[i]->getType(); + RValue ArgRValue = convertTempToRValue(ArgAddr, ArgQualType, Loc); + CallArgs.add(ArgRValue, ArgQualType); + } + GlobalDecl GDStub = GlobalDecl(FD, KernelReferenceKind::Stub); + const FunctionType *FT = cast<FunctionType>(FD->getType()); + CGM.getTargetCodeGenInfo().setOCLKernelStubCallingConvention(FT); + const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeFreeFunctionCall( + CallArgs, FT, /*ChainCall=*/false); + llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FnInfo); + llvm::Constant *GDStubFunctionPointer = + CGM.getRawFunctionPointer(GDStub, FTy); + CGCallee GDStubCallee = CGCallee::forDirect(GDStubFunctionPointer, GDStub); + EmitCall(FnInfo, GDStubCallee, ReturnValueSlot(), CallArgs, nullptr, false, + Loc); } else if (Body) { EmitFunctionBody(Body); } else |