From ef9ec4bbcca2fa4f64df47bc426f1d1c59ea47e2 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Thu, 2 Mar 2023 18:35:15 -0800 Subject: [OpenMP] Add the `ompx_attribute` clause for target directives CUDA and HIP have kernel attributes to tune the code generation (in the backend). To reuse this functionality for OpenMP target regions we introduce the `ompx_attribute` clause that takes these kernel attributes and emits code as if they had been attached to the kernel function (which is implicitly generated). To limit the impact, we only support three kernel attributes: `amdgpu_waves_per_eu`, for AMDGPU; `amdgpu_flat_work_group_size`, for AMDGPU; `launch_bounds`, for NVPTX. The existing implementations of those attributes are used for error checking and code generation. `ompx_attribute` can be attached to any executable target region and it can hold more than one kernel attribute. Differential Revision: https://reviews.llvm.org/D156184 --- clang/lib/CodeGen/CodeGenModule.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'clang/lib/CodeGen/CodeGenModule.h') diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 05cb217..f5fd944 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -1557,6 +1557,21 @@ public: /// because we'll lose all important information after each repl. void moveLazyEmissionStates(CodeGenModule *NewBuilder); + /// Emit the IR encoding to attach the CUDA launch bounds attribute to \p F. + void handleCUDALaunchBoundsAttr(llvm::Function *F, + const CUDALaunchBoundsAttr *A); + + /// Emit the IR encoding to attach the AMD GPU flat-work-group-size attribute + /// to \p F. Alternatively, the work group size can be taken from a \p + /// ReqdWGS. 
+ void handleAMDGPUFlatWorkGroupSizeAttr( + llvm::Function *F, const AMDGPUFlatWorkGroupSizeAttr *A, + const ReqdWorkGroupSizeAttr *ReqdWGS = nullptr); + + /// Emit the IR encoding to attach the AMD GPU waves-per-eu attribute to \p F. + void handleAMDGPUWavesPerEUAttr(llvm::Function *F, + const AMDGPUWavesPerEUAttr *A); + private: llvm::Constant *GetOrCreateLLVMFunction( StringRef MangledName, llvm::Type *Ty, GlobalDecl D, bool ForVTable, -- cgit v1.1