aboutsummaryrefslogtreecommitdiff
path: root/clang/lib/CodeGen/CodeGenModule.h
diff options
context:
space:
mode:
authorJohannes Doerfert <johannes@jdoerfert.de>2023-10-26 14:45:07 -0700
committerGitHub <noreply@github.com>2023-10-26 14:45:07 -0700
commit0ba57c8bbac0e6c5cc5a85b615b801f9b8749017 (patch)
tree536e46b632efea77a0a9d9c09ad2bf1355217184 /clang/lib/CodeGen/CodeGenModule.h
parent57cebc709df0ce839807b852432eccf345d8a63e (diff)
downloadllvm-0ba57c8bbac0e6c5cc5a85b615b801f9b8749017.zip
llvm-0ba57c8bbac0e6c5cc5a85b615b801f9b8749017.tar.gz
llvm-0ba57c8bbac0e6c5cc5a85b615b801f9b8749017.tar.bz2
[OpenMP] Pass min/max thread and team count to the OMPIRBuilder (#70247)
We now provide the information about the min/max thread and team count to the OMPIRBuilder, no matter what the source was. That means we unify `thread_limit`, `num_teams`, `num_threads` handling with the target-specific attributes (`__launch_bounds__` and `amdgpu_flat_work_group_size`). This is in preparation to pass the values to the runtime, and to allow the middle-end (OpenMP-opt) to tighten the values if it seems appropriate. There is no "real" change after this commit.
Diffstat (limited to 'clang/lib/CodeGen/CodeGenModule.h')
-rw-r--r--clang/lib/CodeGen/CodeGenModule.h14
1 files changed, 11 insertions, 3 deletions
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index 073b471..793861f 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -1543,15 +1543,23 @@ public:
void moveLazyEmissionStates(CodeGenModule *NewBuilder);
/// Emit the IR encoding to attach the CUDA launch bounds attribute to \p F.
+ /// If \p MaxThreadsVal is not nullptr, the max threads value is stored in it,
+ /// if a valid one was found.
void handleCUDALaunchBoundsAttr(llvm::Function *F,
- const CUDALaunchBoundsAttr *A);
+ const CUDALaunchBoundsAttr *A,
+ int32_t *MaxThreadsVal = nullptr,
+ int32_t *MinBlocksVal = nullptr,
+ int32_t *MaxClusterRankVal = nullptr);
/// Emit the IR encoding to attach the AMD GPU flat-work-group-size attribute
/// to \p F. Alternatively, the work group size can be taken from a \p
- /// ReqdWGS.
+ /// ReqdWGS. If \p MinThreadsVal is not nullptr, the min threads value is
+ /// stored in it, if a valid one was found. If \p MaxThreadsVal is not
+ /// nullptr, the max threads value is stored in it, if a valid one was found.
void handleAMDGPUFlatWorkGroupSizeAttr(
llvm::Function *F, const AMDGPUFlatWorkGroupSizeAttr *A,
- const ReqdWorkGroupSizeAttr *ReqdWGS = nullptr);
+ const ReqdWorkGroupSizeAttr *ReqdWGS = nullptr,
+ int32_t *MinThreadsVal = nullptr, int32_t *MaxThreadsVal = nullptr);
/// Emit the IR encoding to attach the AMD GPU waves-per-eu attribute to \p F.
void handleAMDGPUWavesPerEUAttr(llvm::Function *F,