diff options
author | Johannes Doerfert <johannes@jdoerfert.de> | 2023-10-26 14:45:07 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-10-26 14:45:07 -0700 |
commit | 0ba57c8bbac0e6c5cc5a85b615b801f9b8749017 (patch) | |
tree | 536e46b632efea77a0a9d9c09ad2bf1355217184 /clang/lib/CodeGen/CodeGenModule.h | |
parent | 57cebc709df0ce839807b852432eccf345d8a63e (diff) | |
download | llvm-0ba57c8bbac0e6c5cc5a85b615b801f9b8749017.zip llvm-0ba57c8bbac0e6c5cc5a85b615b801f9b8749017.tar.gz llvm-0ba57c8bbac0e6c5cc5a85b615b801f9b8749017.tar.bz2 |
[OpenMP] Pass min/max thread and team count to the OMPIRBuilder (#70247)
We now provide the information about the min/max thread and team count
to the OMPIRBuilder, no matter what the source was. That means we
unify `thread_limit`, `num_teams`, `num_threads` handling with the
target-specific attributes (`__launch_bounds__` and
`amdgpu_flat_work_group_size`). This is in preparation to pass the
values to the runtime, and to allow the middle-end (OpenMP-opt) to
tighten the values if it seems appropriate. There is no "real" change
after this commit.
Diffstat (limited to 'clang/lib/CodeGen/CodeGenModule.h')
-rw-r--r-- | clang/lib/CodeGen/CodeGenModule.h | 14 |
1 files changed, 11 insertions, 3 deletions
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 073b471..793861f 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -1543,15 +1543,23 @@ public: void moveLazyEmissionStates(CodeGenModule *NewBuilder); /// Emit the IR encoding to attach the CUDA launch bounds attribute to \p F. + /// If \p MaxThreadsVal is not nullptr, the max threads value is stored in it, + /// if a valid one was found. void handleCUDALaunchBoundsAttr(llvm::Function *F, - const CUDALaunchBoundsAttr *A); + const CUDALaunchBoundsAttr *A, + int32_t *MaxThreadsVal = nullptr, + int32_t *MinBlocksVal = nullptr, + int32_t *MaxClusterRankVal = nullptr); /// Emit the IR encoding to attach the AMD GPU flat-work-group-size attribute /// to \p F. Alternatively, the work group size can be taken from a \p - /// ReqdWGS. + /// ReqdWGS. If \p MinThreadsVal is not nullptr, the min threads value is + /// stored in it, if a valid one was found. If \p MaxThreadsVal is not + /// nullptr, the max threads value is stored in it, if a valid one was found. void handleAMDGPUFlatWorkGroupSizeAttr( llvm::Function *F, const AMDGPUFlatWorkGroupSizeAttr *A, - const ReqdWorkGroupSizeAttr *ReqdWGS = nullptr); + const ReqdWorkGroupSizeAttr *ReqdWGS = nullptr, + int32_t *MinThreadsVal = nullptr, int32_t *MaxThreadsVal = nullptr); /// Emit the IR encoding to attach the AMD GPU waves-per-eu attribute to \p F. void handleAMDGPUWavesPerEUAttr(llvm::Function *F, |