aboutsummaryrefslogtreecommitdiff
path: root/openmp
diff options
context:
space:
mode:
author: Johannes Doerfert <johannes@jdoerfert.de> 2022-08-10 22:26:31 -0500
committer: Johannes Doerfert <johannes@jdoerfert.de> 2022-08-11 09:55:56 -0500
commit: a8cda3290944687b4fd0138e63cd980ea497a438 (patch)
tree: 182d18dac7104242e78bde030183c6aba48e577a /openmp
parent: 6e19e6ce36e44554ac9fbf8b2780de05e922c849 (diff)
download: llvm-a8cda3290944687b4fd0138e63cd980ea497a438.zip
llvm-a8cda3290944687b4fd0138e63cd980ea497a438.tar.gz
llvm-a8cda3290944687b4fd0138e63cd980ea497a438.tar.bz2
[OpenMP][FIX] Ensure __kmpc_kernel_parallel is reachable
The problem is that the call to __kmpc_kernel_parallel is created in the openmp-opt pass, so while we optimize the code the call is not there yet. Thus, we assume we never reach it from __kmpc_target_deinit. That allows us to remove the store in there (`ParallelRegionFn = nullptr`), which leads to bad results later on. This is a stopgap solution until we come up with something better. Fixes https://github.com/llvm/llvm-project/issues/57064
Diffstat (limited to 'openmp')
-rw-r--r-- openmp/libomptarget/DeviceRTL/src/Kernel.cpp | 16
1 file changed, 14 insertions, 2 deletions
diff --git a/openmp/libomptarget/DeviceRTL/src/Kernel.cpp b/openmp/libomptarget/DeviceRTL/src/Kernel.cpp
index 74c22a6..d682652 100644
--- a/openmp/libomptarget/DeviceRTL/src/Kernel.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Kernel.cpp
@@ -35,7 +35,7 @@ static void genericStateMachine(IdentTy *Ident) {
uint32_t TId = mapping::getThreadIdInBlock();
do {
- ParallelRegionFnTy WorkFn = 0;
+ ParallelRegionFnTy WorkFn = nullptr;
// Wait for the signal that we have a new work function.
synchronize::threads();
@@ -100,8 +100,20 @@ int32_t __kmpc_target_init(IdentTy *Ident, int8_t Mode,
// doing any work. mapping::getBlockSize() does not include any of the main
// thread's warp, so none of its threads can ever be active worker threads.
if (UseGenericStateMachine &&
- mapping::getThreadIdInBlock() < mapping::getBlockSize(IsSPMD))
+ mapping::getThreadIdInBlock() < mapping::getBlockSize(IsSPMD)) {
genericStateMachine(Ident);
+ } else {
+ // Retrieve the work function just to ensure we always call
+ // __kmpc_kernel_parallel even if a custom state machine is used.
+ // TODO: this is not super pretty. The problem is we create the call to
+ // __kmpc_kernel_parallel in the openmp-opt pass but while we optimize it is
+ // not there yet. Thus, we assume we never reach it from
+ // __kmpc_target_deinit. That allows us to remove the store in there to
+ // ParallelRegionFn, which leads to bad results later on.
+ ParallelRegionFnTy WorkFn = nullptr;
+ __kmpc_kernel_parallel(&WorkFn);
+ ASSERT(WorkFn == nullptr);
+ }
return mapping::getThreadIdInBlock();
}