aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorShilei Tian <tianshilei1992@gmail.com>2021-03-18 18:25:21 -0400
committerTom Stellard <tstellar@redhat.com>2021-03-29 16:29:53 -0700
commitf43958b7c497c526b238607624ee0069888f4c98 (patch)
tree8238f8b3f16bd660fef732cb4f3be86f7ef20eaa
parente94372d1b395a6461e7d973917b3a3c29563a5e6 (diff)
downloadllvm-f43958b7c497c526b238607624ee0069888f4c98.zip
llvm-f43958b7c497c526b238607624ee0069888f4c98.tar.gz
llvm-f43958b7c497c526b238607624ee0069888f4c98.tar.bz2
[OpenMP] Fixed a crash in hidden helper thread
It is reported that after enabling hidden helper thread, the program can hit the assertion `new_gtid < __kmp_threads_capacity` sometimes. The root cause is explained as follows. Let's say the default `__kmp_threads_capacity` is `N`. If hidden helper thread is enabled, `__kmp_threads_capacity` will be offset to `N+8` by default. If the number of threads we need exceeds `N+8`, e.g. via `num_threads` clause, we need to expand `__kmp_threads`. In `__kmp_expand_threads`, the expansion starts from `__kmp_threads_capacity`, and repeatedly doubling it until the new capacity meets the requirement. Let's assume the new requirement is `Y`. If `Y` happens to meet the constraint `(N+8)*2^X=Y` where `X` is the number of iterations, the new capacity is not enough because we have 8 slots for hidden helper threads. Here is an example. ``` #include <vector> int main(int argc, char *argv[]) { constexpr const size_t N = 1344; std::vector<int> data(N); #pragma omp parallel for for (unsigned i = 0; i < N; ++i) { data[i] = i; } #pragma omp parallel for num_threads(N) for (unsigned i = 0; i < N; ++i) { data[i] += i; } return 0; } ``` My CPU is 20C40T, then `__kmp_threads_capacity` is 160. After offset, `__kmp_threads_capacity` becomes 168. `1344 = (160+8)*2^3`, then the assertions hit. Reviewed By: protze.joachim Differential Revision: https://reviews.llvm.org/D98838 (cherry picked from commit 2df65f87c1ea81008768e14522e5d9277234ba70)
-rw-r--r--openmp/runtime/src/kmp_runtime.cpp15
-rw-r--r--openmp/runtime/src/kmp_settings.cpp7
-rw-r--r--openmp/runtime/test/tasking/hidden_helper_task/capacity_mix_threads.cpp45
-rw-r--r--openmp/runtime/test/tasking/hidden_helper_task/capacity_nthreads.cpp31
4 files changed, 94 insertions, 4 deletions
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index a6e32bd..b981f87 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -920,6 +920,12 @@ static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
if (TCR_PTR(__kmp_threads[0]) == NULL) {
--capacity;
}
+ // If it is not for initializing the hidden helper team, we need to take
+ // __kmp_hidden_helper_threads_num out of the capacity because it is included
+ // in __kmp_threads_capacity.
+ if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
+ capacity -= __kmp_hidden_helper_threads_num;
+ }
if (__kmp_nth + new_nthreads -
(root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
capacity) {
@@ -3632,6 +3638,13 @@ int __kmp_register_root(int initial_thread) {
--capacity;
}
+ // If it is not for initializing the hidden helper team, we need to take
+ // __kmp_hidden_helper_threads_num out of the capacity because it is included
+ // in __kmp_threads_capacity.
+ if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
+ capacity -= __kmp_hidden_helper_threads_num;
+ }
+
/* see if there are too many threads */
if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
if (__kmp_tp_cached) {
@@ -3664,7 +3677,7 @@ int __kmp_register_root(int initial_thread) {
/* find an available thread slot */
// Don't reassign the zero slot since we need that to only be used by
// initial thread. Slots for hidden helper threads should also be skipped.
- if (initial_thread && __kmp_threads[0] == NULL) {
+ if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
gtid = 0;
} else {
for (gtid = __kmp_hidden_helper_threads_num + 1;
diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp
index b477edb..50f6a05 100644
--- a/openmp/runtime/src/kmp_settings.cpp
+++ b/openmp/runtime/src/kmp_settings.cpp
@@ -504,9 +504,10 @@ int __kmp_initial_threads_capacity(int req_nproc) {
nth = (4 * __kmp_xproc);
// If hidden helper task is enabled, we initialize the thread capacity with
- // extra
- // __kmp_hidden_helper_threads_num.
- nth += __kmp_hidden_helper_threads_num;
+ // extra __kmp_hidden_helper_threads_num.
+ if (__kmp_enable_hidden_helper) {
+ nth += __kmp_hidden_helper_threads_num;
+ }
if (nth > __kmp_max_nth)
nth = __kmp_max_nth;
diff --git a/openmp/runtime/test/tasking/hidden_helper_task/capacity_mix_threads.cpp b/openmp/runtime/test/tasking/hidden_helper_task/capacity_mix_threads.cpp
new file mode 100644
index 0000000..776aee9
--- /dev/null
+++ b/openmp/runtime/test/tasking/hidden_helper_task/capacity_mix_threads.cpp
@@ -0,0 +1,45 @@
+// RUN: %libomp-cxx-compile-and-run
+
+#include <omp.h>
+
+#include <algorithm>
+#include <cassert>
+#include <chrono>
+#include <thread>
+#include <vector>
+
+void dummy_root() {
+ // omp_get_max_threads() will do middle initialization
+ int nthreads = omp_get_max_threads();
+ std::this_thread::sleep_for(std::chrono::milliseconds(1000));
+}
+
+int main(int argc, char *argv[]) {
+ const int N = std::min(std::max(std::max(32, 4 * omp_get_max_threads()),
+ 4 * omp_get_num_procs()),
+ std::numeric_limits<int>::max());
+
+ std::vector<int> data(N);
+
+ // Create a new thread to initialize the OpenMP RTL. The new thread will not
+ // be taken as the "initial thread".
+ std::thread root(dummy_root);
+
+#pragma omp parallel for num_threads(N)
+ for (unsigned i = 0; i < N; ++i) {
+ data[i] = i;
+ }
+
+#pragma omp parallel for num_threads(N + 1)
+ for (unsigned i = 0; i < N; ++i) {
+ data[i] += i;
+ }
+
+ for (unsigned i = 0; i < N; ++i) {
+ assert(data[i] == 2 * i);
+ }
+
+ root.join();
+
+ return 0;
+}
diff --git a/openmp/runtime/test/tasking/hidden_helper_task/capacity_nthreads.cpp b/openmp/runtime/test/tasking/hidden_helper_task/capacity_nthreads.cpp
new file mode 100644
index 0000000..a9d394f
--- /dev/null
+++ b/openmp/runtime/test/tasking/hidden_helper_task/capacity_nthreads.cpp
@@ -0,0 +1,31 @@
+// RUN: %libomp-cxx-compile-and-run
+
+#include <omp.h>
+
+#include <algorithm>
+#include <cassert>
+#include <vector>
+
+int main(int argc, char *argv[]) {
+ const int N = std::min(std::max(std::max(32, 4 * omp_get_max_threads()),
+ 4 * omp_get_num_procs()),
+ std::numeric_limits<int>::max());
+
+ std::vector<int> data(N);
+
+#pragma omp parallel for num_threads(N)
+ for (unsigned i = 0; i < N; ++i) {
+ data[i] = i;
+ }
+
+#pragma omp parallel for num_threads(N + 1)
+ for (unsigned i = 0; i < N; ++i) {
+ data[i] += i;
+ }
+
+ for (unsigned i = 0; i < N; ++i) {
+ assert(data[i] == 2 * i);
+ }
+
+ return 0;
+}