aboutsummaryrefslogtreecommitdiff
path: root/openmp/runtime
diff options
context:
space:
mode:
authorTerry Wilmarth <terry.l.wilmarth@intel.com>2024-06-24 15:39:18 -0400
committerGitHub <noreply@github.com>2024-06-24 15:39:18 -0400
commitd30b082fd4aeba0a3a99c3f17dbffe6691f859cc (patch)
treedc38dccae31e17300c385cd2beaafdc151ec6d03 /openmp/runtime
parentf2d3d829b97a221c9ce3a3467a20ea51bb29ecbd (diff)
downloadllvm-d30b082fd4aeba0a3a99c3f17dbffe6691f859cc.zip
llvm-d30b082fd4aeba0a3a99c3f17dbffe6691f859cc.tar.gz
llvm-d30b082fd4aeba0a3a99c3f17dbffe6691f859cc.tar.bz2
[OpenMP] Add num_threads clause list format and strict modifier support (#85466)
Add support to the runtime for 6.0 spec features that allow num_threads clause to take a list, and also make use of the strict modifier. Provides new compiler interface functions for these features.
Diffstat (limited to 'openmp/runtime')
-rw-r--r--openmp/runtime/src/dllexports5
-rw-r--r--openmp/runtime/src/kmp.h42
-rw-r--r--openmp/runtime/src/kmp_csupport.cpp44
-rw-r--r--openmp/runtime/src/kmp_runtime.cpp134
-rw-r--r--openmp/runtime/test/parallel/omp_parallel_num_threads_list.c212
-rw-r--r--openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c103
6 files changed, 520 insertions, 20 deletions
diff --git a/openmp/runtime/src/dllexports b/openmp/runtime/src/dllexports
index 0d49643..747b828 100644
--- a/openmp/runtime/src/dllexports
+++ b/openmp/runtime/src/dllexports
@@ -1268,6 +1268,11 @@ kmp_set_disp_num_buffers 890
__kmpc_atomic_val_8_cas_cpt 2158
%endif
+ # No longer need to put ordinal numbers
+ __kmpc_push_num_threads_list
+ __kmpc_push_num_threads_strict
+ __kmpc_push_num_threads_list_strict
+
%endif
__kmpc_set_thread_limit
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index f625e84..c8d821b 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -532,6 +532,15 @@ enum clock_function_type {
enum mic_type { non_mic, mic1, mic2, mic3, dummy };
#endif
+// OpenMP 3.1 - Nested num threads array
+typedef struct kmp_nested_nthreads_t {
+ int *nth;
+ int size;
+ int used;
+} kmp_nested_nthreads_t;
+
+extern kmp_nested_nthreads_t __kmp_nested_nth;
+
/* -- fast reduction stuff ------------------------------------------------ */
#undef KMP_FAST_REDUCTION_BARRIER
@@ -2965,6 +2974,12 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info {
/* The data set by the primary thread at reinit, then R/W by the worker */
KMP_ALIGN_CACHE int
th_set_nproc; /* if > 0, then only use this request for the next fork */
+ int *th_set_nested_nth;
+ bool th_nt_strict; // num_threads clause has strict modifier
+ ident_t *th_nt_loc; // loc for strict modifier
+ int th_nt_sev; // error severity for strict modifier
+ const char *th_nt_msg; // error message for strict modifier
+ int th_set_nested_nth_sz;
#if KMP_NESTED_HOT_TEAMS
kmp_hot_team_ptr_t *th_hot_teams; /* array of hot teams */
#endif
@@ -3206,6 +3221,7 @@ typedef struct KMP_ALIGN_CACHE kmp_base_team {
void *t_stack_id; // team specific stack stitching id (for ittnotify)
#endif /* USE_ITT_BUILD */
distributedBarrier *b; // Distributed barrier data associated with team
+ kmp_nested_nthreads_t *t_nested_nth;
} kmp_base_team_t;
// Assert that the list structure fits and aligns within
@@ -3542,15 +3558,6 @@ extern enum mic_type __kmp_mic_type;
extern double __kmp_load_balance_interval; // load balance algorithm interval
#endif /* USE_LOAD_BALANCE */
-// OpenMP 3.1 - Nested num threads array
-typedef struct kmp_nested_nthreads_t {
- int *nth;
- int size;
- int used;
-} kmp_nested_nthreads_t;
-
-extern kmp_nested_nthreads_t __kmp_nested_nth;
-
#if KMP_USE_ADAPTIVE_LOCKS
// Parameters for the speculative lock backoff system.
@@ -3785,6 +3792,11 @@ extern void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL);
___kmp_thread_free((th), (ptr)KMP_SRC_LOC_CURR)
extern void __kmp_push_num_threads(ident_t *loc, int gtid, int num_threads);
+extern void __kmp_push_num_threads_list(ident_t *loc, int gtid,
+ kmp_uint32 list_length,
+ int *num_threads_list);
+extern void __kmp_set_strict_num_threads(ident_t *loc, int gtid, int sev,
+ const char *msg);
extern void __kmp_push_proc_bind(ident_t *loc, int gtid,
kmp_proc_bind_t proc_bind);
@@ -4423,6 +4435,18 @@ KMP_EXPORT kmp_int32 __kmpc_in_parallel(ident_t *loc);
KMP_EXPORT void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid);
KMP_EXPORT void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
kmp_int32 num_threads);
+KMP_EXPORT void __kmpc_push_num_threads_strict(ident_t *loc,
+ kmp_int32 global_tid,
+ kmp_int32 num_threads,
+ int severity,
+ const char *message);
+
+KMP_EXPORT void __kmpc_push_num_threads_list(ident_t *loc, kmp_int32 global_tid,
+ kmp_uint32 list_length,
+ kmp_int32 *num_threads_list);
+KMP_EXPORT void __kmpc_push_num_threads_list_strict(
+ ident_t *loc, kmp_int32 global_tid, kmp_uint32 list_length,
+ kmp_int32 *num_threads_list, int severity, const char *message);
KMP_EXPORT void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
int proc_bind);
diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp
index f45fe64..d638acd 100644
--- a/openmp/runtime/src/kmp_csupport.cpp
+++ b/openmp/runtime/src/kmp_csupport.cpp
@@ -237,6 +237,50 @@ void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
__kmp_push_num_threads(loc, global_tid, num_threads);
}
+void __kmpc_push_num_threads_strict(ident_t *loc, kmp_int32 global_tid,
+ kmp_int32 num_threads, int severity,
+ const char *message) {
+ __kmp_push_num_threads(loc, global_tid, num_threads);
+ __kmp_set_strict_num_threads(loc, global_tid, severity, message);
+}
+
+/*!
+@ingroup PARALLEL
+@param loc source location information
+@param global_tid global thread number
+@param list_length number of entries in the num_threads_list array
+@param num_threads_list array of numbers of threads requested for this parallel
+construct and subsequent nested parallel constructs
+
+Set the number of threads to be used by the next fork spawned by this thread,
+and some nested forks as well.
+This call is only required if the parallel construct has a `num_threads` clause
+that has a list of integers as the argument.
+*/
+void __kmpc_push_num_threads_list(ident_t *loc, kmp_int32 global_tid,
+ kmp_uint32 list_length,
+ kmp_int32 *num_threads_list) {
+ KA_TRACE(20, ("__kmpc_push_num_threads_list: enter T#%d num_threads_list=",
+ global_tid));
+ KA_TRACE(20, ("%d", num_threads_list[0]));
+#ifdef KMP_DEBUG
+ for (kmp_uint32 i = 1; i < list_length; ++i)
+ KA_TRACE(20, (", %d", num_threads_list[i]));
+#endif
+  KA_TRACE(20, ("\n"));
+
+ __kmp_assert_valid_gtid(global_tid);
+ __kmp_push_num_threads_list(loc, global_tid, list_length, num_threads_list);
+}
+
+void __kmpc_push_num_threads_list_strict(ident_t *loc, kmp_int32 global_tid,
+ kmp_uint32 list_length,
+ kmp_int32 *num_threads_list,
+ int severity, const char *message) {
+ __kmp_push_num_threads_list(loc, global_tid, list_length, num_threads_list);
+ __kmp_set_strict_num_threads(loc, global_tid, severity, message);
+}
+
void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
/* the num_threads are automatically popped */
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index 74b44b5..b49c44f 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -113,6 +113,21 @@ void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
int new_nthreads);
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads);
+static kmp_nested_nthreads_t *__kmp_override_nested_nth(kmp_info_t *thr,
+ int level) {
+ kmp_nested_nthreads_t *new_nested_nth =
+ (kmp_nested_nthreads_t *)KMP_INTERNAL_MALLOC(
+ sizeof(kmp_nested_nthreads_t));
+ int new_size = level + thr->th.th_set_nested_nth_sz;
+ new_nested_nth->nth = (int *)KMP_INTERNAL_MALLOC(new_size * sizeof(int));
+ for (int i = 0; i < level + 1; ++i)
+ new_nested_nth->nth[i] = 0;
+ for (int i = level + 1, j = 1; i < new_size; ++i, ++j)
+ new_nested_nth->nth[i] = thr->th.th_set_nested_nth[j];
+ new_nested_nth->size = new_nested_nth->used = new_size;
+ return new_nested_nth;
+}
+
/* Calculate the identifier of the current thread */
/* fast (and somewhat portable) way to get unique identifier of executing
thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. */
@@ -930,6 +945,11 @@ static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
__kmp_get_gtid(), new_nthreads, set_nthreads));
}
#endif // KMP_DEBUG
+
+ if (this_thr->th.th_nt_strict && new_nthreads < set_nthreads) {
+ __kmpc_error(this_thr->th.th_nt_loc, this_thr->th.th_nt_sev,
+ this_thr->th.th_nt_msg);
+ }
return new_nthreads;
}
@@ -1265,6 +1285,10 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
serial_team->t.t_serialized = 1;
serial_team->t.t_nproc = 1;
serial_team->t.t_parent = this_thr->th.th_team;
+ if (this_thr->th.th_team->t.t_nested_nth)
+ serial_team->t.t_nested_nth = this_thr->th.th_team->t.t_nested_nth;
+ else
+ serial_team->t.t_nested_nth = &__kmp_nested_nth;
// Save previous team's task state on serial team structure
serial_team->t.t_primary_task_state = this_thr->th.th_task_state;
serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
@@ -1286,9 +1310,11 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
// Thread value exists in the nested nthreads array for the next nested
// level
- if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
- this_thr->th.th_current_task->td_icvs.nproc =
- __kmp_nested_nth.nth[level + 1];
+ kmp_nested_nthreads_t *nested_nth = &__kmp_nested_nth;
+ if (this_thr->th.th_team->t.t_nested_nth)
+ nested_nth = this_thr->th.th_team->t.t_nested_nth;
+ if (nested_nth->used && (level + 1 < nested_nth->used)) {
+ this_thr->th.th_current_task->td_icvs.nproc = nested_nth->nth[level + 1];
}
if (__kmp_nested_proc_bind.used &&
@@ -1339,10 +1365,14 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
int level = this_thr->th.th_team->t.t_level;
// Thread value exists in the nested nthreads array for the next nested
// level
- if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
- this_thr->th.th_current_task->td_icvs.nproc =
- __kmp_nested_nth.nth[level + 1];
+
+ kmp_nested_nthreads_t *nested_nth = &__kmp_nested_nth;
+ if (serial_team->t.t_nested_nth)
+ nested_nth = serial_team->t.t_nested_nth;
+ if (nested_nth->used && (level + 1 < nested_nth->used)) {
+ this_thr->th.th_current_task->td_icvs.nproc = nested_nth->nth[level + 1];
}
+
serial_team->t.t_level++;
KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
"of serial team %p to %d\n",
@@ -2093,9 +2123,18 @@ int __kmp_fork_call(ident_t *loc, int gtid,
// See if we need to make a copy of the ICVs.
int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
- if ((level + 1 < __kmp_nested_nth.used) &&
- (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
- nthreads_icv = __kmp_nested_nth.nth[level + 1];
+ kmp_nested_nthreads_t *nested_nth = NULL;
+ if (!master_th->th.th_set_nested_nth &&
+ (level + 1 < parent_team->t.t_nested_nth->used) &&
+ (parent_team->t.t_nested_nth->nth[level + 1] != nthreads_icv)) {
+ nthreads_icv = parent_team->t.t_nested_nth->nth[level + 1];
+ } else if (master_th->th.th_set_nested_nth) {
+ nested_nth = __kmp_override_nested_nth(master_th, level);
+ if ((level + 1 < nested_nth->used) &&
+ (nested_nth->nth[level + 1] != nthreads_icv))
+ nthreads_icv = nested_nth->nth[level + 1];
+ else
+ nthreads_icv = 0; // don't update
} else {
nthreads_icv = 0; // don't update
}
@@ -2204,6 +2243,24 @@ int __kmp_fork_call(ident_t *loc, int gtid,
KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);
+ // Check if hot team has potentially outdated list, and if so, free it
+ if (team->t.t_nested_nth &&
+ team->t.t_nested_nth != parent_team->t.t_nested_nth) {
+ KMP_INTERNAL_FREE(team->t.t_nested_nth->nth);
+ KMP_INTERNAL_FREE(team->t.t_nested_nth);
+ team->t.t_nested_nth = NULL;
+ }
+ team->t.t_nested_nth = parent_team->t.t_nested_nth;
+ if (master_th->th.th_set_nested_nth) {
+ if (!nested_nth)
+ nested_nth = __kmp_override_nested_nth(master_th, level);
+ team->t.t_nested_nth = nested_nth;
+ KMP_INTERNAL_FREE(master_th->th.th_set_nested_nth);
+ master_th->th.th_set_nested_nth = NULL;
+ master_th->th.th_set_nested_nth_sz = 0;
+ master_th->th.th_nt_strict = false;
+ }
+
// Update the floating point rounding in the team if required.
propagateFPControl(team);
#if OMPD_SUPPORT
@@ -3337,6 +3394,7 @@ static void __kmp_initialize_root(kmp_root_t *root) {
root_team->t.t_serialized = 1;
// TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
root_team->t.t_sched.sched = r_sched.sched;
+ root_team->t.t_nested_nth = &__kmp_nested_nth;
KA_TRACE(
20,
("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
@@ -3374,6 +3432,7 @@ static void __kmp_initialize_root(kmp_root_t *root) {
// TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
hot_team->t.t_sched.sched = r_sched.sched;
hot_team->t.t_size_changed = 0;
+ hot_team->t.t_nested_nth = &__kmp_nested_nth;
}
#ifdef KMP_DEBUG
@@ -4240,6 +4299,7 @@ static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
else // no tasking --> always safe to reap
this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
this_thr->th.th_set_proc_bind = proc_bind_default;
+
#if KMP_AFFINITY_SUPPORTED
this_thr->th.th_new_place = this_thr->th.th_current_place;
#endif
@@ -4492,6 +4552,11 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
/* allocate space for it. */
new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
+ new_thr->th.th_nt_strict = false;
+ new_thr->th.th_nt_loc = NULL;
+ new_thr->th.th_nt_sev = severity_fatal;
+ new_thr->th.th_nt_msg = NULL;
+
TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
@@ -4602,6 +4667,9 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
new_thr->th.th_active_in_pool = FALSE;
TCW_4(new_thr->th.th_active, TRUE);
+ new_thr->th.th_set_nested_nth = NULL;
+ new_thr->th.th_set_nested_nth_sz = 0;
+
/* adjust the global counters */
__kmp_all_nth++;
__kmp_nth++;
@@ -5398,7 +5466,6 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
}
} // Check changes in number of threads
- kmp_info_t *master = team->t.t_threads[0];
if (master->th.th_teams_microtask) {
for (f = 1; f < new_nproc; ++f) {
// propagate teams construct specific info to workers
@@ -5504,6 +5571,8 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
__ompt_team_assign_id(team, ompt_parallel_data);
#endif
+ team->t.t_nested_nth = NULL;
+
KMP_MB();
return team;
@@ -5575,6 +5644,8 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
KMP_MB();
+ team->t.t_nested_nth = NULL;
+
KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
team->t.t_id));
@@ -5677,6 +5748,14 @@ void __kmp_free_team(kmp_root_t *root,
}
}
+ // Before clearing parent pointer, check if nested_nth list should be freed
+ if (team->t.t_nested_nth && team->t.t_nested_nth != &__kmp_nested_nth &&
+ team->t.t_nested_nth != team->t.t_parent->t.t_nested_nth) {
+ KMP_INTERNAL_FREE(team->t.t_nested_nth->nth);
+ KMP_INTERNAL_FREE(team->t.t_nested_nth);
+ }
+ team->t.t_nested_nth = NULL;
+
// Reset pointer to parent team only for non-hot teams.
team->t.t_parent = NULL;
team->t.t_level = 0;
@@ -7774,7 +7853,6 @@ int __kmp_invoke_teams_master(int gtid) {
encountered by this team. since this should be enclosed in the forkjoin
critical section it should avoid race conditions with asymmetrical nested
parallelism */
-
void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
kmp_info_t *thr = __kmp_threads[gtid];
@@ -7782,6 +7860,39 @@ void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
thr->th.th_set_nproc = num_threads;
}
+void __kmp_push_num_threads_list(ident_t *id, int gtid, kmp_uint32 list_length,
+ int *num_threads_list) {
+ kmp_info_t *thr = __kmp_threads[gtid];
+
+ KMP_DEBUG_ASSERT(list_length > 1);
+
+ if (num_threads_list[0] > 0)
+ thr->th.th_set_nproc = num_threads_list[0];
+ thr->th.th_set_nested_nth =
+ (int *)KMP_INTERNAL_MALLOC(list_length * sizeof(int));
+ for (kmp_uint32 i = 0; i < list_length; ++i)
+ thr->th.th_set_nested_nth[i] = num_threads_list[i];
+ thr->th.th_set_nested_nth_sz = list_length;
+}
+
+void __kmp_set_strict_num_threads(ident_t *loc, int gtid, int sev,
+ const char *msg) {
+ kmp_info_t *thr = __kmp_threads[gtid];
+ thr->th.th_nt_strict = true;
+ thr->th.th_nt_loc = loc;
+ // if sev is unset make fatal
+ if (sev == severity_warning)
+ thr->th.th_nt_sev = sev;
+ else
+ thr->th.th_nt_sev = severity_fatal;
+ // if msg is unset, use an appropriate message
+ if (msg)
+ thr->th.th_nt_msg = msg;
+ else
+ thr->th.th_nt_msg = "Cannot form team with number of threads specified by "
+ "strict num_threads clause.";
+}
+
static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,
int num_threads) {
KMP_DEBUG_ASSERT(thr);
@@ -8238,6 +8349,7 @@ void __kmp_cleanup(void) {
__kmp_nested_nth.nth = NULL;
__kmp_nested_nth.size = 0;
__kmp_nested_nth.used = 0;
+
KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
__kmp_nested_proc_bind.bind_types = NULL;
__kmp_nested_proc_bind.size = 0;
diff --git a/openmp/runtime/test/parallel/omp_parallel_num_threads_list.c b/openmp/runtime/test/parallel/omp_parallel_num_threads_list.c
new file mode 100644
index 0000000..b5abbc4
--- /dev/null
+++ b/openmp/runtime/test/parallel/omp_parallel_num_threads_list.c
@@ -0,0 +1,212 @@
+// RUN: %libomp-compile && env OMP_NUM_THREADS=2,2,2,2,2 %libomp-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+// When compiler supports num_threads clause list format, remove the following
+// and use num_threads clause directly
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+int __kmpc_global_thread_num(void *loc);
+void __kmpc_push_num_threads_list(void *loc, int gtid, unsigned length,
+ int *list);
+
+#if defined(__cplusplus)
+}
+#endif
+
+int test_omp_parallel_num_threads_list() {
+ int num_failed = 0;
+
+// Initially, 5 levels specified via OMP_NUM_THREADS with 2 threads per level
+// Check top 3 levels
+#pragma omp parallel reduction(+ : num_failed) // 1st level
+ {
+#pragma omp single
+ num_failed = num_failed + !(omp_get_num_threads() == 2);
+#pragma omp parallel reduction(+ : num_failed) // 2nd level
+ {
+#pragma omp single
+ num_failed = num_failed + !(omp_get_num_threads() == 2);
+#pragma omp parallel reduction(+ : num_failed) // 3rd level
+ {
+#pragma omp single
+ num_failed = num_failed + !(omp_get_num_threads() == 2);
+ } // end 3rd level parallel
+ } // end 2nd level parallel
+ } // end 1st level parallel
+
+// Make sure that basic single element num_threads clause works
+#pragma omp parallel reduction(+ : num_failed) num_threads(4) // 1st level
+ {
+#pragma omp single
+ num_failed = num_failed + !(omp_get_num_threads() == 4);
+#pragma omp parallel reduction(+ : num_failed) // 2nd level
+ {
+#pragma omp single
+ num_failed = num_failed + !(omp_get_num_threads() == 2); // Unaffected
+#pragma omp parallel reduction(+ : num_failed) // 3rd level
+ {
+#pragma omp single
+ num_failed = num_failed + !(omp_get_num_threads() == 2); // Unaffected
+ } // end 3rd level parallel
+ } // end 2nd level parallel
+ } // end 1st level parallel
+
+// Check that basic single element num_threads clause works on second level
+#pragma omp parallel reduction(+ : num_failed) // 1st level
+ {
+#pragma omp single
+ num_failed = num_failed + !(omp_get_num_threads() == 2); // Unaffected
+#pragma omp parallel reduction(+ : num_failed) num_threads(4) // 2nd level
+ {
+#pragma omp single
+ num_failed = num_failed + !(omp_get_num_threads() == 4);
+#pragma omp parallel reduction(+ : num_failed) // 3rd level
+ {
+#pragma omp single
+ num_failed = num_failed + !(omp_get_num_threads() == 2); // Unaffected
+ } // end 3rd level parallel
+ } // end 2nd level parallel
+ } // end 1st level parallel
+
+ // Try a short list. It should completely overwrite the old settings.
+ // We need to use the compiler interface for now.
+ int threads[2] = {3, 3};
+ __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2,
+ threads);
+#pragma omp parallel reduction(+ : num_failed) // num_threads(3,3) // 1st level
+ {
+#pragma omp single
+ num_failed = num_failed + !(omp_get_num_threads() == 3);
+#pragma omp parallel reduction(+ : num_failed) // 2nd level
+ {
+#pragma omp single
+ num_failed = num_failed + !(omp_get_num_threads() == 3);
+#pragma omp parallel reduction(+ : num_failed) // 3rd level
+ {
+// NOTE: should just keep using last element in list, to nesting depth
+#pragma omp single
+ num_failed = num_failed + !(omp_get_num_threads() == 3);
+ } // end 3rd level parallel
+ } // end 2nd level parallel
+ } // end 1st level parallel
+
+// Similar, but at a lower level.
+#pragma omp parallel reduction(+ : num_failed) // 1st level
+ {
+#pragma omp single
+ num_failed = num_failed + !(omp_get_num_threads() == 2); // Unaffected
+ int threads[2] = {3, 3};
+ __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2,
+ threads);
+#pragma omp parallel reduction(+ : num_failed) // num_threads(3,3) // 2nd level
+ {
+#pragma omp single
+ num_failed = num_failed + !(omp_get_num_threads() == 3);
+#pragma omp parallel reduction(+ : num_failed) // 3rd level
+ {
+// NOTE: just keep using last element in list, to nesting depth
+#pragma omp single
+ num_failed = num_failed + !(omp_get_num_threads() == 3);
+ } // end 3rd level parallel
+ } // end 2nd level parallel
+// Make sure a second inner parallel is NOT affected by the clause
+#pragma omp parallel reduction(+ : num_failed) // 2nd level
+ {
+#pragma omp single
+ num_failed = num_failed + !(omp_get_num_threads() == 2); // Unaffected
+#pragma omp parallel reduction(+ : num_failed) // 3rd level
+ {
+#pragma omp single
+ // NOTE: just keep using last element in list, to nesting depth
+ num_failed = num_failed + !(omp_get_num_threads() == 2); // Unaffected
+ } // end 3rd level parallel
+ } // end 2nd level parallel
+ } // end 1st level parallel
+
+ // Test lists at multiple levels
+ int threads2[2] = {4, 3};
+ __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2,
+ threads2);
+#pragma omp parallel reduction(+ : num_failed) // num_threads(4,3) // 1st level
+ {
+#pragma omp single
+ num_failed = num_failed + !(omp_get_num_threads() == 4);
+#pragma omp parallel reduction(+ : num_failed) // 2nd level
+ {
+#pragma omp single
+ num_failed = num_failed + !(omp_get_num_threads() == 3);
+#pragma omp parallel reduction(+ : num_failed) // 3rd level
+ {
+#pragma omp single
+ num_failed = num_failed + !(omp_get_num_threads() == 3);
+ int threads3[2] = {2, 5};
+ __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2,
+ threads3);
+#pragma omp parallel reduction(+ : num_failed) // num_threads(2,5) // 4th level
+ {
+#pragma omp single
+ num_failed = num_failed + !(omp_get_num_threads() == 2);
+#pragma omp parallel reduction(+ : num_failed) // 5th level
+ {
+#pragma omp single
+ num_failed = num_failed + !(omp_get_num_threads() == 5);
+#pragma omp parallel reduction(+ : num_failed) // 6th level
+ {
+#pragma omp single
+ num_failed = num_failed + !(omp_get_num_threads() == 5);
+ } // end 6th level parallel
+ } // end 5th level parallel
+ } // end 4th level parallel
+#pragma omp parallel reduction(+ : num_failed) // 4th level
+ {
+#pragma omp single
+ num_failed = num_failed + !(omp_get_num_threads() == 3);
+ } // end 4th level parallel
+ } // end 3rd level parallel
+ } // end 2nd level parallel
+#pragma omp parallel reduction(+ : num_failed) // 2nd level
+ {
+#pragma omp single
+ num_failed = num_failed + !(omp_get_num_threads() == 3);
+#pragma omp parallel reduction(+ : num_failed) // 3rd level
+ {
+#pragma omp single
+ num_failed = num_failed + !(omp_get_num_threads() == 3);
+ } // end 3rd level parallel
+ } // end 2nd level parallel
+ } // end 1st level parallel
+
+// Now we should be back to the way we started.
+#pragma omp parallel reduction(+ : num_failed) // 1st level
+ {
+#pragma omp single
+ num_failed = num_failed + !(omp_get_num_threads() == 2);
+#pragma omp parallel reduction(+ : num_failed) // 2nd level
+ {
+#pragma omp single
+ num_failed = num_failed + !(omp_get_num_threads() == 2);
+#pragma omp parallel reduction(+ : num_failed) // 3rd level
+ {
+#pragma omp single
+ num_failed = num_failed + !(omp_get_num_threads() == 2);
+ } // end 3rd level parallel
+ } // end 2nd level parallel
+ } // end 1st level parallel
+
+ return (!num_failed);
+}
+
+int main() {
+ int i;
+ int num_failed = 0;
+
+ for (i = 0; i < REPETITIONS; i++) {
+ if (!test_omp_parallel_num_threads_list()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}
diff --git a/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c b/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c
new file mode 100644
index 0000000..577aa3a
--- /dev/null
+++ b/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c
@@ -0,0 +1,103 @@
+// RUN: %libomp-compile && env OMP_NUM_THREADS=2,2,2,2,2 OMP_THREAD_LIMIT=16 \
+// RUN: %libomp-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+// When compiler supports num_threads clause list format and strict modifier,
+// remove the following and use num_threads clause directly
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+int __kmpc_global_thread_num(void *loc);
+void __kmpc_push_num_threads_list(void *loc, int gtid, unsigned length,
+ int *list);
+void __kmpc_push_num_threads_strict(void *loc, int gtid, int nth, int sev,
+ const char *msg);
+void __kmpc_push_num_threads_list_strict(void *loc, int gtid, unsigned length,
+ int *list, int sev, const char *msg);
+
+#if defined(__cplusplus)
+}
+#endif
+
+int test_omp_parallel_num_threads_strict() {
+ int num_failed = 0;
+
+// Test regular runtime warning about exceeding thread limit.
+// Tolerate whatever value was given.
+#pragma omp parallel reduction(+ : num_failed) num_threads(22)
+#pragma omp single
+ num_failed = num_failed + !(omp_get_num_threads() <= 22);
+
+ // Test with 4 threads and strict -- no problem, no warning.
+ __kmpc_push_num_threads_strict(NULL, __kmpc_global_thread_num(NULL), 4, 1,
+ "This warning shouldn't happen.");
+#pragma omp parallel reduction(+ : num_failed) // num_threads(strict:4)
+#pragma omp single
+ num_failed = num_failed + !(omp_get_num_threads() == 4);
+
+ // Exceed limit, specify user warning message. Tolerate whatever was given.
+ __kmpc_push_num_threads_strict(NULL, __kmpc_global_thread_num(NULL), 20, 1,
+ "User-supplied warning for strict.");
+#pragma omp parallel reduction(+ : num_failed)
+ // num_threads(strict:20) severity(warning)
+ // message("User-supplied warning for strict.")
+#pragma omp single
+ num_failed = num_failed + !(omp_get_num_threads() <= 20);
+
+ // Exceed limit, no user message, use runtime default message for strict.
+ // Tolerate whatever value was given.
+ __kmpc_push_num_threads_strict(NULL, __kmpc_global_thread_num(NULL), 21, 1,
+ NULL);
+#pragma omp parallel reduction(+ : num_failed) // num_threads(strict:21)
+#pragma omp single
+ num_failed = num_failed + !(omp_get_num_threads() <= 21);
+
+ // Exceed limit at top level. Should see user warning message.
+ int threads3[2] = {24, 2};
+ __kmpc_push_num_threads_list_strict(NULL, __kmpc_global_thread_num(NULL), 2,
+ threads3, 1,
+ "User-supplied warning on strict list.");
+#pragma omp parallel reduction(+ : num_failed)
+ // num_threads(strict:24,2) severity(warning)
+  // message("User-supplied warning on strict list.") // 1st level
+ {
+#pragma omp single
+ num_failed = num_failed + !(omp_get_num_threads() <= 24);
+#pragma omp parallel reduction(+ : num_failed) // 2nd level
+ {
+#pragma omp single
+ num_failed = num_failed + !(omp_get_num_threads() <= 2);
+ }
+ }
+
+ // No strict limit at top level. Regular runtime limiting applies.
+ __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2,
+ threads3);
+#pragma omp parallel reduction(+ : num_failed)
+ // num_threads(24,2) // 1st level
+ {
+#pragma omp single
+ num_failed = num_failed + !(omp_get_num_threads() <= 24);
+#pragma omp parallel reduction(+ : num_failed) // 2nd level
+ {
+#pragma omp single
+ num_failed = num_failed + !(omp_get_num_threads() <= 2);
+ }
+ }
+
+ return (!num_failed);
+}
+
+int main() {
+ int i;
+ int num_failed = 0;
+
+ for (i = 0; i < REPETITIONS; i++) {
+ if (!test_omp_parallel_num_threads_strict()) {
+ num_failed++;
+ }
+ }
+ return num_failed;
+}