aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--openmp/runtime/src/kmp.h13
-rw-r--r--openmp/runtime/src/kmp_csupport.cpp6
-rw-r--r--openmp/runtime/src/kmp_ftn_entry.h18
-rw-r--r--openmp/runtime/src/kmp_runtime.cpp18
-rw-r--r--openmp/runtime/test/affinity/root-threads-affinity.c197
5 files changed, 242 insertions, 10 deletions
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 9c3fbf6..d1f0da8 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -2874,6 +2874,9 @@ typedef struct kmp_base_root {
kmp_lock_t r_begin_lock;
volatile int r_begin;
int r_blocktime; /* blocktime for this root and descendants */
+#if KMP_AFFINITY_SUPPORTED
+ int r_affinity_assigned;
+#endif // KMP_AFFINITY_SUPPORTED
} kmp_base_root_t;
typedef union KMP_ALIGN_CACHE kmp_root {
@@ -3495,6 +3498,16 @@ extern void __kmp_balanced_affinity(kmp_info_t *th, int team_size);
#if KMP_OS_LINUX || KMP_OS_FREEBSD
extern int kmp_set_thread_affinity_mask_initial(void);
#endif
+static inline void __kmp_assign_root_init_mask() {
+ int gtid = __kmp_entry_gtid();
+ kmp_root_t *r = __kmp_threads[gtid]->th.th_root;
+ if (r->r.r_uber_thread == __kmp_threads[gtid] && !r->r.r_affinity_assigned) {
+ __kmp_affinity_set_init_mask(gtid, TRUE);
+ r->r.r_affinity_assigned = TRUE;
+ }
+}
+#else /* KMP_AFFINITY_SUPPORTED */
+#define __kmp_assign_root_init_mask() /* Nothing */
#endif /* KMP_AFFINITY_SUPPORTED */
// No need for KMP_AFFINITY_SUPPORTED guard as only one field in the
// format string is for affinity, so platforms that do not support
diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp
index 98d0cdf..1189db1 100644
--- a/openmp/runtime/src/kmp_csupport.cpp
+++ b/openmp/runtime/src/kmp_csupport.cpp
@@ -39,6 +39,7 @@ void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
__kmp_str_match_true(env)) {
__kmp_middle_initialize();
+ __kmp_assign_root_init_mask();
KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
} else if (__kmp_ignore_mppbeg() == FALSE) {
// By default __kmp_ignore_mppbeg() returns TRUE.
@@ -2023,6 +2024,7 @@ void ompc_display_affinity(char const *format) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
gtid = __kmp_get_gtid();
__kmp_aux_display_affinity(gtid, format);
}
@@ -2035,6 +2037,7 @@ size_t ompc_capture_affinity(char *buffer, size_t buf_size,
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
gtid = __kmp_get_gtid();
__kmp_str_buf_init(&capture_buf);
num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
@@ -2093,6 +2096,7 @@ int kmpc_set_affinity_mask_proc(int proc, void **mask) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
return __kmp_aux_set_affinity_mask_proc(proc, mask);
#endif
}
@@ -2104,6 +2108,7 @@ int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
return __kmp_aux_unset_affinity_mask_proc(proc, mask);
#endif
}
@@ -2115,6 +2120,7 @@ int kmpc_get_affinity_mask_proc(int proc, void **mask) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
return __kmp_aux_get_affinity_mask_proc(proc, mask);
#endif
}
diff --git a/openmp/runtime/src/kmp_ftn_entry.h b/openmp/runtime/src/kmp_ftn_entry.h
index 5d8398d..d88bf1e 100644
--- a/openmp/runtime/src/kmp_ftn_entry.h
+++ b/openmp/runtime/src/kmp_ftn_entry.h
@@ -217,6 +217,7 @@ int FTN_STDCALL FTN_SET_AFFINITY(void **mask) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
return __kmp_aux_set_affinity(mask);
#endif
}
@@ -228,6 +229,7 @@ int FTN_STDCALL FTN_GET_AFFINITY(void **mask) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
return __kmp_aux_get_affinity(mask);
#endif
}
@@ -240,6 +242,7 @@ int FTN_STDCALL FTN_GET_AFFINITY_MAX_PROC(void) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
return __kmp_aux_get_affinity_max_proc();
#endif
}
@@ -253,6 +256,7 @@ void FTN_STDCALL FTN_CREATE_AFFINITY_MASK(void **mask) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
mask_internals = __kmp_affinity_dispatch->allocate_mask();
KMP_CPU_ZERO(mask_internals);
*mask = mask_internals;
@@ -268,6 +272,7 @@ void FTN_STDCALL FTN_DESTROY_AFFINITY_MASK(void **mask) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
if (__kmp_env_consistency_check) {
if (*mask == NULL) {
KMP_FATAL(AffinityInvalidMask, "kmp_destroy_affinity_mask");
@@ -286,6 +291,7 @@ int FTN_STDCALL FTN_SET_AFFINITY_MASK_PROC(int KMP_DEREF proc, void **mask) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
return __kmp_aux_set_affinity_mask_proc(KMP_DEREF proc, mask);
#endif
}
@@ -297,6 +303,7 @@ int FTN_STDCALL FTN_UNSET_AFFINITY_MASK_PROC(int KMP_DEREF proc, void **mask) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
return __kmp_aux_unset_affinity_mask_proc(KMP_DEREF proc, mask);
#endif
}
@@ -308,6 +315,7 @@ int FTN_STDCALL FTN_GET_AFFINITY_MASK_PROC(int KMP_DEREF proc, void **mask) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
return __kmp_aux_get_affinity_mask_proc(KMP_DEREF proc, mask);
#endif
}
@@ -342,6 +350,7 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_MAX_THREADS)(void) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
gtid = __kmp_entry_gtid();
thread = __kmp_threads[gtid];
// return thread -> th.th_team -> t.t_current_task[
@@ -487,6 +496,7 @@ void FTN_STDCALL FTN_DISPLAY_AFFINITY(char const *format, size_t size) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
gtid = __kmp_get_gtid();
ConvertedString cformat(format, size);
__kmp_aux_display_affinity(gtid, cformat.get());
@@ -514,6 +524,7 @@ size_t FTN_STDCALL FTN_CAPTURE_AFFINITY(char *buffer, char const *format,
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
gtid = __kmp_get_gtid();
__kmp_str_buf_init(&capture_buf);
ConvertedString cformat(format, for_size);
@@ -590,6 +601,7 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NUM_PROCS)(void) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
return __kmp_avail_proc;
#endif
}
@@ -779,6 +791,7 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NUM_PLACES)(void) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
if (!KMP_AFFINITY_CAPABLE())
return 0;
return __kmp_affinity_num_masks;
@@ -794,6 +807,7 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_PLACE_NUM_PROCS)(int place_num) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
if (!KMP_AFFINITY_CAPABLE())
return 0;
if (place_num < 0 || place_num >= (int)__kmp_affinity_num_masks)
@@ -819,6 +833,7 @@ void FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_PLACE_PROC_IDS)(int place_num,
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
if (!KMP_AFFINITY_CAPABLE())
return;
if (place_num < 0 || place_num >= (int)__kmp_affinity_num_masks)
@@ -844,6 +859,7 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_PLACE_NUM)(void) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
if (!KMP_AFFINITY_CAPABLE())
return -1;
gtid = __kmp_entry_gtid();
@@ -863,6 +879,7 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_PARTITION_NUM_PLACES)(void) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
if (!KMP_AFFINITY_CAPABLE())
return 0;
gtid = __kmp_entry_gtid();
@@ -889,6 +906,7 @@ KMP_EXPAND_NAME(FTN_GET_PARTITION_PLACE_NUMS)(int *place_nums) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
if (!KMP_AFFINITY_CAPABLE())
return;
gtid = __kmp_entry_gtid();
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index 34f9869..72929d8 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -1412,6 +1412,9 @@ int __kmp_fork_call(ident_t *loc, int gtid,
}
#endif
+ // Assign affinity to root thread if it hasn't happened yet
+ __kmp_assign_root_init_mask();
+
// Nested level will be an index in the nested nthreads array
level = parent_team->t.t_level;
// used to launch non-serial teams even if nested is not allowed
@@ -3171,6 +3174,9 @@ static void __kmp_initialize_root(kmp_root_t *root) {
root->r.r_active = FALSE;
root->r.r_in_parallel = 0;
root->r.r_blocktime = __kmp_dflt_blocktime;
+#if KMP_AFFINITY_SUPPORTED
+ root->r.r_affinity_assigned = FALSE;
+#endif
/* setup the root team for this task */
/* allocate the root team structure */
@@ -3816,9 +3822,6 @@ int __kmp_register_root(int initial_thread) {
root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
- if (TCR_4(__kmp_init_middle)) {
- __kmp_affinity_set_init_mask(gtid, TRUE);
- }
#endif /* KMP_AFFINITY_SUPPORTED */
root_thread->th.th_def_allocator = __kmp_def_allocator;
root_thread->th.th_prev_level = 0;
@@ -7037,13 +7040,6 @@ static void __kmp_do_middle_initialize(void) {
// number of cores on the machine.
__kmp_affinity_initialize();
- // Run through the __kmp_threads array and set the affinity mask
- // for each root thread that is currently registered with the RTL.
- for (i = 0; i < __kmp_threads_capacity; i++) {
- if (TCR_PTR(__kmp_threads[i]) != NULL) {
- __kmp_affinity_set_init_mask(i, TRUE);
- }
- }
#endif /* KMP_AFFINITY_SUPPORTED */
KMP_ASSERT(__kmp_xproc > 0);
@@ -7165,6 +7161,7 @@ void __kmp_parallel_initialize(void) {
if (!__kmp_init_middle) {
__kmp_do_middle_initialize();
}
+ __kmp_assign_root_init_mask();
__kmp_resume_if_hard_paused();
/* begin initialization */
@@ -7471,6 +7468,7 @@ static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,
// Remember the number of threads for inner parallel regions
if (!TCR_4(__kmp_init_middle))
__kmp_middle_initialize(); // get internal globals calculated
+ __kmp_assign_root_init_mask();
KMP_DEBUG_ASSERT(__kmp_avail_proc);
KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);
diff --git a/openmp/runtime/test/affinity/root-threads-affinity.c b/openmp/runtime/test/affinity/root-threads-affinity.c
new file mode 100644
index 0000000..b8ed5b29
--- /dev/null
+++ b/openmp/runtime/test/affinity/root-threads-affinity.c
@@ -0,0 +1,197 @@
+// RUN: %libomp-compile && env LIBOMP_NUM_HIDDEN_HELPER_THREADS=0 OMP_PROC_BIND=close OMP_PLACES=cores KMP_AFFINITY=verbose %libomp-run 8 1 4
+// REQUIRED: linux
+//
+// This test pthread_creates 8 root threads before any OpenMP
+// runtime entry is ever called. We have all the root threads
+// register with the runtime by calling omp_set_num_threads(),
+// but this does not initialize their affinity. The fourth root thread
+// then calls a parallel region and we make sure its affinity
+// is correct. We also make sure all the other root threads are
+// free-floating since they have not called into a parallel region.
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/syscall.h>
+#include "libomp_test_affinity.h"
+
+volatile int entry_flag = 0;
+volatile int flag = 0;
+volatile int num_roots_arrived = 0;
+int num_roots;
+int spawner = 0;
+pthread_mutex_t lock;
+int register_workers = 0; // boolean
+affinity_mask_t *full_mask;
+
+int __kmpc_global_thread_num(void*);
+
+int get_os_thread_id() {
+ return (int)syscall(SYS_gettid);
+}
+
+int place_and_affinity_match() {
+ int i, max_cpu;
+ char buf[512];
+ affinity_mask_t *mask = affinity_mask_alloc();
+ int place = omp_get_place_num();
+ int num_procs = omp_get_place_num_procs(place);
+ int *ids = (int*)malloc(sizeof(int) * num_procs);
+ omp_get_place_proc_ids(place, ids);
+ get_thread_affinity(mask);
+ affinity_mask_snprintf(buf, sizeof(buf), mask);
+ printf("Primary Thread Place: %d\n", place);
+ printf("Primary Thread mask: %s\n", buf);
+
+ for (i = 0; i < num_procs; ++i) {
+ int cpu = ids[i];
+ if (!affinity_mask_isset(mask, cpu))
+ return 0;
+ }
+
+ max_cpu = AFFINITY_MAX_CPUS;
+ for (i = 0; i < max_cpu; ++i) {
+ int cpu = i;
+ if (affinity_mask_isset(mask, cpu)) {
+ int j, found = 0;
+ for (j = 0; j < num_procs; ++j) {
+ if (ids[j] == cpu) {
+ found = 1;
+ break;
+ }
+ }
+ if (!found)
+ return 0;
+ }
+ }
+
+ affinity_mask_free(mask);
+ free(ids);
+ return 1;
+}
+
+void* thread_func(void *arg) {
+ int place, nplaces;
+ int root_id = *((int*)arg);
+ int pid = getpid();
+ int tid = get_os_thread_id();
+
+ // Order how the root threads are assigned a gtid in the runtime
+ // i.e., root_id = gtid
+ while (1) {
+ int v = entry_flag;
+ if (v == root_id)
+ break;
+ }
+
+ // If main root thread
+ if (root_id == spawner) {
+ printf("Initial application thread (pid=%d, tid=%d, spawner=%d) reached thread_func (will call OpenMP)\n", pid, tid, spawner);
+ omp_set_num_threads(4);
+ #pragma omp atomic
+ entry_flag++;
+ // Wait for the workers to signal their arrival before #pragma omp parallel
+ while (num_roots_arrived < num_roots - 1) {}
+ // This will trigger the output for KMP_AFFINITY in this case
+ #pragma omp parallel
+ {
+ int gtid = __kmpc_global_thread_num(NULL);
+ #pragma omp single
+ {
+ printf("Exactly %d threads in the #pragma omp parallel\n",
+ omp_get_num_threads());
+ }
+ #pragma omp critical
+ {
+ printf("OpenMP thread %d: gtid=%d\n", omp_get_thread_num(), gtid);
+ }
+ }
+ flag = 1;
+ if (!place_and_affinity_match()) {
+ fprintf(stderr, "error: place and affinity mask do not match for primary thread\n");
+ exit (EXIT_FAILURE);
+ }
+
+ } else { // If worker root thread
+ // Worker root threads, register with OpenMP through omp_set_num_threads()
+ // if designated to, signal their arrival and then wait for the main root
+ // thread to signal them to exit.
+ printf("New root pthread (pid=%d, tid=%d) reached thread_func\n", pid, tid);
+ if (register_workers)
+ omp_set_num_threads(4);
+ #pragma omp atomic
+ entry_flag++;
+
+ pthread_mutex_lock(&lock);
+ num_roots_arrived++;
+ pthread_mutex_unlock(&lock);
+ while (flag == 0) {}
+
+ // Main check whether root threads' mask is equal to the
+ // initial affinity mask
+ affinity_mask_t *mask = affinity_mask_alloc();
+ get_thread_affinity(mask);
+ if (!affinity_mask_equal(mask, full_mask)) {
+ char buf[1024];
+ printf("root thread %d mask: ", root_id);
+ affinity_mask_snprintf(buf, sizeof(buf), mask);
+ printf("initial affinity mask: %s\n", buf);
+ fprintf(stderr, "error: root thread %d affinity mask not equal"
+ " to initial full mask\n", root_id);
+ affinity_mask_free(mask);
+ exit(EXIT_FAILURE);
+ }
+ affinity_mask_free(mask);
+ }
+ return NULL;
+}
+
+int main(int argc, char** argv) {
+ int i;
+ if (argc != 3 && argc != 4) {
+ fprintf(stderr, "usage: %s <num_roots> <register_workers_bool> [<spawn_root_number>]\n", argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ // Initialize pthread mutex
+ pthread_mutex_init(&lock, NULL);
+
+ // Get initial full mask
+ full_mask = affinity_mask_alloc();
+ get_thread_affinity(full_mask);
+
+ // Get the number of root pthreads to create and allocate resources for them
+ num_roots = atoi(argv[1]);
+ pthread_t *roots = (pthread_t*)malloc(sizeof(pthread_t) * num_roots);
+ int *root_ids = (int*)malloc(sizeof(int) * num_roots);
+
+ // Get the flag indicating whether to have root pthreads call omp_set_num_threads() or not
+ register_workers = atoi(argv[2]);
+
+ if (argc == 4)
+ spawner = atoi(argv[3]);
+
+ // Spawn worker root threads
+ for (i = 1; i < num_roots; ++i) {
+ *(root_ids + i) = i;
+ pthread_create(roots + i, NULL, thread_func, root_ids + i);
+ }
+ // Have main root thread (root 0) go into thread_func
+ *root_ids = 0;
+ thread_func(root_ids);
+
+ // Cleanup all resources
+ for (i = 1; i < num_roots; ++i) {
+ void *status;
+ pthread_join(roots[i], &status);
+ }
+ free(roots);
+ free(root_ids);
+ pthread_mutex_destroy(&lock);
+ return EXIT_SUCCESS;
+}