aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--openmp/runtime/src/i18n/en_US.txt2
-rw-r--r--openmp/runtime/src/kmp_affinity.cpp100
-rw-r--r--openmp/runtime/src/kmp_affinity.h12
-rw-r--r--openmp/runtime/src/kmp_settings.cpp65
4 files changed, 122 insertions, 57 deletions
diff --git a/openmp/runtime/src/i18n/en_US.txt b/openmp/runtime/src/i18n/en_US.txt
index 5a54e11..351da54 100644
--- a/openmp/runtime/src/i18n/en_US.txt
+++ b/openmp/runtime/src/i18n/en_US.txt
@@ -469,6 +469,8 @@ AffHWSubsetNotExistGeneric "KMP_HW_SUBSET ignored: %1$s: level not detected in
AffHWSubsetEqvLayers "KMP_HW_SUBSET ignored: %1$s, %2$s: layers are equivalent, please only specify one."
AffHWSubsetOutOfOrder "KMP_HW_SUBSET ignored: %1$s layer should come after %2$s."
AffEqualTopologyTypes "%1$s: topology layer \"%2$s\" is equivalent to \"%3$s\"."
+AffGranTooCoarseProcGroup "%1$s: granularity=%2$s is too coarse, setting granularity=group."
+StgDeprecatedValue "%1$s: \"%2$s\" value is deprecated. Please use \"%3$s\" instead."
# --------------------------------------------------------------------------------------------------
-*- HINTS -*-
diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp
index 9521cc4..a46a7bb 100644
--- a/openmp/runtime/src/kmp_affinity.cpp
+++ b/openmp/runtime/src/kmp_affinity.cpp
@@ -198,14 +198,82 @@ void kmp_hw_thread_t::print() const {
////////////////////////////////////////////////////////////////////////////////
// kmp_topology_t methods
+// Insert a new layer of the given type into the topology. ids[i] is the new
+// layer's id for hw_threads[i]. Assumes perfect nesting (one parent per object)
+void kmp_topology_t::_insert_layer(kmp_hw_t type, const int *ids) {
+ // Find the insertion index by scanning the hardware threads in order and
+ // comparing where an existing layer's ids change vs. where the new ids change
+ int target_layer; // index the new layer will occupy (== depth if no break)
+ int previous_id = kmp_hw_thread_t::UNKNOWN_ID; // NOTE(review): neither tracker is reset per layer; confirm intended
+ int previous_new_id = kmp_hw_thread_t::UNKNOWN_ID; // new-layer id of the previously visited thread
+
+ // Walk the layers from index 0 upward in index until the target is found.
+ // A layer with no mismatching transition counts as equal: insert above it
+ for (target_layer = 0; target_layer < depth; ++target_layer) {
+ bool layers_equal = true; // cleared on the first mismatching transition
+ bool strictly_above_target_layer = false;
+ for (int i = 0; i < num_hw_threads; ++i) {
+ int id = hw_threads[i].ids[target_layer];
+ int new_id = ids[i];
+ if (id != previous_id && new_id == previous_new_id) {
+ // Existing id changed while the new id did not: new layer is coarser
+ strictly_above_target_layer = true;
+ layers_equal = false;
+ break;
+ } else if (id == previous_id && new_id != previous_new_id) {
+ // New id changed while the existing id did not: new layer is finer,
+ layers_equal = false; // so try the next layer
+ break;
+ }
+ previous_id = id;
+ previous_new_id = new_id;
+ }
+ if (strictly_above_target_layer || layers_equal)
+ break;
+ }
+
+ // Shift types[target_layer..depth-1] one slot toward the end (this writes
+ // types[depth] and ids[depth]), then store the new type and ids in the gap
+ for (int i = depth - 1, j = depth; i >= target_layer; --i, --j)
+ types[j] = types[i];
+ types[target_layer] = type;
+ for (int k = 0; k < num_hw_threads; ++k) {
+ for (int i = depth - 1, j = depth; i >= target_layer; --i, --j)
+ hw_threads[k].ids[j] = hw_threads[k].ids[i];
+ hw_threads[k].ids[target_layer] = ids[k];
+ }
+ equivalent[type] = type; // the inserted type is recorded as its own equivalent
+ depth++;
+}
+
+#if KMP_GROUP_AFFINITY
+// Add a KMP_HW_PROC_GROUP layer to the topology, one id per hardware thread
+void kmp_topology_t::_insert_windows_proc_groups() {
+ // Nothing is inserted when exactly one processor group is reported
+ if (__kmp_num_proc_groups == 1)
+ return;
+ kmp_affin_mask_t *mask; // scratch mask, reused for every hardware thread
+ int *ids = (int *)__kmp_allocate(sizeof(int) * num_hw_threads); // one entry per hw thread
+ KMP_CPU_ALLOC(mask);
+ for (int i = 0; i < num_hw_threads; ++i) {
+ KMP_CPU_ZERO(mask);
+ KMP_CPU_SET(hw_threads[i].os_id, mask); // mask now holds only this thread's os_id
+ ids[i] = __kmp_get_proc_group(mask); // presumably the group index of that processor; verify against helper
+ }
+ KMP_CPU_FREE(mask);
+ _insert_layer(KMP_HW_PROC_GROUP, ids);
+ __kmp_free(ids); // ids is released here, right after _insert_layer returns
+}
+#endif // KMP_GROUP_AFFINITY
+
// Remove layers that don't add information to the topology.
// This is done by having the layer take on the id = UNKNOWN_ID (-1)
void kmp_topology_t::_remove_radix1_layers() {
int preference[KMP_HW_LAST];
int top_index1, top_index2;
// Set up preference associative array
- preference[KMP_HW_PROC_GROUP] = 110;
- preference[KMP_HW_SOCKET] = 100;
+ preference[KMP_HW_SOCKET] = 110;
+ preference[KMP_HW_PROC_GROUP] = 100;
preference[KMP_HW_CORE] = 95;
preference[KMP_HW_THREAD] = 90;
preference[KMP_HW_NUMA] = 85;
@@ -440,7 +508,7 @@ kmp_topology_t *kmp_topology_t::allocate(int nproc, int ndepth,
kmp_topology_t *retval;
// Allocate all data in one large allocation
size_t size = sizeof(kmp_topology_t) + sizeof(kmp_hw_thread_t) * nproc +
- sizeof(int) * ndepth * 3;
+ sizeof(int) * (size_t)KMP_HW_LAST * 3;
char *bytes = (char *)__kmp_allocate(size);
retval = (kmp_topology_t *)bytes;
if (nproc > 0) {
@@ -453,8 +521,8 @@ kmp_topology_t *kmp_topology_t::allocate(int nproc, int ndepth,
int *arr =
(int *)(bytes + sizeof(kmp_topology_t) + sizeof(kmp_hw_thread_t) * nproc);
retval->types = (kmp_hw_t *)arr;
- retval->ratio = arr + ndepth;
- retval->count = arr + 2 * ndepth;
+ retval->ratio = arr + (size_t)KMP_HW_LAST;
+ retval->count = arr + 2 * (size_t)KMP_HW_LAST;
KMP_FOREACH_HW_TYPE(type) { retval->equivalent[type] = KMP_HW_UNKNOWN; }
for (int i = 0; i < ndepth; ++i) {
retval->types[i] = types[i];
@@ -651,6 +719,9 @@ void kmp_topology_t::print(const char *env_var) const {
}
void kmp_topology_t::canonicalize() {
+#if KMP_GROUP_AFFINITY
+ _insert_windows_proc_groups();
+#endif
_remove_radix1_layers();
_gather_enumeration_information();
_discover_uniformity();
@@ -699,6 +770,25 @@ void kmp_topology_t::canonicalize() {
__kmp_hw_get_catalog_string(gran_type));
__kmp_affinity_gran = gran_type;
}
+#if KMP_GROUP_AFFINITY
+ // If more than one processor group exists, and the level of
+ // granularity specified by the user is too coarse, then the
+ // granularity must be adjusted "down" to processor group affinity
+ // because threads can only exist within one processor group.
+ // For example, if a user sets granularity=socket and there are two
+ // processor groups that cover a socket, then the runtime must
+ // restrict the granularity down to the processor group level.
+ if (__kmp_num_proc_groups > 1) {
+ int gran_depth = __kmp_topology->get_level(gran_type);
+ int proc_group_depth = __kmp_topology->get_level(KMP_HW_PROC_GROUP);
+ if (gran_depth >= 0 && proc_group_depth >= 0 &&
+ gran_depth < proc_group_depth) {
+ KMP_WARNING(AffGranTooCoarseProcGroup, "KMP_AFFINITY",
+ __kmp_hw_get_catalog_string(__kmp_affinity_gran));
+ __kmp_affinity_gran = gran_type = KMP_HW_PROC_GROUP;
+ }
+ }
+#endif
__kmp_affinity_gran_levels = 0;
for (int i = depth - 1; i >= 0 && get_type(i) != gran_type; --i)
__kmp_affinity_gran_levels++;
diff --git a/openmp/runtime/src/kmp_affinity.h b/openmp/runtime/src/kmp_affinity.h
index 5b1569b..76ba38b 100644
--- a/openmp/runtime/src/kmp_affinity.h
+++ b/openmp/runtime/src/kmp_affinity.h
@@ -638,7 +638,9 @@ class kmp_topology_t {
int depth;
- // The following arrays are all 'depth' long
+ // The following arrays are all 'depth' long and have been
+ // allocated to hold up to KMP_HW_LAST number of objects if
+ // needed so layers can be added without reallocation of any array
// Ordered array of the types in the topology
kmp_hw_t *types;
@@ -671,6 +673,14 @@ class kmp_topology_t {
// Flags describing the topology
flags_t flags;
+ // Insert a new topology layer after allocation
+ void _insert_layer(kmp_hw_t type, const int *ids);
+
+#if KMP_GROUP_AFFINITY
+ // Insert topology information about Windows Processor groups
+ void _insert_windows_proc_groups();
+#endif
+
// Count each item & get the num x's per y
// e.g., get the number of cores and the number of threads per core
// for each (x, y) in (KMP_HW_* , KMP_HW_*)
diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp
index a8c08f8..8f7cee2 100644
--- a/openmp/runtime/src/kmp_settings.cpp
+++ b/openmp/runtime/src/kmp_settings.cpp
@@ -3136,6 +3136,7 @@ static void __kmp_stg_parse_topology_method(char const *name, char const *value,
}
#if KMP_GROUP_AFFINITY
else if (__kmp_str_match("group", 1, value)) {
+ KMP_WARNING(StgDeprecatedValue, name, value, "all");
__kmp_affinity_top_method = affinity_top_method_group;
}
#endif /* KMP_GROUP_AFFINITY */
@@ -6029,65 +6030,27 @@ void __kmp_env_initialize(char const *string) {
// Handle the Win 64 group affinity stuff if there are multiple
// processor groups, or if the user requested it, and OMP 4.0
// affinity is not in effect.
- if (((__kmp_num_proc_groups > 1) &&
- (__kmp_affinity_type == affinity_default) &&
- (__kmp_nested_proc_bind.bind_types[0] == proc_bind_default)) ||
- (__kmp_affinity_top_method == affinity_top_method_group)) {
+ if (__kmp_num_proc_groups > 1 &&
+ __kmp_affinity_type == affinity_default &&
+ __kmp_nested_proc_bind.bind_types[0] == proc_bind_default) {
+ // Do not respect the initial processor affinity mask if it is assigned
+ // exactly one Windows Processor Group since this is interpreted as the
+ // default OS assignment. Not respecting the mask allows the runtime to
+ // use all the logical processors in all groups.
if (__kmp_affinity_respect_mask == affinity_respect_mask_default &&
exactly_one_group) {
__kmp_affinity_respect_mask = FALSE;
}
+ // Use compact affinity with anticipation of pinning to at least the
+ // group granularity since threads can only be bound to one group.
if (__kmp_affinity_type == affinity_default) {
__kmp_affinity_type = affinity_compact;
__kmp_nested_proc_bind.bind_types[0] = proc_bind_intel;
}
- if (__kmp_affinity_top_method == affinity_top_method_default) {
- if (__kmp_affinity_gran == KMP_HW_UNKNOWN) {
- __kmp_affinity_top_method = affinity_top_method_group;
- __kmp_affinity_gran = KMP_HW_PROC_GROUP;
- } else if (__kmp_affinity_gran == KMP_HW_PROC_GROUP) {
- __kmp_affinity_top_method = affinity_top_method_group;
- } else {
- __kmp_affinity_top_method = affinity_top_method_all;
- }
- } else if (__kmp_affinity_top_method == affinity_top_method_group) {
- if (__kmp_affinity_gran == KMP_HW_UNKNOWN) {
- __kmp_affinity_gran = KMP_HW_PROC_GROUP;
- } else if ((__kmp_affinity_gran != KMP_HW_PROC_GROUP) &&
- (__kmp_affinity_gran != KMP_HW_THREAD)) {
- const char *str = __kmp_hw_get_keyword(__kmp_affinity_gran);
- KMP_WARNING(AffGranTopGroup, var, str);
- __kmp_affinity_gran = KMP_HW_THREAD;
- }
- } else {
- if (__kmp_affinity_gran == KMP_HW_UNKNOWN) {
- __kmp_affinity_gran = KMP_HW_CORE;
- } else if (__kmp_affinity_gran == KMP_HW_PROC_GROUP) {
- const char *str = NULL;
- switch (__kmp_affinity_type) {
- case affinity_physical:
- str = "physical";
- break;
- case affinity_logical:
- str = "logical";
- break;
- case affinity_compact:
- str = "compact";
- break;
- case affinity_scatter:
- str = "scatter";
- break;
- case affinity_explicit:
- str = "explicit";
- break;
- // No MIC on windows, so no affinity_balanced case
- default:
- KMP_DEBUG_ASSERT(0);
- }
- KMP_WARNING(AffGranGroupType, var, str);
- __kmp_affinity_gran = KMP_HW_CORE;
- }
- }
+ if (__kmp_affinity_top_method == affinity_top_method_default)
+ __kmp_affinity_top_method = affinity_top_method_all;
+ if (__kmp_affinity_gran == KMP_HW_UNKNOWN)
+ __kmp_affinity_gran = KMP_HW_PROC_GROUP;
} else
#endif /* KMP_GROUP_AFFINITY */