diff options
author | Jakub Jelinek <jakub@redhat.com> | 2021-11-15 13:20:53 +0100 |
---|---|---|
committer | Jakub Jelinek <jakub@redhat.com> | 2021-11-15 13:20:53 +0100 |
commit | aea72386831c0c5672f55983034cc709b968daea (patch) | |
tree | ab7e82602a7dc1f8e6fefcc5ef16520eea238392 /libgomp | |
parent | fcdf49a0ad3282761c7ac72103407ca4ec4d6968 (diff) | |
download | gcc-aea72386831c0c5672f55983034cc709b968daea.zip gcc-aea72386831c0c5672f55983034cc709b968daea.tar.gz gcc-aea72386831c0c5672f55983034cc709b968daea.tar.bz2 |
openmp: Add support for thread_limit clause on target
OpenMP 5.1 says that thread_limit clause can also appear on target,
and similarly to teams should affect the thread-limit-var ICV.
On combined target teams, the clause goes to both.
We actually passed thread_limit internally on target already before,
but only used it for gcn/ptx offloading to hint how many threads should be
created and for ptx didn't set thread_limit_var in that case.
Similarly for host fallback.
Also, I found that we weren't copying the args array that contains encoded
thread_limit and num_teams clause for target (etc.) for async target.
2021-11-15 Jakub Jelinek <jakub@redhat.com>
gcc/
* gimplify.c (optimize_target_teams): Only add OMP_CLAUSE_THREAD_LIMIT
to OMP_TARGET_CLAUSES if it isn't there already.
gcc/c-family/
* c-omp.c (c_omp_split_clauses) <case OMP_CLAUSE_THREAD_LIMIT>:
Duplicate to both OMP_TARGET and OMP_TEAMS.
gcc/c/
* c-parser.c (OMP_TARGET_CLAUSE_MASK): Add
PRAGMA_OMP_CLAUSE_THREAD_LIMIT.
gcc/cp/
* parser.c (OMP_TARGET_CLAUSE_MASK): Add
PRAGMA_OMP_CLAUSE_THREAD_LIMIT.
libgomp/
* task.c (gomp_create_target_task): Copy args array as well.
* target.c (gomp_target_fallback): Add args argument.
Set gomp_icv (true)->thread_limit_var if thread_limit is present.
(GOMP_target): Adjust gomp_target_fallback caller.
(GOMP_target_ext): Likewise.
(gomp_target_task_fn): Likewise.
* config/nvptx/team.c (gomp_nvptx_main): Set
gomp_global_icv.thread_limit_var.
* testsuite/libgomp.c-c++-common/thread-limit-1.c: New test.
Diffstat (limited to 'libgomp')
-rw-r--r-- | libgomp/config/nvptx/team.c | 1 | ||||
-rw-r--r-- | libgomp/target.c | 28 | ||||
-rw-r--r-- | libgomp/task.c | 26 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.c-c++-common/thread-limit-1.c | 23 |
4 files changed, 72 insertions, 6 deletions
diff --git a/libgomp/config/nvptx/team.c b/libgomp/config/nvptx/team.c index cabf018..310eb28 100644 --- a/libgomp/config/nvptx/team.c +++ b/libgomp/config/nvptx/team.c @@ -55,6 +55,7 @@ gomp_nvptx_main (void (*fn) (void *), void *fn_data) if (tid == 0) { gomp_global_icv.nthreads_var = ntids; + gomp_global_icv.thread_limit_var = ntids; /* Starting additional threads is not supported. */ gomp_global_icv.dyn_var = true; diff --git a/libgomp/target.c b/libgomp/target.c index 3c1eee2..5d3103a 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -2362,7 +2362,7 @@ gomp_unload_device (struct gomp_device_descr *devicep) static void gomp_target_fallback (void (*fn) (void *), void **hostaddrs, - struct gomp_device_descr *devicep) + struct gomp_device_descr *devicep, void **args) { struct gomp_thread old_thr, *thr = gomp_thread (); @@ -2378,6 +2378,25 @@ gomp_target_fallback (void (*fn) (void *), void **hostaddrs, thr->place = old_thr.place; thr->ts.place_partition_len = gomp_places_list_len; } + if (args) + while (*args) + { + intptr_t id = (intptr_t) *args++, val; + if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM) + val = (intptr_t) *args++; + else + val = id >> GOMP_TARGET_ARG_VALUE_SHIFT; + if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL) + continue; + id &= GOMP_TARGET_ARG_ID_MASK; + if (id != GOMP_TARGET_ARG_THREAD_LIMIT) + continue; + val = val > INT_MAX ? INT_MAX : val; + if (val) + gomp_icv (true)->thread_limit_var = val; + break; + } + fn (hostaddrs); gomp_free_thread (thr); *thr = old_thr; @@ -2478,7 +2497,7 @@ GOMP_target (int device, void (*fn) (void *), const void *unused, /* All shared memory devices should use the GOMP_target_ext function. */ || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))) - return gomp_target_fallback (fn, hostaddrs, devicep); + return gomp_target_fallback (fn, hostaddrs, devicep, NULL); htab_t refcount_set = htab_create (mapnum); struct target_mem_desc *tgt_vars @@ -2617,7 +2636,7 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, tgt_align, tgt_size); } } - gomp_target_fallback (fn, hostaddrs, devicep); + gomp_target_fallback (fn, hostaddrs, devicep, args); return; } @@ -3052,7 +3071,8 @@ gomp_target_task_fn (void *data) || (devicep->can_run_func && !devicep->can_run_func (fn_addr))) { ttask->state = GOMP_TARGET_TASK_FALLBACK; - gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep); + gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep, + ttask->args); return false; } diff --git a/libgomp/task.c b/libgomp/task.c index feb4796..414ca6e 100644 --- a/libgomp/task.c +++ b/libgomp/task.c @@ -745,6 +745,7 @@ gomp_create_target_task (struct gomp_device_descr *devicep, size_t depend_size = 0; uintptr_t depend_cnt = 0; size_t tgt_align = 0, tgt_size = 0; + uintptr_t args_cnt = 0; if (depend != NULL) { @@ -769,10 +770,22 @@ gomp_create_target_task (struct gomp_device_descr *devicep, tgt_size += tgt_align - 1; else tgt_size = 0; + if (args) + { + void **cargs = args; + while (*cargs) + { + intptr_t id = (intptr_t) *cargs++; + if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM) + cargs++; + } + args_cnt = cargs + 1 - args; + } } task = gomp_malloc (sizeof (*task) + depend_size + sizeof (*ttask) + + args_cnt * sizeof (void *) + mapnum * (sizeof (void *) + sizeof (size_t) + sizeof (unsigned short)) + tgt_size); @@ -785,9 +798,18 @@ gomp_create_target_task (struct gomp_device_descr *devicep, ttask->devicep = devicep; ttask->fn = fn; ttask->mapnum = mapnum; - ttask->args = args; memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *)); - ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum]; + if (args_cnt) + { + ttask->args = (void **) &ttask->hostaddrs[mapnum]; + memcpy (ttask->args, args, args_cnt * sizeof (void *)); + ttask->sizes = (size_t *) &ttask->args[args_cnt]; + } + else + { + ttask->args = args; + ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum]; + } memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t)); ttask->kinds = (unsigned short *) &ttask->sizes[mapnum]; memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short)); diff --git a/libgomp/testsuite/libgomp.c-c++-common/thread-limit-1.c b/libgomp/testsuite/libgomp.c-c++-common/thread-limit-1.c new file mode 100644 index 0000000..cac2202 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/thread-limit-1.c @@ -0,0 +1,23 @@ +#include <omp.h> +#include <stdlib.h> + +void +foo () +{ + { + #pragma omp target parallel nowait thread_limit (4) num_threads (1) + if (omp_get_thread_limit () > 4) + abort (); + } + #pragma omp taskwait +} + +int +main () +{ + #pragma omp target thread_limit (6) + if (omp_get_thread_limit () > 6) + abort (); + foo (); + return 0; +} |