diff options
author | Jakub Jelinek <jakub@redhat.com> | 2021-08-23 10:16:24 +0200 |
---|---|---|
committer | Jakub Jelinek <jakub@redhat.com> | 2021-08-23 10:16:24 +0200 |
commit | 3bc75533d1f87f0617be6c1af98804f9127ec637 (patch) | |
tree | fa6ecc3f9840f638a87d8f4f39884c31ec7e7bab /libgomp | |
parent | 6f1a3668f5ee5152bdcca59843802e587339eda1 (diff) | |
download | gcc-3bc75533d1f87f0617be6c1af98804f9127ec637.zip gcc-3bc75533d1f87f0617be6c1af98804f9127ec637.tar.gz gcc-3bc75533d1f87f0617be6c1af98804f9127ec637.tar.bz2 |
openmp: Add support for strict modifier on grainsize/num_tasks clauses
With strict: modifier on these clauses, the standard is explicit about
how many iterations (and which) each generated task of taskloop directive
should contain. For num_tasks it actually matches what we were already
implementing, but for grainsize it does not (and even violates the old
rule - without strict it requires that the number of iterations (unspecified
which exactly) handled by each generated task is >= grainsize argument and
< 2 * grainsize argument, with strict: it requires that each generated
task handles exactly == grainsize argument iterations, except for the
generated task handling the last iteration which can handle <= grainsize
iterations).
The following patch implements it for C and C++.
2021-08-23 Jakub Jelinek <jakub@redhat.com>
gcc/
* tree.h (OMP_CLAUSE_GRAINSIZE_STRICT): Define.
(OMP_CLAUSE_NUM_TASKS_STRICT): Define.
* tree-pretty-print.c (dump_omp_clause) <case OMP_CLAUSE_GRAINSIZE,
case OMP_CLAUSE_NUM_TASKS>: Print strict: modifier.
* omp-expand.c (expand_task_call): Use GOMP_TASK_FLAG_STRICT in iflags
if either grainsize or num_tasks clause has the strict modifier.
gcc/c/
* c-parser.c (c_parser_omp_clause_num_tasks,
c_parser_omp_clause_grainsize): Parse the optional strict: modifier.
gcc/cp/
* parser.c (cp_parser_omp_clause_num_tasks,
cp_parser_omp_clause_grainsize): Parse the optional strict: modifier.
include/
* gomp-constants.h (GOMP_TASK_FLAG_STRICT): Define.
libgomp/
* taskloop.c (GOMP_taskloop): Handle GOMP_TASK_FLAG_STRICT.
* testsuite/libgomp.c-c++-common/taskloop-4.c (main): Fix up comment.
* testsuite/libgomp.c-c++-common/taskloop-5.c: New test.
Diffstat (limited to 'libgomp')
-rw-r--r-- | libgomp/taskloop.c | 27 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.c-c++-common/taskloop-4.c | 3 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.c-c++-common/taskloop-5.c | 135 |
3 files changed, 160 insertions, 5 deletions
diff --git a/libgomp/taskloop.c b/libgomp/taskloop.c index 791178a..9d27dd0 100644 --- a/libgomp/taskloop.c +++ b/libgomp/taskloop.c @@ -97,6 +97,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), #endif TYPE task_step = step; + TYPE nfirst_task_step = step; unsigned long nfirst = n; if (flags & GOMP_TASK_FLAG_GRAINSIZE) { @@ -109,7 +110,22 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), if (num_tasks != ndiv) num_tasks = ~0UL; #endif - if (num_tasks <= 1) + if ((flags & GOMP_TASK_FLAG_STRICT) + && num_tasks != ~0ULL) + { + UTYPE mod = n % grainsize; + task_step = (TYPE) grainsize * step; + if (mod) + { + num_tasks++; + nfirst_task_step = (TYPE) mod * step; + if (num_tasks == 1) + task_step = nfirst_task_step; + else + nfirst = num_tasks - 2; + } + } + else if (num_tasks <= 1) { num_tasks = 1; task_step = end - start; @@ -124,6 +140,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), task_step = (TYPE) grainsize * step; if (mul != n) { + nfirst_task_step = task_step; task_step += step; nfirst = n - mul - 1; } @@ -135,6 +152,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), task_step = (TYPE) div * step; if (mod) { + nfirst_task_step = task_step; task_step += step; nfirst = mod - 1; } @@ -153,6 +171,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), task_step = (TYPE) div * step; if (mod) { + nfirst_task_step = task_step; task_step += step; nfirst = mod - 1; } @@ -225,7 +244,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), start += task_step; ((TYPE *)arg)[1] = start; if (i == nfirst) - task_step -= step; + task_step = nfirst_task_step; fn (arg); arg += arg_size; if (!priority_queue_empty_p (&task[i].children_queue, @@ -258,7 +277,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), start += task_step; ((TYPE *)data)[1] = start; if 
(i == nfirst) - task_step -= step; + task_step = nfirst_task_step; fn (data); if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED)) @@ -303,7 +322,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), start += task_step; ((TYPE *)arg)[1] = start; if (i == nfirst) - task_step -= step; + task_step = nfirst_task_step; thr->task = parent; task->kind = GOMP_TASK_WAITING; task->fn = fn; diff --git a/libgomp/testsuite/libgomp.c-c++-common/taskloop-4.c b/libgomp/testsuite/libgomp.c-c++-common/taskloop-4.c index 4ac1b5a..b949938 100644 --- a/libgomp/testsuite/libgomp.c-c++-common/taskloop-4.c +++ b/libgomp/testsuite/libgomp.c-c++-common/taskloop-4.c @@ -85,7 +85,8 @@ main () if (test (7, 21, 2, 15, grainsize, &ntasks, &min_iters, &max_iters) != 7 || ntasks != 1 || min_iters != 7 || max_iters != 7) __builtin_abort (); - /* If num_tasks is present, # of task loop iters is min (# of loop iters, num_tasks). */ + /* If num_tasks is present, # of tasks is min (# of loop iters, num_tasks) + and each task has at least one iteration. 
*/ if (test (-51, 2500, 48, 9, num_tasks, &ntasks, &min_iters, &max_iters) != 54 || ntasks != 9) __builtin_abort (); diff --git a/libgomp/testsuite/libgomp.c-c++-common/taskloop-5.c b/libgomp/testsuite/libgomp.c-c++-common/taskloop-5.c new file mode 100644 index 0000000..1b64a6d --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/taskloop-5.c @@ -0,0 +1,135 @@ +/* { dg-do run } */ +/* { dg-options "-O2" } */ + +int u[64], v, w[64]; + +__attribute__((noinline, noclone)) int +test (int a, int b, int c, int d, void (*fn) (int, int, int, int), + int *num_tasks, int *min_iters, int *max_iters, int *sep) +{ + int i, j, t = 0; + __builtin_memset (u, 0, sizeof u); + v = 0; + fn (a, b, c, d); + *min_iters = 0; + *max_iters = 0; + *num_tasks = v; + *sep = v; + if (v) + { + *min_iters = u[0]; + *max_iters = u[0]; + t = u[0]; + for (i = 1; i < v; i++) + { + if (*min_iters > u[i]) + *min_iters = u[i]; + if (*max_iters < u[i]) + *max_iters = u[i]; + t += u[i]; + } + if (*min_iters != *max_iters) + { + for (i = 0; i < v - 1; i++) + { + int min_idx = i; + for (j = i + 1; j < v; j++) + if (w[min_idx] > w[j]) + min_idx = j; + if (min_idx != i) + { + int tem = u[i]; + u[i] = u[min_idx]; + u[min_idx] = tem; + tem = w[i]; + w[i] = w[min_idx]; + w[min_idx] = tem; + } + } + if (u[0] != *max_iters) + __builtin_abort (); + for (i = 1; i < v; i++) + if (u[i] != u[i - 1]) + { + if (*sep != v || u[i] != *min_iters) + __builtin_abort (); + *sep = i; + } + } + } + return t; +} + +void +grainsize (int a, int b, int c, int d) +{ + int i, j = 0, k = 0; + #pragma omp taskloop firstprivate (j, k) grainsize(strict:d) + for (i = a; i < b; i += c) + { + if (j == 0) + { + #pragma omp atomic capture + k = v++; + if (k >= 64) + __builtin_abort (); + w[k] = i; + } + u[k] = ++j; + } +} + +void +num_tasks (int a, int b, int c, int d) +{ + int i, j = 0, k = 0; + #pragma omp taskloop firstprivate (j, k) num_tasks(strict:d) + for (i = a; i < b; i += c) + { + if (j == 0) + { + #pragma omp atomic capture + 
k = v++; + if (k >= 64) + __builtin_abort (); + w[k] = i; + } + u[k] = ++j; + } +} + +int +main () +{ + #pragma omp parallel + #pragma omp single + { + int min_iters, max_iters, ntasks, sep; + /* If grainsize is present and has strict modifier, # of task loop iters is == grainsize, + except that it can be smaller on the last task. */ + if (test (0, 79, 1, 17, grainsize, &ntasks, &min_iters, &max_iters, &sep) != 79 + || ntasks != 5 || min_iters != 11 || max_iters != 17 || sep != 4) + __builtin_abort (); + if (test (-49, 2541, 7, 28, grainsize, &ntasks, &min_iters, &max_iters, &sep) != 370 + || ntasks != 14 || min_iters != 6 || max_iters != 28 || sep != 13) + __builtin_abort (); + if (test (7, 21, 2, 15, grainsize, &ntasks, &min_iters, &max_iters, &sep) != 7 + || ntasks != 1 || min_iters != 7 || max_iters != 7 || sep != 1) + __builtin_abort (); + /* If num_tasks is present, # of tasks is min (# of loop iters, num_tasks) + and each task has at least one iteration. If strict modifier is present, + first set of tasks has ceil (# of loop iters / num_tasks) iterations, + followed by possibly empty set of tasks with floor (# of loop iters / num_tasks) + iterations. */ + if (test (-51, 2500, 48, 9, num_tasks, &ntasks, &min_iters, &max_iters, &sep) != 54 + || ntasks != 9 || min_iters != 6 || max_iters != 6 || sep != 9) + __builtin_abort (); + if (test (0, 57, 1, 9, num_tasks, &ntasks, &min_iters, &max_iters, &sep) != 57 + || ntasks != 9 || min_iters != 6 || max_iters != 7 || sep != 3) + __builtin_abort (); + if (test (0, 25, 2, 17, num_tasks, &ntasks, &min_iters, &max_iters, &sep) != 13 + || ntasks != 13 || min_iters != 1 || max_iters != 1 || sep != 13) + __builtin_abort (); + } + return 0; +} |