diff options
author | Marcel Vollweiler <marcel@codesourcery.com> | 2022-05-20 02:08:36 -0700 |
---|---|---|
committer | Marcel Vollweiler <marcel@codesourcery.com> | 2022-05-20 02:29:32 -0700 |
commit | 6c420193e86b39a09304b2845335571eefe24d5d (patch) | |
tree | b19c3d0bd8bacae35e8a6b1e9031e4aa637e219b /libgomp/testsuite/libgomp.c-c++-common | |
parent | 5143faee0d0edfd5849c5f54677cb699bf84a5db (diff) | |
download | gcc-6c420193e86b39a09304b2845335571eefe24d5d.zip gcc-6c420193e86b39a09304b2845335571eefe24d5d.tar.gz gcc-6c420193e86b39a09304b2845335571eefe24d5d.tar.bz2 |
libgomp: Add new runtime routines omp_target_memcpy_async and omp_target_memcpy_rect_async
This patch adds two new OpenMP runtime routines: omp_target_memcpy_async and
omp_target_memcpy_rect_async. Both functions are introduced in OpenMP 5.1 as
asynchronous variants of omp_target_memcpy and omp_target_memcpy_rect.
In contrast to the synchronous variants, the asynchronous functions have two
additional function parameters to allow the specification of task dependences:
int depobj_count
omp_depend_t *depobj_list
integer(c_int), value :: depobj_count
integer(omp_depend_kind), optional :: depobj_list(*)
The implementation splits the synchronous functions into two parts: (a) check
and (b) copy. Then (a) is used in the asynchronous functions for the sequential
part, and the actual copy process (b) is executed in a new created task. The
sequential part (a) takes into account the requirements for the return values:
"The routine returns zero if successful. Otherwise, it returns a non-zero
value." (omp_target_memcpy_async, OpenMP 5.1 spec, section 3.8.7)
"An application can determine the number of inclusive dimensions supported by an
implementation by passing NULL pointers (or C_NULL_PTR, for Fortran) for both
dst and src. The routine returns the number of dimensions supported by the
implementation for the specified device numbers. No copy operation is
performed." (omp_target_memcpy_rect_async, OpenMP 5.1 spec, section 3.8.8)
Due to asynchronicity an error is thrown if the asynchronous memcpy is not
successful (in contrast to the synchronous functions which use a return
value unequal to zero).
gcc/ChangeLog:
* omp-low.cc (omp_runtime_api_call): Added target_memcpy_async and
target_memcpy_rect_async to omp_runtime_apis array.
libgomp/ChangeLog:
* libgomp.map: Added omp_target_memcpy_async and
omp_target_memcpy_rect_async.
* libgomp.texi: Both functions are now supported.
* omp.h.in: Added omp_target_memcpy_async and
omp_target_memcpy_rect_async.
* omp_lib.f90.in: Added interfaces for both new functions.
* omp_lib.h.in: Likewise.
* target.c (ialias_redirect): Added for GOMP_task.
(omp_target_memcpy): Restructured into check and copy part.
(omp_target_memcpy_check): New helper function for omp_target_memcpy and
omp_target_memcpy_async that checks requirements.
(omp_target_memcpy_copy): New helper function for omp_target_memcpy and
omp_target_memcpy_async that performs the memcpy.
(omp_target_memcpy_async_helper): New helper function that is used in
omp_target_memcpy_async for the asynchronous task.
(omp_target_memcpy_async): Added.
(omp_target_memcpy_rect): Restructured into check and copy part.
(omp_target_memcpy_rect_check): New helper function for
omp_target_memcpy_rect and omp_target_memcpy_rect_async that checks
requirements.
(omp_target_memcpy_rect_copy): New helper function for
omp_target_memcpy_rect and omp_target_memcpy_rect_async that performs
the memcpy.
(omp_target_memcpy_rect_async_helper): New helper function that is used
in omp_target_memcpy_rect_async for the asynchronous task.
(omp_target_memcpy_rect_async): Added.
* task.c (ialias): Added for GOMP_task.
* testsuite/libgomp.c-c++-common/target-memcpy-async-1.c: New test.
* testsuite/libgomp.c-c++-common/target-memcpy-async-2.c: New test.
* testsuite/libgomp.c-c++-common/target-memcpy-rect-async-1.c: New test.
* testsuite/libgomp.c-c++-common/target-memcpy-rect-async-2.c: New test.
* testsuite/libgomp.fortran/target-memcpy-async-1.f90: New test.
* testsuite/libgomp.fortran/target-memcpy-async-2.f90: New test.
* testsuite/libgomp.fortran/target-memcpy-rect-async-1.f90: New test.
* testsuite/libgomp.fortran/target-memcpy-rect-async-2.f90: New test.
Diffstat (limited to 'libgomp/testsuite/libgomp.c-c++-common')
4 files changed, 279 insertions, 0 deletions
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-memcpy-async-1.c b/libgomp/testsuite/libgomp.c-c++-common/target-memcpy-async-1.c new file mode 100644 index 0000000..f25c3bb --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/target-memcpy-async-1.c @@ -0,0 +1,46 @@ +/* Test for omp_target_memcpy_async without considering dependence objects. */ + +#include <omp.h> +#include <stdlib.h> + +int +main () +{ + int d = omp_get_default_device (); + int id = omp_get_initial_device (); + int q[128], i; + void *p; + + if (d < 0 || d >= omp_get_num_devices ()) + d = id; + + p = omp_target_alloc (130 * sizeof (int), d); + if (p == NULL) + return 0; + + for (i = 0; i < 128; i++) + q[i] = i; + + if (omp_target_memcpy_async (p, q, 128 * sizeof (int), sizeof (int), 0, d, id, + 0, NULL)) + abort (); + + #pragma omp taskwait + + int q2[128]; + for (i = 0; i < 128; ++i) + q2[i] = 0; + if (omp_target_memcpy_async (q2, p, 128 * sizeof(int), 0, sizeof (int), id, d, + 0, NULL)) + abort (); + + #pragma omp taskwait + + for (i = 0; i < 128; ++i) + if (q2[i] != q[i]) + abort (); + + omp_target_free (p, d); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-memcpy-async-2.c b/libgomp/testsuite/libgomp.c-c++-common/target-memcpy-async-2.c new file mode 100644 index 0000000..d1353a5 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/target-memcpy-async-2.c @@ -0,0 +1,74 @@ +/* Test for omp_target_memcpy_async considering dependence objects. */ + +#include <omp.h> +#include <stdlib.h> + +int +main () +{ + int d = omp_get_default_device (); + int id = omp_get_initial_device (); + int a[128], b[64], c[32], e[16], q[128], i; + void *p; + + if (d < 0 || d >= omp_get_num_devices ()) + d = id; + + p = omp_target_alloc (130 * sizeof (int), d); + if (p == NULL) + return 0; + + for (i = 0; i < 128; ++i) + a[i] = i + 1; + for (i = 0; i < 64; ++i) + b[i] = i + 2; + for (i = 0; i < 32; i++) + c[i] = 0; + for (i = 0; i < 16; i++) + e[i] = i + 4; + + omp_depend_t obj[2]; + + #pragma omp parallel num_threads(5) + #pragma omp single + { + #pragma omp task depend(out: p) + omp_target_memcpy (p, a, 128 * sizeof (int), 0, 0, d, id); + + #pragma omp task depend(inout: p) + omp_target_memcpy (p, b, 64 * sizeof (int), 0, 0, d, id); + + #pragma omp task depend(out: c) + for (i = 0; i < 32; i++) + c[i] = i + 3; + + #pragma omp depobj(obj[0]) depend(inout: p) + #pragma omp depobj(obj[1]) depend(in: c) + omp_target_memcpy_async (p, c, 32 * sizeof (int), 0, 0, d, id, 2, obj); + + #pragma omp task depend(in: p) + omp_target_memcpy (p, e, 16 * sizeof (int), 0, 0, d, id); + } + + #pragma omp taskwait + + for (i = 0; i < 128; ++i) + q[i] = 0; + omp_target_memcpy (q, p, 128 * sizeof(int), 0, 0, id, d); + for (i = 0; i < 16; ++i) + if (q[i] != i + 4) + abort (); + for (i = 16; i < 32; ++i) + if (q[i] != i + 3) + abort (); + for (i = 32; i < 64; ++i) + if (q[i] != i + 2) + abort (); + for (i = 64; i < 128; ++i) + if (q[i] != i + 1) + abort (); + + omp_target_free (p, d); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-memcpy-rect-async-1.c b/libgomp/testsuite/libgomp.c-c++-common/target-memcpy-rect-async-1.c new file mode 100644 index 0000000..176bceb --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/target-memcpy-rect-async-1.c @@ -0,0 +1,68 @@ +/* Test for omp_target_memcpy_rect_async without considering dependence + objects. */ + +#include <omp.h> +#include <stdlib.h> + +#define NUM_DIMS 3 + +int +main () +{ + int d = omp_get_default_device (); + int id = omp_get_initial_device (); + int q[128], q2[128], i; + void *p; + + if (d < 0 || d >= omp_get_num_devices ()) + d = id; + + p = omp_target_alloc (130 * sizeof (int), d); + if (p == NULL) + return 0; + + if (omp_target_memcpy_rect_async (NULL, NULL, 0, 0, NULL, NULL, NULL, NULL, + NULL, d, id, 0, NULL) < 3 + || omp_target_memcpy_rect_async (NULL, NULL, 0, 0, NULL, NULL, NULL, NULL, + NULL, id, d, 0, NULL) < 3 + || omp_target_memcpy_rect_async (NULL, NULL, 0, 0, NULL, NULL, NULL, NULL, + NULL, id, id, 0, NULL) < 3) + abort (); + + for (i = 0; i < 128; i++) + q[i] = 0; + if (omp_target_memcpy (p, q, 128 * sizeof (int), 0, 0, d, id) != 0) + abort (); + + for (i = 0; i < 128; i++) + q[i] = i + 1; + + size_t volume[NUM_DIMS] = { 1, 2, 3 }; + size_t dst_offsets[NUM_DIMS] = { 0, 0, 0 }; + size_t src_offsets[NUM_DIMS] = { 0, 0, 0 }; + size_t dst_dimensions[NUM_DIMS] = { 3, 4, 5 }; + size_t src_dimensions[NUM_DIMS] = { 2, 3, 4 }; + + if (omp_target_memcpy_rect_async (p, q, sizeof (int), NUM_DIMS, volume, + dst_offsets, src_offsets, dst_dimensions, + src_dimensions, d, id, 0, NULL) != 0) + abort (); + + #pragma omp taskwait + + for (i = 0; i < 128; i++) + q2[i] = 0; + if (omp_target_memcpy (q2, p, 128 * sizeof (int), 0, 0, id, d) != 0) + abort (); + + /* q2 is expected to contain: 1 2 3 0 0 5 6 7 0 0 .. 0 */ + if (q2[0] != 1 || q2[1] != 2 || q2[2] !=3 || q2[3] != 0 || q2[4] != 0 + || q2[5] != 5 || q2[6] != 6 || q2[7] != 7) + abort (); + for (i = 8; i < 128; ++i) + if (q2[i] != 0) + abort (); + + omp_target_free (p, d); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-memcpy-rect-async-2.c b/libgomp/testsuite/libgomp.c-c++-common/target-memcpy-rect-async-2.c new file mode 100644 index 0000000..4a5d80f --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/target-memcpy-rect-async-2.c @@ -0,0 +1,91 @@ +/* Test for omp_target_memcpy_rect_async considering dependence objects. */ + +#include <omp.h> +#include <stdlib.h> + +#define NUM_DIMS 3 + +int +main () +{ + int d = omp_get_default_device (); + int id = omp_get_initial_device (); + int a[128], b[64], c[128], e[16], q[128], i; + void *p; + + if (d < 0 || d >= omp_get_num_devices ()) + d = id; + + p = omp_target_alloc (130 * sizeof (int), d); + if (p == NULL) + return 0; + + for (i = 0; i < 128; i++) + q[i] = 0; + if (omp_target_memcpy (p, q, 128 * sizeof (int), 0, 0, d, id) != 0) + abort (); + + size_t volume[NUM_DIMS] = { 2, 2, 3 }; + size_t dst_offsets[NUM_DIMS] = { 0, 0, 0 }; + size_t src_offsets[NUM_DIMS] = { 0, 0, 0 }; + size_t dst_dimensions[NUM_DIMS] = { 3, 4, 5 }; + size_t src_dimensions[NUM_DIMS] = { 2, 3, 4 }; + + for (i = 0; i < 128; i++) + a[i] = 42; + for (i = 0; i < 64; i++) + b[i] = 24; + for (i = 0; i < 128; i++) + c[i] = 0; + for (i = 0; i < 16; i++) + e[i] = 77; + + omp_depend_t obj[2]; + + #pragma omp parallel num_threads(5) + #pragma omp single + { + #pragma omp task depend (out: p) + omp_target_memcpy (p, a, 128 * sizeof (int), 0, 0, d, id); + + #pragma omp task depend(inout: p) + omp_target_memcpy (p, b, 64 * sizeof (int), 0, 0, d, id); + + #pragma omp task depend(out: c) + for (i = 0; i < 128; i++) + c[i] = i + 1; + + #pragma omp depobj(obj[0]) depend(inout: p) + #pragma omp depobj(obj[1]) depend(in: c) + + /* This produces: 1 2 3 - - 5 6 7 - - at positions 0..9 and + 13 14 15 - - 17 18 19 - - at positions 20..29. */ + omp_target_memcpy_rect_async (p, c, sizeof (int), NUM_DIMS, volume, + dst_offsets, src_offsets, dst_dimensions, + src_dimensions, d, id, 2, obj); + + #pragma omp task depend(in: p) + omp_target_memcpy (p, e, 16 * sizeof (int), 0, 0, d, id); + } + + #pragma omp taskwait + + if (omp_target_memcpy (q, p, 128 * sizeof(int), 0, 0, id, d) != 0) + abort (); + + for (i = 0; i < 16; ++i) + if (q[i] != 77) + abort (); + if (q[20] != 13 || q[21] != 14 || q[22] != 15 || q[25] != 17 || q[26] != 18 + || q[27] != 19) + abort (); + for (i = 28; i < 64; ++i) + if (q[i] != 24) + abort (); + for (i = 64; i < 128; ++i) + if (q[i] != 42) + abort (); + + omp_target_free (p, d); + return 0; +} |