diff options
Diffstat (limited to 'libgomp')
-rw-r--r-- | libgomp/ChangeLog | 18 | ||||
-rw-r--r-- | libgomp/ChangeLog.omp | 38 | ||||
-rw-r--r-- | libgomp/libgomp-plugin.h | 2 | ||||
-rw-r--r-- | libgomp/libgomp.h | 4 | ||||
-rw-r--r-- | libgomp/libgomp.map | 6 | ||||
-rw-r--r-- | libgomp/libgomp.texi | 39 | ||||
-rw-r--r-- | libgomp/oacc-mem.c | 44 | ||||
-rw-r--r-- | libgomp/openacc.f90 | 22 | ||||
-rw-r--r-- | libgomp/openacc.h | 4 | ||||
-rw-r--r-- | libgomp/openacc_lib.h | 24 | ||||
-rw-r--r-- | libgomp/plugin/plugin-gcn.c | 17 | ||||
-rw-r--r-- | libgomp/plugin/plugin-nvptx.c | 43 | ||||
-rw-r--r-- | libgomp/target.c | 14 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.fortran/metadirective-1.f90 | 9 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c | 96 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90 | 113 |
16 files changed, 472 insertions, 21 deletions
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index 6cef1dd..c9035c3 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,21 @@ +2025-05-28 Tobias Burnus <tburnus@baylibre.com> + + Backported from master: + 2025-05-28 Tobias Burnus <tburnus@baylibre.com> + + PR middle-end/118694 + * testsuite/libgomp.fortran/metadirective-1.f90: xfail when + compiling (also) for nvptx offloading as an error is then expected. + +2025-05-26 Tobias Burnus <tburnus@baylibre.com> + + Backported from master: + 2025-05-23 Tobias Burnus <tburnus@baylibre.com> + + PR middle-end/118694 + * testsuite/libgomp.c-c++-common/metadirective-1.c: xfail when + compiling (also) for nvptx offloading as an error is then expected. + 2025-05-19 Tobias Burnus <tburnus@baylibre.com> Backported from master: diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp index afc8184..5ecab6d 100644 --- a/libgomp/ChangeLog.omp +++ b/libgomp/ChangeLog.omp @@ -122,6 +122,42 @@ * testsuite/libgomp.c-c++-common/target-abi-struct-1.c: New. * testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c: Adjust. +2025-05-30 Tobias Burnus <tburnus@baylibre.com> + + Backported from master: + 2025-05-29 Tobias Burnus <tburnus@baylibre.com> + + PR libgomp/93226 + * libgomp-plugin.h (GOMP_OFFLOAD_openacc_async_dev2dev): New + prototype. + * libgomp.h (struct acc_dispatch_t): Add dev2dev_func. + (gomp_copy_dev2dev): New prototype. + * libgomp.map (OACC_2.6.1): New; add acc_memcpy_device{,_async}. + * libgomp.texi (acc_memcpy_device): New. + * oacc-mem.c (memcpy_tofrom_device): Change to take from/to + device boolean; use memcpy not memmove; add early return if + size == 0 or same device + same ptr. + (acc_memcpy_to_device, acc_memcpy_to_device_async, + acc_memcpy_from_device, acc_memcpy_from_device_async): Update. + (acc_memcpy_device, acc_memcpy_device_async): New. + * openacc.f90 (acc_memcpy_device, acc_memcpy_device_async): + Add interface. + * openacc_lib.h (acc_memcpy_device, acc_memcpy_device_async): + Likewise. + * openacc.h (acc_memcpy_device, acc_memcpy_device_async): Add + prototype. + * plugin/plugin-gcn.c (GOMP_OFFLOAD_openacc_async_host2dev): + Update comment. + (GOMP_OFFLOAD_openacc_async_dev2host): Update call. + (GOMP_OFFLOAD_openacc_async_dev2dev): New. + * plugin/plugin-nvptx.c (cuda_memcpy_dev_sanity_check): New. + (GOMP_OFFLOAD_dev2dev): Call it. + (GOMP_OFFLOAD_openacc_async_dev2dev): New. + * target.c (gomp_copy_dev2dev): New. + (gomp_load_plugin_for_device): Load dev2dev and async_dev2dev. + * testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c: New test. + * testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90: New test. + 2025-05-22 Thomas Schwinge <tschwinge@baylibre.com> Backported from master: @@ -1014,4 +1050,4 @@ * testsuite/libgomp.oacc-c-c++-common/noncontig_array-3.c: New test. * testsuite/libgomp.oacc-c-c++-common/noncontig_array-4.c: New test. * testsuite/libgomp.oacc-c-c++-common/noncontig_array-utils.h: Support - header for new tests.
\ No newline at end of file + header for new tests. diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h index 479264b..3c7741b 100644 --- a/libgomp/libgomp-plugin.h +++ b/libgomp/libgomp-plugin.h @@ -202,6 +202,8 @@ extern bool GOMP_OFFLOAD_openacc_async_dev2host (int, void *, const void *, size struct goacc_asyncqueue *); extern bool GOMP_OFFLOAD_openacc_async_host2dev (int, void *, const void *, size_t, struct goacc_asyncqueue *); +extern bool GOMP_OFFLOAD_openacc_async_dev2dev (int, void *, const void *, size_t, + struct goacc_asyncqueue *); extern void *GOMP_OFFLOAD_openacc_cuda_get_current_device (void); extern void *GOMP_OFFLOAD_openacc_cuda_get_current_context (void); extern void *GOMP_OFFLOAD_openacc_cuda_get_stream (struct goacc_asyncqueue *); diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 04f3c6d..571ac62c 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -1378,6 +1378,7 @@ typedef struct acc_dispatch_t __typeof (GOMP_OFFLOAD_openacc_async_exec) *exec_func; __typeof (GOMP_OFFLOAD_openacc_async_dev2host) *dev2host_func; __typeof (GOMP_OFFLOAD_openacc_async_host2dev) *host2dev_func; + __typeof (GOMP_OFFLOAD_openacc_async_dev2dev) *dev2dev_func; } async; __typeof (GOMP_OFFLOAD_openacc_get_property) *get_property_func; @@ -1485,6 +1486,9 @@ extern void gomp_copy_host2dev (struct gomp_device_descr *, extern void gomp_copy_dev2host (struct gomp_device_descr *, struct goacc_asyncqueue *, void *, const void *, size_t); +extern void gomp_copy_dev2dev (struct gomp_device_descr *, + struct goacc_asyncqueue *, void *, const void *, + size_t); extern uintptr_t gomp_map_val (struct target_mem_desc *, void **, size_t); extern bool gomp_attach_pointer (struct gomp_device_descr *, struct goacc_asyncqueue *, splay_tree, diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map index 6e2cdbf..bc2de6b 100644 --- a/libgomp/libgomp.map +++ b/libgomp/libgomp.map @@ -610,6 +610,12 @@ OACC_2.6 { acc_get_property_string_h_; } OACC_2.5.1; +OACC_2.6.1 { + global: + acc_memcpy_device; + acc_memcpy_device_async; +} OACC_2.6; + GOACC_2.0 { global: GOACC_data_end; diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi index 658df0e..e1b70b0 100644 --- a/libgomp/libgomp.texi +++ b/libgomp/libgomp.texi @@ -4780,6 +4780,7 @@ acceleration device. present on device. * acc_memcpy_to_device:: Copy host memory to device memory. * acc_memcpy_from_device:: Copy device memory to host memory. +* acc_memcpy_device:: Copy memory within a device. * acc_attach:: Let device pointer point to device-pointer target. * acc_detach:: Let device pointer point to host-pointer target. @@ -5854,6 +5855,44 @@ This function copies device memory specified by device address of +@node acc_memcpy_device +@section @code{acc_memcpy_device} -- Copy memory within a device. +@table @asis +@item @emph{Description} +This function copies device memory from one memory location to another +on the current device. It copies @var{bytes} bytes of data from the device +address, specified by @var{data_dev_src}, to the device address +@var{data_dev_dest}. The @code{_async} version performs the transfer +asnychronously using the queue associated with @var{async_arg}. + +@item @emph{C/C++}: +@multitable @columnfractions .20 .80 +@item @emph{Prototype}: @tab @code{void acc_memcpy_device(d_void* data_dev_dest,} +@item @tab @code{d_void* data_dev_src, size_t bytes);} +@item @emph{Prototype}: @tab @code{void acc_memcpy_device_async(d_void* data_dev_dest,} +@item @tab @code{d_void* data_dev_src, size_t bytes, int async_arg);} +@end multitable + +@item @emph{Fortran}: +@multitable @columnfractions .20 .80 +@item @emph{Interface}: @tab @code{subroutine acc_memcpy_device(data_dev_dest, &} +@item @tab @code{data_dev_src, bytes)} +@item @emph{Interface}: @tab @code{subroutine acc_memcpy_device_async(data_dev_dest, &} +@item @tab @code{data_dev_src, bytes, async_arg)} +@item @tab @code{type(c_ptr), value :: data_dev_dest} +@item @tab @code{type(c_ptr), value :: data_dev_src} +@item @tab @code{integer(c_size_t), value :: bytes} +@item @tab @code{integer(acc_handle_kind), value :: async_arg} +@end multitable + +@item @emph{Reference}: +@uref{https://www.openacc.org, OpenACC specification v2.6}, section +3.2.33. @uref{https://www.openacc.org, OpenACC specification v3.3}, section +3.2.28. +@end table + + + @node acc_attach @section @code{acc_attach} -- Let device pointer point to device-pointer target. @table @asis diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index a00ea16..e40b41b 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -171,21 +171,22 @@ acc_free (void *d) } static void -memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async, - const char *libfnname) +memcpy_tofrom_device (bool dev_to, bool dev_from, void *dst, void *src, + size_t s, int async, const char *libfnname) { /* No need to call lazy open here, as the device pointer must have been obtained from a routine that did that. */ struct goacc_thread *thr = goacc_thread (); assert (thr && thr->dev); + if (s == 0) + return; if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) { - if (from) - memmove (h, d, s); - else - memmove (d, h, s); + if (src == dst) + return; + memcpy (dst, src, s); return; } @@ -199,10 +200,15 @@ memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async, } goacc_aq aq = get_goacc_asyncqueue (async); - if (from) - gomp_copy_dev2host (thr->dev, aq, h, d, s); + if (dev_to && dev_from) + { + if (dst != src) + gomp_copy_dev2dev (thr->dev, aq, dst, src, s); + } + else if (dev_from) + gomp_copy_dev2host (thr->dev, aq, dst, src, s); else - gomp_copy_host2dev (thr->dev, aq, d, h, s, false, /* TODO: cbuf? */ NULL); + gomp_copy_host2dev (thr->dev, aq, dst, src, s, false, /* TODO: cbuf? */ NULL); if (profiling_p) { @@ -214,25 +220,37 @@ memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async, void acc_memcpy_to_device (void *d, void *h, size_t s) { - memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__); + memcpy_tofrom_device (true, false, d, h, s, acc_async_sync, __FUNCTION__); } void acc_memcpy_to_device_async (void *d, void *h, size_t s, int async) { - memcpy_tofrom_device (false, d, h, s, async, __FUNCTION__); + memcpy_tofrom_device (true, false, d, h, s, async, __FUNCTION__); } void acc_memcpy_from_device (void *h, void *d, size_t s) { - memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__); + memcpy_tofrom_device (false, true, h, d, s, acc_async_sync, __FUNCTION__); } void acc_memcpy_from_device_async (void *h, void *d, size_t s, int async) { - memcpy_tofrom_device (true, d, h, s, async, __FUNCTION__); + memcpy_tofrom_device (false, true, h, d, s, async, __FUNCTION__); +} + +void +acc_memcpy_device (void *dst, void *src, size_t s) +{ + memcpy_tofrom_device (true, true, dst, src, s, acc_async_sync, __FUNCTION__); +} + +void +acc_memcpy_device_async (void *dst, void *src, size_t s, int async) +{ + memcpy_tofrom_device (true, true, dst, src, s, async, __FUNCTION__); } /* Return the device pointer that corresponds to host data H. Or NULL diff --git a/libgomp/openacc.f90 b/libgomp/openacc.f90 index a3d7bcb..55894df 100644 --- a/libgomp/openacc.f90 +++ b/libgomp/openacc.f90 @@ -797,6 +797,7 @@ module openacc public :: acc_copyout_finalize, acc_delete_finalize public :: acc_memcpy_to_device, acc_memcpy_to_device_async public :: acc_memcpy_from_device, acc_memcpy_from_device_async + public :: acc_memcpy_device, acc_memcpy_device_async integer, parameter :: openacc_version = 201811 @@ -1046,6 +1047,27 @@ module openacc end subroutine end interface + interface + subroutine acc_memcpy_device (data_dev_dest, data_dev_src, bytes) bind(C) + use iso_c_binding, only: c_ptr, c_size_t + type(c_ptr), value :: data_dev_dest + type(c_ptr), value :: data_dev_src + integer(c_size_t), value :: bytes + end subroutine + end interface + + interface + subroutine acc_memcpy_device_async (data_dev_dest, data_dev_src, & + bytes, async_arg) bind(C) + use iso_c_binding, only: c_ptr, c_size_t + import :: acc_handle_kind + type(c_ptr), value :: data_dev_dest + type(c_ptr), value :: data_dev_src + integer(c_size_t), value :: bytes + integer(acc_handle_kind), value :: async_arg + end subroutine + end interface + interface acc_copyin_async procedure :: acc_copyin_async_32_h procedure :: acc_copyin_async_64_h diff --git a/libgomp/openacc.h b/libgomp/openacc.h index a520bbe..3085b00 100644 --- a/libgomp/openacc.h +++ b/libgomp/openacc.h @@ -123,6 +123,7 @@ void *acc_hostptr (void *) __GOACC_NOTHROW; int acc_is_present (void *, size_t) __GOACC_NOTHROW; void acc_memcpy_to_device (void *, void *, size_t) __GOACC_NOTHROW; void acc_memcpy_from_device (void *, void *, size_t) __GOACC_NOTHROW; +void acc_memcpy_device (void *, void *, size_t) __GOACC_NOTHROW; void acc_attach (void **) __GOACC_NOTHROW; void acc_attach_async (void **, int) __GOACC_NOTHROW; void acc_detach (void **) __GOACC_NOTHROW; @@ -136,7 +137,7 @@ void acc_delete_finalize_async (void *, size_t, int) __GOACC_NOTHROW; void acc_detach_finalize (void **) __GOACC_NOTHROW; void acc_detach_finalize_async (void **, int) __GOACC_NOTHROW; -/* Async functions, specified in OpenACC 2.5. */ +/* Async functions, specified in OpenACC 2.5, acc_memcpy_device in 2.6. */ void acc_copyin_async (void *, size_t, int) __GOACC_NOTHROW; void acc_create_async (void *, size_t, int) __GOACC_NOTHROW; void acc_copyout_async (void *, size_t, int) __GOACC_NOTHROW; @@ -145,6 +146,7 @@ void acc_update_device_async (void *, size_t, int) __GOACC_NOTHROW; void acc_update_self_async (void *, size_t, int) __GOACC_NOTHROW; void acc_memcpy_to_device_async (void *, void *, size_t, int) __GOACC_NOTHROW; void acc_memcpy_from_device_async (void *, void *, size_t, int) __GOACC_NOTHROW; +void acc_memcpy_device_async (void *, void *, size_t, int) __GOACC_NOTHROW; /* CUDA-specific routines. */ void *acc_get_current_cuda_device (void) __GOACC_NOTHROW; diff --git a/libgomp/openacc_lib.h b/libgomp/openacc_lib.h index d830574..e0e7788 100644 --- a/libgomp/openacc_lib.h +++ b/libgomp/openacc_lib.h @@ -528,6 +528,30 @@ end subroutine end interface + interface + subroutine acc_memcpy_device(data_dev_dest, data_dev_src, & + & bytes) bind(C) + use iso_c_binding, only: c_ptr, c_size_t + type(c_ptr), value :: data_dev_dest + type(c_ptr), value :: data_dev_src + integer(c_size_t), value :: bytes + end subroutine + end interface + + interface + subroutine acc_memcpy_device_async(data_dev_dest, & + & data_dev_src, bytes, & + & async_arg) bind(C) + use iso_c_binding, only: c_ptr, c_size_t + import :: acc_handle_kind + type(c_ptr), value :: data_dev_dest + type(c_ptr), value :: data_dev_src + integer(c_size_t), value :: bytes + integer(acc_handle_kind), value :: async_arg + end subroutine + end interface + + interface acc_copyin_async subroutine acc_copyin_async_32_h (a, len, async) use iso_c_binding, only: c_int32_t diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c index b39a94b..f823b27 100644 --- a/libgomp/plugin/plugin-gcn.c +++ b/libgomp/plugin/plugin-gcn.c @@ -5081,7 +5081,8 @@ GOMP_OFFLOAD_openacc_async_queue_callback (struct goacc_asyncqueue *aq, queue_push_callback (aq, fn, data); } -/* Queue up an asynchronous data copy from host to DEVICE. */ +/* Queue up an asynchronous data copy from host to DEVICE. + (Also handles dev2host and dev2dev.) */ bool GOMP_OFFLOAD_openacc_async_host2dev (int device, void *dst, const void *src, @@ -5099,10 +5100,16 @@ bool GOMP_OFFLOAD_openacc_async_dev2host (int device, void *dst, const void *src, size_t n, struct goacc_asyncqueue *aq) { - struct agent_info *agent = get_agent_info (device); - assert (agent == aq->agent); - queue_push_copy (aq, dst, src, n); - return true; + return GOMP_OFFLOAD_openacc_async_host2dev (device, dst, src, n, aq); +} + +/* Queue up an asynchronous data copy from DEVICE to DEVICE. */ + +bool +GOMP_OFFLOAD_openacc_async_dev2dev (int device, void *dst, const void *src, + size_t n, struct goacc_asyncqueue *aq) +{ + return GOMP_OFFLOAD_openacc_async_host2dev (device, dst, src, n, aq); } union goacc_property_value diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index a6c8198..712c8b7 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -2060,6 +2060,34 @@ GOMP_OFFLOAD_openacc_async_queue_callback (struct goacc_asyncqueue *aq, } static bool +cuda_memcpy_dev_sanity_check (const void *d1, const void *d2, size_t s) +{ + CUdeviceptr pb1, pb2; + size_t ps1, ps2; + if (!s) + return true; + if (!d1 || !d2) + { + GOMP_PLUGIN_error ("invalid device address"); + return false; + } + CUDA_CALL (cuMemGetAddressRange, &pb1, &ps1, (CUdeviceptr) d1); + CUDA_CALL (cuMemGetAddressRange, &pb2, &ps2, (CUdeviceptr) d2); + if (!pb1 || !pb2) + { + GOMP_PLUGIN_error ("invalid device address"); + return false; + } + if ((void *)(d1 + s) > (void *)(pb1 + ps1) + || (void *)(d2 + s) > (void *)(pb2 + ps2)) + { + GOMP_PLUGIN_error ("invalid size"); + return false; + } + return true; +} + +static bool cuda_memcpy_sanity_check (const void *h, const void *d, size_t s) { CUdeviceptr pb; @@ -2118,6 +2146,9 @@ GOMP_OFFLOAD_dev2host (int ord, void *dst, const void *src, size_t n) bool GOMP_OFFLOAD_dev2dev (int ord, void *dst, const void *src, size_t n) { + if (!nvptx_attach_host_thread_to_device (ord) + || !cuda_memcpy_dev_sanity_check (dst, src, n)) + return false; CUDA_CALL (cuMemcpyDtoDAsync, (CUdeviceptr) dst, (CUdeviceptr) src, n, NULL); return true; } @@ -2329,6 +2360,18 @@ GOMP_OFFLOAD_openacc_async_dev2host (int ord, void *dst, const void *src, return true; } +bool +GOMP_OFFLOAD_openacc_async_dev2dev (int ord, void *dst, const void *src, + size_t n, struct goacc_asyncqueue *aq) +{ + if (!nvptx_attach_host_thread_to_device (ord) + || !cuda_memcpy_dev_sanity_check (dst, src, n)) + return false; + CUDA_CALL (cuMemcpyDtoDAsync, (CUdeviceptr) dst, (CUdeviceptr) src, n, + aq->cuda_stream); + return true; +} + union goacc_property_value GOMP_OFFLOAD_openacc_get_property (int n, enum goacc_property prop) { diff --git a/libgomp/target.c b/libgomp/target.c index 01434f8..4ad803a 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -461,6 +461,19 @@ gomp_copy_dev2host (struct gomp_device_descr *devicep, gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz); } +attribute_hidden void +gomp_copy_dev2dev (struct gomp_device_descr *devicep, + struct goacc_asyncqueue *aq, + void *dst, const void *src, size_t sz) +{ + if (__builtin_expect (aq != NULL, 0)) + goacc_device_copy_async (devicep, devicep->openacc.async.dev2dev_func, + "dev", dst, "dev", src, NULL, sz, aq); + else + gomp_device_copy (devicep, devicep->dev2dev_func, "dev", dst, + "dev", src, sz); +} + static void gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr) { @@ -6312,6 +6325,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device, || !DLSYM_OPT (openacc.async.exec, openacc_async_exec) || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host) || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev) + || !DLSYM_OPT (openacc.async.dev2dev, openacc_async_dev2dev) || !DLSYM_OPT (openacc.get_property, openacc_get_property)) { /* Require all the OpenACC handlers if we have diff --git a/libgomp/testsuite/libgomp.fortran/metadirective-1.f90 b/libgomp/testsuite/libgomp.fortran/metadirective-1.f90 index 7b3e09f..d6f4d5b 100644 --- a/libgomp/testsuite/libgomp.fortran/metadirective-1.f90 +++ b/libgomp/testsuite/libgomp.fortran/metadirective-1.f90 @@ -1,4 +1,5 @@ -! { dg-do run } +! { dg-do run { target { ! offload_target_nvptx } } } +! { dg-do compile { target offload_target_nvptx } } program test implicit none @@ -33,6 +34,10 @@ program test contains subroutine f (x, y, z) integer :: x(N), y(N), z(N) + ! The following fails as on the host the target side cannot be + ! resolved - and the 'teams' or not status affects how 'target' + ! is called. -> See PR118694, esp. comment 9. + ! Note also the dg-do compile above for offload_target_nvptx !$omp target map (to: x, y) map(from: z) block @@ -43,6 +48,7 @@ contains z(i) = x(i) * y(i) enddo end block + ! { dg-bogus "'target' construct with nested 'teams' construct contains directives outside of the 'teams' construct" "PR118694" { xfail offload_target_nvptx } .-9 } */ end subroutine subroutine g (x, y, z) integer :: x(N), y(N), z(N) @@ -56,6 +62,7 @@ contains z(i) = x(i) * y(i) enddo end block + ! { dg-bogus "'target' construct with nested 'teams' construct contains directives outside of the 'teams' construct" "PR118694" { xfail offload_target_nvptx } .-9 } */ !$omp end target end subroutine end program diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c new file mode 100644 index 0000000..eda651d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c @@ -0,0 +1,96 @@ +/* { dg-prune-output "using .vector_length \\(32\\)" } */ + +/* PR libgomp/93226 */ + +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <openacc.h> + +enum { N = 1024 }; + +static int D[N]; +#pragma acc declare device_resident(D) + +#pragma acc routine +intptr_t init_d() +{ + for (int i = 0; i < N; i++) + D[i] = 27*i; + return (intptr_t) &D[0]; +} + +int +main () +{ + int *a, *b, *e; + void *d_a, *d_b, *d_c, *d_d, *d_e, *d_f; + intptr_t intptr; + bool fail = false; + + a = (int *) malloc (N*sizeof (int)); + b = (int *) malloc (N*sizeof (int)); + e = (int *) malloc (N*sizeof (int)); + d_c = acc_malloc (N*sizeof (int)); + d_f = acc_malloc (N*sizeof (int)); + + memset (e, 0xff, N*sizeof (int)); + d_e = acc_copyin (e, N*sizeof (int)); + + #pragma acc serial copyout(intptr) + intptr = init_d (); + d_d = (void*) intptr; + acc_memcpy_device (d_c, d_d, N*sizeof (int)); + + #pragma acc serial copy(fail) deviceptr(d_c) firstprivate(intptr) + { + int *cc = (int *) d_c; + int *dd = (int *) intptr; + for (int i = 0; i < N; i++) + if (dd[i] != 27*i || cc[i] != 27*i) + { + fail = true; + __builtin_abort (); + } + } + if (fail) __builtin_abort (); + + for (int i = 0; i < N; i++) + a[i] = 11*i; + for (int i = 0; i < N; i++) + b[i] = 31*i; + + d_a = acc_copyin (a, N*sizeof (int)); + acc_copyin_async (b, N*sizeof (int), acc_async_noval); + + #pragma acc parallel deviceptr(d_c) async + { + int *cc = (int *) d_c; + #pragma acc loop + for (int i = 0; i < N; i++) + cc[i] = -17*i; + } + + acc_memcpy_device_async (d_d, d_a, N*sizeof (int), acc_async_noval); + acc_memcpy_device_async (d_f, d_c, N*sizeof (int), acc_async_noval); + acc_wait (acc_async_noval); + d_b = acc_deviceptr (b); + acc_memcpy_device_async (d_e, d_b, N*sizeof (int), acc_async_noval); + acc_wait (acc_async_noval); + + #pragma acc serial deviceptr(d_d, d_e, d_f) copy(fail) + { + int *dd = (int *) d_d; + int *ee = (int *) d_e; + int *ff = (int *) d_f; + for (int i = 0; i < N; i++) + if (dd[i] != 11*i + || ee[i] != 31*i + || ff[i] != -17*i) + { + fail = true; + __builtin_abort (); + } + } + if (fail) __builtin_abort (); +} diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90 new file mode 100644 index 0000000..8f3a8f0 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90 @@ -0,0 +1,113 @@ +! { dg-prune-output "using .vector_length \\(32\\)" } + +! PR libgomp/93226 */ + +module m + use iso_c_binding + use openacc + implicit none (external, type) + + integer, parameter :: N = 1024 + + integer :: D(N) + !$acc declare device_resident(D) + +contains + + integer(c_intptr_t) function init_d() + !$acc routine + integer :: i + do i = 1, N + D(i) = 27*i + end do + init_d = loc(D) + end +end module + +program main + use m + implicit none (external, type) + + integer, allocatable, target :: a(:), b(:), e(:) + type(c_ptr) :: d_a, d_b, d_c, d_d, d_e, d_f + integer(c_intptr_t) intptr + integer :: i + logical fail + + fail = .false. + + allocate(a(N), b(N), e(N)) + d_c = acc_malloc (N*c_sizeof (i)) + d_f = acc_malloc (N*c_sizeof (i)) + + e = huge(e) + call acc_copyin (e, N*c_sizeof (i)); + d_e = acc_deviceptr (e); + + !$acc serial copyout(intptr) + intptr = init_d () + !$acc end serial + d_d = transfer(intptr, d_d) + call acc_memcpy_device (d_c, d_d, N*c_sizeof (i)) + + !$acc serial copy(fail) copy(a) deviceptr(d_c, d_d) firstprivate(intptr) + block + integer, pointer :: cc(:), dd(:) + call c_f_pointer (d_c, cc, [N]) + call c_f_pointer (d_d, dd, [N]) + a = cc + do i = 1, N + if (dd(i) /= 27*i .or. cc(i) /= 27*i) then + fail = .true. + stop 1 + end if + end do + end block + !$acc end serial + if (fail) error stop 1 + + do i = 1, N + a(i) = 11*i + b(i) = 31*i + end do + + call acc_copyin (a, N*c_sizeof (i)) + d_a = acc_deviceptr (a) + call acc_copyin_async (b, N*c_sizeof (i), acc_async_noval) + + !$acc parallel deviceptr(d_c) private(i) async + block + integer, pointer :: cc(:) + call c_f_pointer (d_c, cc, [N]) + !$acc loop + do i = 1, N + cc(i) = -17*i + end do + end block + !$acc end parallel + + call acc_memcpy_device_async (d_d, d_a, N*c_sizeof (i), acc_async_noval) + call acc_memcpy_device_async (d_f, d_c, N*c_sizeof (i), acc_async_noval) + call acc_wait (acc_async_noval) + d_b = acc_deviceptr (b) + call acc_memcpy_device_async (d_e, d_b, N*c_sizeof (i), acc_async_noval) + call acc_wait (acc_async_noval) + + !$acc serial deviceptr(d_d, d_e, d_f) private(i) copy(fail) + block + integer, pointer :: dd(:), ee(:), ff(:) + call c_f_pointer (d_d, dd, [N]) + call c_f_pointer (d_e, ee, [N]) + call c_f_pointer (d_f, ff, [N]) + do i = 1, N + if (dd(i) /= 11*i & + .or. ee(i) /= 31*i & + .or. ff(i) /= -17*i) then + fail = .true. + stop 2 + end if + end do + end block + !$acc end serial + if (fail) error stop 2 +end |