aboutsummaryrefslogtreecommitdiff
path: root/libgomp
diff options
context:
space:
mode:
Diffstat (limited to 'libgomp')
-rw-r--r--libgomp/ChangeLog18
-rw-r--r--libgomp/ChangeLog.omp38
-rw-r--r--libgomp/libgomp-plugin.h2
-rw-r--r--libgomp/libgomp.h4
-rw-r--r--libgomp/libgomp.map6
-rw-r--r--libgomp/libgomp.texi39
-rw-r--r--libgomp/oacc-mem.c44
-rw-r--r--libgomp/openacc.f9022
-rw-r--r--libgomp/openacc.h4
-rw-r--r--libgomp/openacc_lib.h24
-rw-r--r--libgomp/plugin/plugin-gcn.c17
-rw-r--r--libgomp/plugin/plugin-nvptx.c43
-rw-r--r--libgomp/target.c14
-rw-r--r--libgomp/testsuite/libgomp.fortran/metadirective-1.f909
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c96
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90113
16 files changed, 472 insertions, 21 deletions
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog
index 6cef1dd..c9035c3 100644
--- a/libgomp/ChangeLog
+++ b/libgomp/ChangeLog
@@ -1,3 +1,21 @@
+2025-05-28 Tobias Burnus <tburnus@baylibre.com>
+
+ Backported from master:
+ 2025-05-28 Tobias Burnus <tburnus@baylibre.com>
+
+ PR middle-end/118694
+ * testsuite/libgomp.fortran/metadirective-1.f90: xfail when
+ compiling (also) for nvptx offloading as an error is then expected.
+
+2025-05-26 Tobias Burnus <tburnus@baylibre.com>
+
+ Backported from master:
+ 2025-05-23 Tobias Burnus <tburnus@baylibre.com>
+
+ PR middle-end/118694
+ * testsuite/libgomp.c-c++-common/metadirective-1.c: xfail when
+ compiling (also) for nvptx offloading as an error is then expected.
+
2025-05-19 Tobias Burnus <tburnus@baylibre.com>
Backported from master:
diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp
index afc8184..5ecab6d 100644
--- a/libgomp/ChangeLog.omp
+++ b/libgomp/ChangeLog.omp
@@ -122,6 +122,42 @@
* testsuite/libgomp.c-c++-common/target-abi-struct-1.c: New.
* testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c: Adjust.
+2025-05-30 Tobias Burnus <tburnus@baylibre.com>
+
+ Backported from master:
+ 2025-05-29 Tobias Burnus <tburnus@baylibre.com>
+
+ PR libgomp/93226
+ * libgomp-plugin.h (GOMP_OFFLOAD_openacc_async_dev2dev): New
+ prototype.
+ * libgomp.h (struct acc_dispatch_t): Add dev2dev_func.
+ (gomp_copy_dev2dev): New prototype.
+ * libgomp.map (OACC_2.6.1): New; add acc_memcpy_device{,_async}.
+ * libgomp.texi (acc_memcpy_device): New.
+ * oacc-mem.c (memcpy_tofrom_device): Change to take from/to
+ device boolean; use memcpy not memmove; add early return if
+ size == 0 or same device + same ptr.
+ (acc_memcpy_to_device, acc_memcpy_to_device_async,
+ acc_memcpy_from_device, acc_memcpy_from_device_async): Update.
+ (acc_memcpy_device, acc_memcpy_device_async): New.
+ * openacc.f90 (acc_memcpy_device, acc_memcpy_device_async):
+ Add interface.
+ * openacc_lib.h (acc_memcpy_device, acc_memcpy_device_async):
+ Likewise.
+ * openacc.h (acc_memcpy_device, acc_memcpy_device_async): Add
+ prototype.
+ * plugin/plugin-gcn.c (GOMP_OFFLOAD_openacc_async_host2dev):
+ Update comment.
+ (GOMP_OFFLOAD_openacc_async_dev2host): Update call.
+ (GOMP_OFFLOAD_openacc_async_dev2dev): New.
+ * plugin/plugin-nvptx.c (cuda_memcpy_dev_sanity_check): New.
+ (GOMP_OFFLOAD_dev2dev): Call it.
+ (GOMP_OFFLOAD_openacc_async_dev2dev): New.
+ * target.c (gomp_copy_dev2dev): New.
+ (gomp_load_plugin_for_device): Load dev2dev and async_dev2dev.
+ * testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c: New test.
+ * testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90: New test.
+
2025-05-22 Thomas Schwinge <tschwinge@baylibre.com>
Backported from master:
@@ -1014,4 +1050,4 @@
* testsuite/libgomp.oacc-c-c++-common/noncontig_array-3.c: New test.
* testsuite/libgomp.oacc-c-c++-common/noncontig_array-4.c: New test.
* testsuite/libgomp.oacc-c-c++-common/noncontig_array-utils.h: Support
- header for new tests. \ No newline at end of file
+ header for new tests.
diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h
index 479264b..3c7741b 100644
--- a/libgomp/libgomp-plugin.h
+++ b/libgomp/libgomp-plugin.h
@@ -202,6 +202,8 @@ extern bool GOMP_OFFLOAD_openacc_async_dev2host (int, void *, const void *, size
struct goacc_asyncqueue *);
extern bool GOMP_OFFLOAD_openacc_async_host2dev (int, void *, const void *, size_t,
struct goacc_asyncqueue *);
+extern bool GOMP_OFFLOAD_openacc_async_dev2dev (int, void *, const void *, size_t,
+ struct goacc_asyncqueue *);
extern void *GOMP_OFFLOAD_openacc_cuda_get_current_device (void);
extern void *GOMP_OFFLOAD_openacc_cuda_get_current_context (void);
extern void *GOMP_OFFLOAD_openacc_cuda_get_stream (struct goacc_asyncqueue *);
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 04f3c6d..571ac62c 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -1378,6 +1378,7 @@ typedef struct acc_dispatch_t
__typeof (GOMP_OFFLOAD_openacc_async_exec) *exec_func;
__typeof (GOMP_OFFLOAD_openacc_async_dev2host) *dev2host_func;
__typeof (GOMP_OFFLOAD_openacc_async_host2dev) *host2dev_func;
+ __typeof (GOMP_OFFLOAD_openacc_async_dev2dev) *dev2dev_func;
} async;
__typeof (GOMP_OFFLOAD_openacc_get_property) *get_property_func;
@@ -1485,6 +1486,9 @@ extern void gomp_copy_host2dev (struct gomp_device_descr *,
extern void gomp_copy_dev2host (struct gomp_device_descr *,
struct goacc_asyncqueue *, void *, const void *,
size_t);
+extern void gomp_copy_dev2dev (struct gomp_device_descr *,
+ struct goacc_asyncqueue *, void *, const void *,
+ size_t);
extern uintptr_t gomp_map_val (struct target_mem_desc *, void **, size_t);
extern bool gomp_attach_pointer (struct gomp_device_descr *,
struct goacc_asyncqueue *, splay_tree,
diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map
index 6e2cdbf..bc2de6b 100644
--- a/libgomp/libgomp.map
+++ b/libgomp/libgomp.map
@@ -610,6 +610,12 @@ OACC_2.6 {
acc_get_property_string_h_;
} OACC_2.5.1;
+OACC_2.6.1 {
+ global:
+ acc_memcpy_device;
+ acc_memcpy_device_async;
+} OACC_2.6;
+
GOACC_2.0 {
global:
GOACC_data_end;
diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index 658df0e..e1b70b0 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -4780,6 +4780,7 @@ acceleration device.
present on device.
* acc_memcpy_to_device:: Copy host memory to device memory.
* acc_memcpy_from_device:: Copy device memory to host memory.
+* acc_memcpy_device:: Copy memory within a device.
* acc_attach:: Let device pointer point to device-pointer target.
* acc_detach:: Let device pointer point to host-pointer target.
@@ -5854,6 +5855,44 @@ This function copies device memory specified by device address of
+@node acc_memcpy_device
+@section @code{acc_memcpy_device} -- Copy memory within a device.
+@table @asis
+@item @emph{Description}
+This function copies device memory from one memory location to another
+on the current device. It copies @var{bytes} bytes of data from the device
+address, specified by @var{data_dev_src}, to the device address
+@var{data_dev_dest}. The @code{_async} version performs the transfer
+asnychronously using the queue associated with @var{async_arg}.
+
+@item @emph{C/C++}:
+@multitable @columnfractions .20 .80
+@item @emph{Prototype}: @tab @code{void acc_memcpy_device(d_void* data_dev_dest,}
+@item @tab @code{d_void* data_dev_src, size_t bytes);}
+@item @emph{Prototype}: @tab @code{void acc_memcpy_device_async(d_void* data_dev_dest,}
+@item @tab @code{d_void* data_dev_src, size_t bytes, int async_arg);}
+@end multitable
+
+@item @emph{Fortran}:
+@multitable @columnfractions .20 .80
+@item @emph{Interface}: @tab @code{subroutine acc_memcpy_device(data_dev_dest, &}
+@item @tab @code{data_dev_src, bytes)}
+@item @emph{Interface}: @tab @code{subroutine acc_memcpy_device_async(data_dev_dest, &}
+@item @tab @code{data_dev_src, bytes, async_arg)}
+@item @tab @code{type(c_ptr), value :: data_dev_dest}
+@item @tab @code{type(c_ptr), value :: data_dev_src}
+@item @tab @code{integer(c_size_t), value :: bytes}
+@item @tab @code{integer(acc_handle_kind), value :: async_arg}
+@end multitable
+
+@item @emph{Reference}:
+@uref{https://www.openacc.org, OpenACC specification v2.6}, section
+3.2.33. @uref{https://www.openacc.org, OpenACC specification v3.3}, section
+3.2.28.
+@end table
+
+
+
@node acc_attach
@section @code{acc_attach} -- Let device pointer point to device-pointer target.
@table @asis
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index a00ea16..e40b41b 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -171,21 +171,22 @@ acc_free (void *d)
}
static void
-memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
- const char *libfnname)
+memcpy_tofrom_device (bool dev_to, bool dev_from, void *dst, void *src,
+ size_t s, int async, const char *libfnname)
{
/* No need to call lazy open here, as the device pointer must have
been obtained from a routine that did that. */
struct goacc_thread *thr = goacc_thread ();
assert (thr && thr->dev);
+ if (s == 0)
+ return;
if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
{
- if (from)
- memmove (h, d, s);
- else
- memmove (d, h, s);
+ if (src == dst)
+ return;
+ memcpy (dst, src, s);
return;
}
@@ -199,10 +200,15 @@ memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
}
goacc_aq aq = get_goacc_asyncqueue (async);
- if (from)
- gomp_copy_dev2host (thr->dev, aq, h, d, s);
+ if (dev_to && dev_from)
+ {
+ if (dst != src)
+ gomp_copy_dev2dev (thr->dev, aq, dst, src, s);
+ }
+ else if (dev_from)
+ gomp_copy_dev2host (thr->dev, aq, dst, src, s);
else
- gomp_copy_host2dev (thr->dev, aq, d, h, s, false, /* TODO: cbuf? */ NULL);
+ gomp_copy_host2dev (thr->dev, aq, dst, src, s, false, /* TODO: cbuf? */ NULL);
if (profiling_p)
{
@@ -214,25 +220,37 @@ memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
void
acc_memcpy_to_device (void *d, void *h, size_t s)
{
- memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__);
+ memcpy_tofrom_device (true, false, d, h, s, acc_async_sync, __FUNCTION__);
}
void
acc_memcpy_to_device_async (void *d, void *h, size_t s, int async)
{
- memcpy_tofrom_device (false, d, h, s, async, __FUNCTION__);
+ memcpy_tofrom_device (true, false, d, h, s, async, __FUNCTION__);
}
void
acc_memcpy_from_device (void *h, void *d, size_t s)
{
- memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__);
+ memcpy_tofrom_device (false, true, h, d, s, acc_async_sync, __FUNCTION__);
}
void
acc_memcpy_from_device_async (void *h, void *d, size_t s, int async)
{
- memcpy_tofrom_device (true, d, h, s, async, __FUNCTION__);
+ memcpy_tofrom_device (false, true, h, d, s, async, __FUNCTION__);
+}
+
+void
+acc_memcpy_device (void *dst, void *src, size_t s)
+{
+ memcpy_tofrom_device (true, true, dst, src, s, acc_async_sync, __FUNCTION__);
+}
+
+void
+acc_memcpy_device_async (void *dst, void *src, size_t s, int async)
+{
+ memcpy_tofrom_device (true, true, dst, src, s, async, __FUNCTION__);
}
/* Return the device pointer that corresponds to host data H. Or NULL
diff --git a/libgomp/openacc.f90 b/libgomp/openacc.f90
index a3d7bcb..55894df 100644
--- a/libgomp/openacc.f90
+++ b/libgomp/openacc.f90
@@ -797,6 +797,7 @@ module openacc
public :: acc_copyout_finalize, acc_delete_finalize
public :: acc_memcpy_to_device, acc_memcpy_to_device_async
public :: acc_memcpy_from_device, acc_memcpy_from_device_async
+ public :: acc_memcpy_device, acc_memcpy_device_async
integer, parameter :: openacc_version = 201811
@@ -1046,6 +1047,27 @@ module openacc
end subroutine
end interface
+ interface
+ subroutine acc_memcpy_device (data_dev_dest, data_dev_src, bytes) bind(C)
+ use iso_c_binding, only: c_ptr, c_size_t
+ type(c_ptr), value :: data_dev_dest
+ type(c_ptr), value :: data_dev_src
+ integer(c_size_t), value :: bytes
+ end subroutine
+ end interface
+
+ interface
+ subroutine acc_memcpy_device_async (data_dev_dest, data_dev_src, &
+ bytes, async_arg) bind(C)
+ use iso_c_binding, only: c_ptr, c_size_t
+ import :: acc_handle_kind
+ type(c_ptr), value :: data_dev_dest
+ type(c_ptr), value :: data_dev_src
+ integer(c_size_t), value :: bytes
+ integer(acc_handle_kind), value :: async_arg
+ end subroutine
+ end interface
+
interface acc_copyin_async
procedure :: acc_copyin_async_32_h
procedure :: acc_copyin_async_64_h
diff --git a/libgomp/openacc.h b/libgomp/openacc.h
index a520bbe..3085b00 100644
--- a/libgomp/openacc.h
+++ b/libgomp/openacc.h
@@ -123,6 +123,7 @@ void *acc_hostptr (void *) __GOACC_NOTHROW;
int acc_is_present (void *, size_t) __GOACC_NOTHROW;
void acc_memcpy_to_device (void *, void *, size_t) __GOACC_NOTHROW;
void acc_memcpy_from_device (void *, void *, size_t) __GOACC_NOTHROW;
+void acc_memcpy_device (void *, void *, size_t) __GOACC_NOTHROW;
void acc_attach (void **) __GOACC_NOTHROW;
void acc_attach_async (void **, int) __GOACC_NOTHROW;
void acc_detach (void **) __GOACC_NOTHROW;
@@ -136,7 +137,7 @@ void acc_delete_finalize_async (void *, size_t, int) __GOACC_NOTHROW;
void acc_detach_finalize (void **) __GOACC_NOTHROW;
void acc_detach_finalize_async (void **, int) __GOACC_NOTHROW;
-/* Async functions, specified in OpenACC 2.5. */
+/* Async functions, specified in OpenACC 2.5, acc_memcpy_device in 2.6. */
void acc_copyin_async (void *, size_t, int) __GOACC_NOTHROW;
void acc_create_async (void *, size_t, int) __GOACC_NOTHROW;
void acc_copyout_async (void *, size_t, int) __GOACC_NOTHROW;
@@ -145,6 +146,7 @@ void acc_update_device_async (void *, size_t, int) __GOACC_NOTHROW;
void acc_update_self_async (void *, size_t, int) __GOACC_NOTHROW;
void acc_memcpy_to_device_async (void *, void *, size_t, int) __GOACC_NOTHROW;
void acc_memcpy_from_device_async (void *, void *, size_t, int) __GOACC_NOTHROW;
+void acc_memcpy_device_async (void *, void *, size_t, int) __GOACC_NOTHROW;
/* CUDA-specific routines. */
void *acc_get_current_cuda_device (void) __GOACC_NOTHROW;
diff --git a/libgomp/openacc_lib.h b/libgomp/openacc_lib.h
index d830574..e0e7788 100644
--- a/libgomp/openacc_lib.h
+++ b/libgomp/openacc_lib.h
@@ -528,6 +528,30 @@
end subroutine
end interface
+ interface
+ subroutine acc_memcpy_device(data_dev_dest, data_dev_src, &
+ & bytes) bind(C)
+ use iso_c_binding, only: c_ptr, c_size_t
+ type(c_ptr), value :: data_dev_dest
+ type(c_ptr), value :: data_dev_src
+ integer(c_size_t), value :: bytes
+ end subroutine
+ end interface
+
+ interface
+ subroutine acc_memcpy_device_async(data_dev_dest, &
+ & data_dev_src, bytes, &
+ & async_arg) bind(C)
+ use iso_c_binding, only: c_ptr, c_size_t
+ import :: acc_handle_kind
+ type(c_ptr), value :: data_dev_dest
+ type(c_ptr), value :: data_dev_src
+ integer(c_size_t), value :: bytes
+ integer(acc_handle_kind), value :: async_arg
+ end subroutine
+ end interface
+
+
interface acc_copyin_async
subroutine acc_copyin_async_32_h (a, len, async)
use iso_c_binding, only: c_int32_t
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index b39a94b..f823b27 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -5081,7 +5081,8 @@ GOMP_OFFLOAD_openacc_async_queue_callback (struct goacc_asyncqueue *aq,
queue_push_callback (aq, fn, data);
}
-/* Queue up an asynchronous data copy from host to DEVICE. */
+/* Queue up an asynchronous data copy from host to DEVICE.
+ (Also handles dev2host and dev2dev.) */
bool
GOMP_OFFLOAD_openacc_async_host2dev (int device, void *dst, const void *src,
@@ -5099,10 +5100,16 @@ bool
GOMP_OFFLOAD_openacc_async_dev2host (int device, void *dst, const void *src,
size_t n, struct goacc_asyncqueue *aq)
{
- struct agent_info *agent = get_agent_info (device);
- assert (agent == aq->agent);
- queue_push_copy (aq, dst, src, n);
- return true;
+ return GOMP_OFFLOAD_openacc_async_host2dev (device, dst, src, n, aq);
+}
+
+/* Queue up an asynchronous data copy from DEVICE to DEVICE. */
+
+bool
+GOMP_OFFLOAD_openacc_async_dev2dev (int device, void *dst, const void *src,
+ size_t n, struct goacc_asyncqueue *aq)
+{
+ return GOMP_OFFLOAD_openacc_async_host2dev (device, dst, src, n, aq);
}
union goacc_property_value
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index a6c8198..712c8b7 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -2060,6 +2060,34 @@ GOMP_OFFLOAD_openacc_async_queue_callback (struct goacc_asyncqueue *aq,
}
static bool
+cuda_memcpy_dev_sanity_check (const void *d1, const void *d2, size_t s)
+{
+ CUdeviceptr pb1, pb2;
+ size_t ps1, ps2;
+ if (!s)
+ return true;
+ if (!d1 || !d2)
+ {
+ GOMP_PLUGIN_error ("invalid device address");
+ return false;
+ }
+ CUDA_CALL (cuMemGetAddressRange, &pb1, &ps1, (CUdeviceptr) d1);
+ CUDA_CALL (cuMemGetAddressRange, &pb2, &ps2, (CUdeviceptr) d2);
+ if (!pb1 || !pb2)
+ {
+ GOMP_PLUGIN_error ("invalid device address");
+ return false;
+ }
+ if ((void *)(d1 + s) > (void *)(pb1 + ps1)
+ || (void *)(d2 + s) > (void *)(pb2 + ps2))
+ {
+ GOMP_PLUGIN_error ("invalid size");
+ return false;
+ }
+ return true;
+}
+
+static bool
cuda_memcpy_sanity_check (const void *h, const void *d, size_t s)
{
CUdeviceptr pb;
@@ -2118,6 +2146,9 @@ GOMP_OFFLOAD_dev2host (int ord, void *dst, const void *src, size_t n)
bool
GOMP_OFFLOAD_dev2dev (int ord, void *dst, const void *src, size_t n)
{
+ if (!nvptx_attach_host_thread_to_device (ord)
+ || !cuda_memcpy_dev_sanity_check (dst, src, n))
+ return false;
CUDA_CALL (cuMemcpyDtoDAsync, (CUdeviceptr) dst, (CUdeviceptr) src, n, NULL);
return true;
}
@@ -2329,6 +2360,18 @@ GOMP_OFFLOAD_openacc_async_dev2host (int ord, void *dst, const void *src,
return true;
}
+bool
+GOMP_OFFLOAD_openacc_async_dev2dev (int ord, void *dst, const void *src,
+ size_t n, struct goacc_asyncqueue *aq)
+{
+ if (!nvptx_attach_host_thread_to_device (ord)
+ || !cuda_memcpy_dev_sanity_check (dst, src, n))
+ return false;
+ CUDA_CALL (cuMemcpyDtoDAsync, (CUdeviceptr) dst, (CUdeviceptr) src, n,
+ aq->cuda_stream);
+ return true;
+}
+
union goacc_property_value
GOMP_OFFLOAD_openacc_get_property (int n, enum goacc_property prop)
{
diff --git a/libgomp/target.c b/libgomp/target.c
index 01434f8..4ad803a 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -461,6 +461,19 @@ gomp_copy_dev2host (struct gomp_device_descr *devicep,
gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}
+attribute_hidden void
+gomp_copy_dev2dev (struct gomp_device_descr *devicep,
+ struct goacc_asyncqueue *aq,
+ void *dst, const void *src, size_t sz)
+{
+ if (__builtin_expect (aq != NULL, 0))
+ goacc_device_copy_async (devicep, devicep->openacc.async.dev2dev_func,
+ "dev", dst, "dev", src, NULL, sz, aq);
+ else
+ gomp_device_copy (devicep, devicep->dev2dev_func, "dev", dst,
+ "dev", src, sz);
+}
+
static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
@@ -6312,6 +6325,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
|| !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
|| !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
|| !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
+ || !DLSYM_OPT (openacc.async.dev2dev, openacc_async_dev2dev)
|| !DLSYM_OPT (openacc.get_property, openacc_get_property))
{
/* Require all the OpenACC handlers if we have
diff --git a/libgomp/testsuite/libgomp.fortran/metadirective-1.f90 b/libgomp/testsuite/libgomp.fortran/metadirective-1.f90
index 7b3e09f..d6f4d5b 100644
--- a/libgomp/testsuite/libgomp.fortran/metadirective-1.f90
+++ b/libgomp/testsuite/libgomp.fortran/metadirective-1.f90
@@ -1,4 +1,5 @@
-! { dg-do run }
+! { dg-do run { target { ! offload_target_nvptx } } }
+! { dg-do compile { target offload_target_nvptx } }
program test
implicit none
@@ -33,6 +34,10 @@ program test
contains
subroutine f (x, y, z)
integer :: x(N), y(N), z(N)
+ ! The following fails as on the host the target side cannot be
+ ! resolved - and the 'teams' or not status affects how 'target'
+ ! is called. -> See PR118694, esp. comment 9.
+ ! Note also the dg-do compile above for offload_target_nvptx
!$omp target map (to: x, y) map(from: z)
block
@@ -43,6 +48,7 @@ contains
z(i) = x(i) * y(i)
enddo
end block
+ ! { dg-bogus "'target' construct with nested 'teams' construct contains directives outside of the 'teams' construct" "PR118694" { xfail offload_target_nvptx } .-9 } */
end subroutine
subroutine g (x, y, z)
integer :: x(N), y(N), z(N)
@@ -56,6 +62,7 @@ contains
z(i) = x(i) * y(i)
enddo
end block
+ ! { dg-bogus "'target' construct with nested 'teams' construct contains directives outside of the 'teams' construct" "PR118694" { xfail offload_target_nvptx } .-9 } */
!$omp end target
end subroutine
end program
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c
new file mode 100644
index 0000000..eda651d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c
@@ -0,0 +1,96 @@
+/* { dg-prune-output "using .vector_length \\(32\\)" } */
+
+/* PR libgomp/93226 */
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <openacc.h>
+
+enum { N = 1024 };
+
+static int D[N];
+#pragma acc declare device_resident(D)
+
+#pragma acc routine
+intptr_t init_d()
+{
+ for (int i = 0; i < N; i++)
+ D[i] = 27*i;
+ return (intptr_t) &D[0];
+}
+
+int
+main ()
+{
+ int *a, *b, *e;
+ void *d_a, *d_b, *d_c, *d_d, *d_e, *d_f;
+ intptr_t intptr;
+ bool fail = false;
+
+ a = (int *) malloc (N*sizeof (int));
+ b = (int *) malloc (N*sizeof (int));
+ e = (int *) malloc (N*sizeof (int));
+ d_c = acc_malloc (N*sizeof (int));
+ d_f = acc_malloc (N*sizeof (int));
+
+ memset (e, 0xff, N*sizeof (int));
+ d_e = acc_copyin (e, N*sizeof (int));
+
+ #pragma acc serial copyout(intptr)
+ intptr = init_d ();
+ d_d = (void*) intptr;
+ acc_memcpy_device (d_c, d_d, N*sizeof (int));
+
+ #pragma acc serial copy(fail) deviceptr(d_c) firstprivate(intptr)
+ {
+ int *cc = (int *) d_c;
+ int *dd = (int *) intptr;
+ for (int i = 0; i < N; i++)
+ if (dd[i] != 27*i || cc[i] != 27*i)
+ {
+ fail = true;
+ __builtin_abort ();
+ }
+ }
+ if (fail) __builtin_abort ();
+
+ for (int i = 0; i < N; i++)
+ a[i] = 11*i;
+ for (int i = 0; i < N; i++)
+ b[i] = 31*i;
+
+ d_a = acc_copyin (a, N*sizeof (int));
+ acc_copyin_async (b, N*sizeof (int), acc_async_noval);
+
+ #pragma acc parallel deviceptr(d_c) async
+ {
+ int *cc = (int *) d_c;
+ #pragma acc loop
+ for (int i = 0; i < N; i++)
+ cc[i] = -17*i;
+ }
+
+ acc_memcpy_device_async (d_d, d_a, N*sizeof (int), acc_async_noval);
+ acc_memcpy_device_async (d_f, d_c, N*sizeof (int), acc_async_noval);
+ acc_wait (acc_async_noval);
+ d_b = acc_deviceptr (b);
+ acc_memcpy_device_async (d_e, d_b, N*sizeof (int), acc_async_noval);
+ acc_wait (acc_async_noval);
+
+ #pragma acc serial deviceptr(d_d, d_e, d_f) copy(fail)
+ {
+ int *dd = (int *) d_d;
+ int *ee = (int *) d_e;
+ int *ff = (int *) d_f;
+ for (int i = 0; i < N; i++)
+ if (dd[i] != 11*i
+ || ee[i] != 31*i
+ || ff[i] != -17*i)
+ {
+ fail = true;
+ __builtin_abort ();
+ }
+ }
+ if (fail) __builtin_abort ();
+}
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90
new file mode 100644
index 0000000..8f3a8f0
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90
@@ -0,0 +1,113 @@
+! { dg-prune-output "using .vector_length \\(32\\)" }
+
+! PR libgomp/93226 */
+
+module m
+ use iso_c_binding
+ use openacc
+ implicit none (external, type)
+
+ integer, parameter :: N = 1024
+
+ integer :: D(N)
+ !$acc declare device_resident(D)
+
+contains
+
+ integer(c_intptr_t) function init_d()
+ !$acc routine
+ integer :: i
+ do i = 1, N
+ D(i) = 27*i
+ end do
+ init_d = loc(D)
+ end
+end module
+
+program main
+ use m
+ implicit none (external, type)
+
+ integer, allocatable, target :: a(:), b(:), e(:)
+ type(c_ptr) :: d_a, d_b, d_c, d_d, d_e, d_f
+ integer(c_intptr_t) intptr
+ integer :: i
+ logical fail
+
+ fail = .false.
+
+ allocate(a(N), b(N), e(N))
+ d_c = acc_malloc (N*c_sizeof (i))
+ d_f = acc_malloc (N*c_sizeof (i))
+
+ e = huge(e)
+ call acc_copyin (e, N*c_sizeof (i));
+ d_e = acc_deviceptr (e);
+
+ !$acc serial copyout(intptr)
+ intptr = init_d ()
+ !$acc end serial
+ d_d = transfer(intptr, d_d)
+ call acc_memcpy_device (d_c, d_d, N*c_sizeof (i))
+
+ !$acc serial copy(fail) copy(a) deviceptr(d_c, d_d) firstprivate(intptr)
+ block
+ integer, pointer :: cc(:), dd(:)
+ call c_f_pointer (d_c, cc, [N])
+ call c_f_pointer (d_d, dd, [N])
+ a = cc
+ do i = 1, N
+ if (dd(i) /= 27*i .or. cc(i) /= 27*i) then
+ fail = .true.
+ stop 1
+ end if
+ end do
+ end block
+ !$acc end serial
+ if (fail) error stop 1
+
+ do i = 1, N
+ a(i) = 11*i
+ b(i) = 31*i
+ end do
+
+ call acc_copyin (a, N*c_sizeof (i))
+ d_a = acc_deviceptr (a)
+ call acc_copyin_async (b, N*c_sizeof (i), acc_async_noval)
+
+ !$acc parallel deviceptr(d_c) private(i) async
+ block
+ integer, pointer :: cc(:)
+ call c_f_pointer (d_c, cc, [N])
+ !$acc loop
+ do i = 1, N
+ cc(i) = -17*i
+ end do
+ end block
+ !$acc end parallel
+
+ call acc_memcpy_device_async (d_d, d_a, N*c_sizeof (i), acc_async_noval)
+ call acc_memcpy_device_async (d_f, d_c, N*c_sizeof (i), acc_async_noval)
+ call acc_wait (acc_async_noval)
+ d_b = acc_deviceptr (b)
+ call acc_memcpy_device_async (d_e, d_b, N*c_sizeof (i), acc_async_noval)
+ call acc_wait (acc_async_noval)
+
+ !$acc serial deviceptr(d_d, d_e, d_f) private(i) copy(fail)
+ block
+ integer, pointer :: dd(:), ee(:), ff(:)
+ call c_f_pointer (d_d, dd, [N])
+ call c_f_pointer (d_e, ee, [N])
+ call c_f_pointer (d_f, ff, [N])
+ do i = 1, N
+ if (dd(i) /= 11*i &
+ .or. ee(i) /= 31*i &
+ .or. ff(i) /= -17*i) then
+ fail = .true.
+ stop 2
+ end if
+ end do
+ end block
+ !$acc end serial
+ if (fail) error stop 2
+end