aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--openmp/libomptarget/src/omptarget.cpp3
-rw-r--r--openmp/libomptarget/test/mapping/auto_zero_copy_globals.cpp85
2 files changed, 87 insertions, 1 deletions
diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
index f97cacf..821669d 100644
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -188,7 +188,8 @@ static int initLibrary(DeviceTy &Device) {
// If unified memory is active, the corresponding global is a device
// reference to the host global. We need to initialize the pointer on
// the deive to point to the memory on the host.
- if (PM->getRequirements() & OMP_REQ_UNIFIED_SHARED_MEMORY) {
+ if ((PM->getRequirements() & OMP_REQ_UNIFIED_SHARED_MEMORY) ||
+ (PM->getRequirements() & OMPX_REQ_AUTO_ZERO_COPY)) {
if (Device.RTL->data_submit(DeviceId, DeviceEntry.addr, Entry.addr,
Entry.size) != OFFLOAD_SUCCESS)
REPORT("Failed to write symbol for USM %s\n", Entry.name);
diff --git a/openmp/libomptarget/test/mapping/auto_zero_copy_globals.cpp b/openmp/libomptarget/test/mapping/auto_zero_copy_globals.cpp
new file mode 100644
index 0000000..4a13d27
--- /dev/null
+++ b/openmp/libomptarget/test/mapping/auto_zero_copy_globals.cpp
@@ -0,0 +1,85 @@
+// clang-format off
+// RUN: %libomptarget-compilexx-generic
+// RUN: env OMPX_APU_MAPS=1 HSA_XNACK=1 LIBOMPTARGET_INFO=60 %libomptarget-run-generic 2>&1 \
+// RUN: | %fcheck-generic -check-prefix=CHECK
+
+// UNSUPPORTED: aarch64-unknown-linux-gnu
+// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+// UNSUPPORTED: nvptx64-nvidia-cuda
+// UNSUPPORTED: nvptx64-nvidia-cuda-LTO
+// UNSUPPORTED: x86_64-pc-linux-gnu
+// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+
+// REQUIRES: unified_shared_memory
+
+// clang-format on
+
+#include <cstdint>
+#include <cstdio>
+
+/// Test for globals under automatic zero-copy.
+/// Because we are building without unified_shared_memory
+/// requirement pragma, all globals are allocated in the device
+/// memory of all used GPUs. To ensure those globals contain the intended
+/// values, we need to execute H2D and D2H memory copies even if we are running
+/// in automatic zero-copy. This only applies to globals. Local variables (their
+/// host pointers) are passed to the kernels by-value, according to the
+/// automatic zero-copy behavior.
+
+#pragma omp begin declare target
+int32_t x; // 4 bytes
+int32_t z[10]; // 40 bytes
+int32_t *k; // 20 bytes
+#pragma omp end declare target
+
+int main() {
+ int32_t *dev_k = nullptr;
+ x = 3;
+ int32_t y = -1;
+ for (size_t t = 0; t < 10; t++)
+ z[t] = t;
+ k = new int32_t[5];
+
+ printf("Host pointer for k = %p\n", k);
+ for (size_t t = 0; t < 5; t++)
+ k[t] = -t;
+
+/// target update to forces a copy between host and device global, which we must
+/// execute to keep the two global copies consistent. CHECK: Copying data from
+/// host to device, HstPtr={{.*}}, TgtPtr={{.*}}, Size=40, Name=z
+#pragma omp target update to(z[ : 10])
+
+/// target map with always modifier (for x) forces a copy between host and
+/// device global, which we must execute to keep the two global copies
+/// consistent. k's content (host address) is passed by-value to the kernel
+/// (Size=20 case). y, being a local variable, is also passed by-value to the
+/// kernel (Size=4 case) CHECK: Return HstPtrBegin {{.*}} Size=4 for unified
+/// shared memory CHECK: Return HstPtrBegin {{.*}} Size=20 for unified shared
+/// memory CHECK: Copying data from host to device, HstPtr={{.*}},
+/// TgtPtr={{.*}}, Size=4, Name=x
+#pragma omp target map(to : k[ : 5]) map(always, tofrom : x) map(tofrom : y) \
+ map(from : dev_k)
+ {
+ x++;
+ y++;
+ for (size_t t = 0; t < 10; t++)
+ z[t]++;
+ dev_k = k;
+ }
+/// CHECK-NOT: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}},
+/// Size=20, Name=k
+
+/// CHECK: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}},
+/// Size=4, Name=x
+
+/// CHECK: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}},
+/// Size=40, Name=z
+#pragma omp target update from(z[ : 10])
+
+ /// CHECK-NOT: k pointer not correctly passed to kernel
+ if (dev_k != k)
+ printf("k pointer not correctly passed to kernel\n");
+
+ delete[] k;
+ return 0;
+}