diff options
-rw-r--r-- | openmp/libomptarget/src/omptarget.cpp | 3 | ||||
-rw-r--r-- | openmp/libomptarget/test/mapping/auto_zero_copy_globals.cpp | 85 |
2 files changed, 87 insertions, 1 deletions
diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp index f97cacf..821669d 100644 --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -188,7 +188,8 @@ static int initLibrary(DeviceTy &Device) { // If unified memory is active, the corresponding global is a device // reference to the host global. We need to initialize the pointer on // the deive to point to the memory on the host. - if (PM->getRequirements() & OMP_REQ_UNIFIED_SHARED_MEMORY) { + if ((PM->getRequirements() & OMP_REQ_UNIFIED_SHARED_MEMORY) || + (PM->getRequirements() & OMPX_REQ_AUTO_ZERO_COPY)) { if (Device.RTL->data_submit(DeviceId, DeviceEntry.addr, Entry.addr, Entry.size) != OFFLOAD_SUCCESS) REPORT("Failed to write symbol for USM %s\n", Entry.name); diff --git a/openmp/libomptarget/test/mapping/auto_zero_copy_globals.cpp b/openmp/libomptarget/test/mapping/auto_zero_copy_globals.cpp new file mode 100644 index 0000000..4a13d27 --- /dev/null +++ b/openmp/libomptarget/test/mapping/auto_zero_copy_globals.cpp @@ -0,0 +1,85 @@ +// clang-format off +// RUN: %libomptarget-compilexx-generic +// RUN: env OMPX_APU_MAPS=1 HSA_XNACK=1 LIBOMPTARGET_INFO=60 %libomptarget-run-generic 2>&1 \ +// RUN: | %fcheck-generic -check-prefix=CHECK + +// UNSUPPORTED: aarch64-unknown-linux-gnu +// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO +// UNSUPPORTED: nvptx64-nvidia-cuda +// UNSUPPORTED: nvptx64-nvidia-cuda-LTO +// UNSUPPORTED: x86_64-pc-linux-gnu +// UNSUPPORTED: x86_64-pc-linux-gnu-LTO + +// REQUIRES: unified_shared_memory + +// clang-format on + +#include <cstdint> +#include <cstdio> + +/// Test for globals under automatic zero-copy. +/// Because we are building without unified_shared_memory +/// requirement pragma, all globals are allocated in the device +/// memory of all used GPUs. To ensure those globals contain the intended +/// values, we need to execute H2D and D2H memory copies even if we are running +/// in automatic zero-copy. This only applies to globals. Local variables (their +/// host pointers) are passed to the kernels by-value, according to the +/// automatic zero-copy behavior. + +#pragma omp begin declare target +int32_t x; // 4 bytes +int32_t z[10]; // 40 bytes +int32_t *k; // 20 bytes +#pragma omp end declare target + +int main() { + int32_t *dev_k = nullptr; + x = 3; + int32_t y = -1; + for (size_t t = 0; t < 10; t++) + z[t] = t; + k = new int32_t[5]; + + printf("Host pointer for k = %p\n", k); + for (size_t t = 0; t < 5; t++) + k[t] = -t; + +/// target update to forces a copy between host and device global, which we must +/// execute to keep the two global copies consistent. CHECK: Copying data from +/// host to device, HstPtr={{.*}}, TgtPtr={{.*}}, Size=40, Name=z +#pragma omp target update to(z[ : 10]) + +/// target map with always modifier (for x) forces a copy between host and +/// device global, which we must execute to keep the two global copies +/// consistent. k's content (host address) is passed by-value to the kernel +/// (Size=20 case). y, being a local variable, is also passed by-value to the +/// kernel (Size=4 case) CHECK: Return HstPtrBegin {{.*}} Size=4 for unified +/// shared memory CHECK: Return HstPtrBegin {{.*}} Size=20 for unified shared +/// memory CHECK: Copying data from host to device, HstPtr={{.*}}, +/// TgtPtr={{.*}}, Size=4, Name=x +#pragma omp target map(to : k[ : 5]) map(always, tofrom : x) map(tofrom : y) \ + map(from : dev_k) + { + x++; + y++; + for (size_t t = 0; t < 10; t++) + z[t]++; + dev_k = k; + } +/// CHECK-NOT: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}}, +/// Size=20, Name=k + +/// CHECK: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}}, +/// Size=4, Name=x + +/// CHECK: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}}, +/// Size=40, Name=z +#pragma omp target update from(z[ : 10]) + + /// CHECK-NOT: k pointer not correctly passed to kernel + if (dev_k != k) + printf("k pointer not correctly passed to kernel\n"); + + delete[] k; + return 0; +} |