aboutsummaryrefslogtreecommitdiff
path: root/offload
diff options
context:
space:
mode:
Diffstat (limited to 'offload')
-rw-r--r-- offload/libomptarget/OpenMP/InteropAPI.cpp | 41
-rw-r--r-- offload/libomptarget/exports | 5
-rw-r--r-- offload/plugins-nextgen/amdgpu/src/rtl.cpp | 31
-rw-r--r-- offload/plugins-nextgen/cuda/src/rtl.cpp | 44
-rw-r--r-- offload/test/offloading/fortran/target-declare-mapper-parent-allocatable.f90 | 43
5 files changed, 159 insertions, 5 deletions
diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp
index eb5425e..c55ef2c 100644
--- a/offload/libomptarget/OpenMP/InteropAPI.cpp
+++ b/offload/libomptarget/OpenMP/InteropAPI.cpp
@@ -124,7 +124,7 @@ void *getProperty<void *>(omp_interop_val_t &InteropVal,
case omp_ipr_device_context:
return InteropVal.device_info.Context;
case omp_ipr_targetsync:
- return InteropVal.async_info->Queue;
+ return InteropVal.async_info ? InteropVal.async_info->Queue : nullptr;
default:;
}
getTypeMismatch(Property, Err);
@@ -167,7 +167,6 @@ bool getPropertyCheck(omp_interop_val_t **InteropPtr,
omp_interop_property_t property_id, \
int *err) { \
omp_interop_val_t *interop_val = (omp_interop_val_t *)interop; \
- assert((interop_val)->interop_type == kmp_interop_type_targetsync); \
if (!getPropertyCheck(&interop_val, property_id, err)) { \
return (RETURN_TYPE)(0); \
} \
@@ -275,8 +274,8 @@ omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType,
return Interop;
}
-int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop,
- interop_ctx_t *Ctx, dep_pack_t *Deps) {
+int __tgt_interop_use60(ident_t *LocRef, omp_interop_val_t *Interop,
+ interop_ctx_t *Ctx, dep_pack_t *Deps) {
bool Nowait = Ctx->flags.nowait;
DP("Call to %s with interop " DPxMOD ", nowait %" PRId32 "\n", __func__,
DPxPTR(Interop), Nowait);
@@ -359,6 +358,40 @@ EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop,
return omp_irc_success;
}
+// Backwards compatibility wrappers
+/// Backwards-compatibility form of interop creation. Repackages the loose
+/// arguments into an interop context and an optional dependence pack, calls
+/// __tgt_interop_get, and stores the returned object in \p InteropPtr.
+void __tgt_interop_init(ident_t *LocRef, int32_t Gtid,
+                        omp_interop_val_t *&InteropPtr, int32_t InteropType,
+                        int32_t DeviceId, int32_t Ndeps,
+                        kmp_depend_info_t *DepList, int32_t HaveNowait) {
+  // Numeric value that selects targetsync on this entry point; any other
+  // value is forwarded as a plain target interop.
+  constexpr int32_t LegacyTargetSync = 2;
+  const auto NewType = InteropType == LegacyTargetSync
+                           ? kmp_interop_type_targetsync
+                           : kmp_interop_type_target;
+
+  interop_ctx_t Ctx = {0, {false, static_cast<bool>(HaveNowait), 0}, Gtid};
+  dep_pack_t Deps = {Ndeps, 0, DepList, nullptr};
+  // With no dependences a null pack is passed instead of an empty one.
+  dep_pack_t *DepsArg = Ndeps ? &Deps : nullptr;
+
+  InteropPtr =
+      __tgt_interop_get(LocRef, NewType, DeviceId, 0, nullptr, &Ctx, DepsArg);
+}
+
+/// Backwards-compatibility form of the interop "use" entry point. Repackages
+/// the loose arguments and forwards to __tgt_interop_use60. \p DeviceId is not
+/// consulted, and the status returned by __tgt_interop_use60 is discarded
+/// because this signature returns void.
+void __tgt_interop_use(ident_t *LocRef, int32_t Gtid,
+                       omp_interop_val_t *&InteropPtr, int32_t DeviceId,
+                       int32_t Ndeps, kmp_depend_info_t *DepList,
+                       int32_t HaveNowait) {
+  dep_pack_t Deps = {Ndeps, 0, DepList, nullptr};
+  interop_ctx_t Ctx = {0, {false, static_cast<bool>(HaveNowait), 0}, Gtid};
+  // With no dependences a null pack is passed instead of an empty one.
+  dep_pack_t *DepsArg = Ndeps ? &Deps : nullptr;
+  __tgt_interop_use60(LocRef, InteropPtr, &Ctx, DepsArg);
+}
+
+/// Backwards-compatibility form of interop destruction. Repackages the loose
+/// arguments and forwards to __tgt_interop_release. \p DeviceId is not
+/// consulted.
+///
+/// NOTE(review): \p InteropPtr is taken by reference but is not reset after the
+/// release call; confirm whether callers of this entry point expect it to be
+/// cleared.
+void __tgt_interop_destroy(ident_t *LocRef, int32_t Gtid,
+                           omp_interop_val_t *&InteropPtr, int32_t DeviceId,
+                           int32_t Ndeps, kmp_depend_info_t *DepList,
+                           int32_t HaveNowait) {
+  dep_pack_t Deps = {Ndeps, 0, DepList, nullptr};
+  interop_ctx_t Ctx = {0, {false, static_cast<bool>(HaveNowait), 0}, Gtid};
+  // With no dependences a null pack is passed instead of an empty one.
+  dep_pack_t *DepsArg = Ndeps ? &Deps : nullptr;
+  __tgt_interop_release(LocRef, InteropPtr, &Ctx, DepsArg);
+}
+
} // extern "C"
llvm::Expected<DeviceTy &> omp_interop_val_t::getDevice() const {
diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports
index 8e2db6b..1374bfe 100644
--- a/offload/libomptarget/exports
+++ b/offload/libomptarget/exports
@@ -68,8 +68,11 @@ VERS1.0 {
omp_get_interop_int;
omp_get_interop_name;
omp_get_interop_type_desc;
- __tgt_interop_get;
+ __tgt_interop_init;
__tgt_interop_use;
+ __tgt_interop_destroy;
+ __tgt_interop_get;
+ __tgt_interop_use60;
__tgt_interop_release;
__tgt_target_sync;
__llvmPushCallConfiguration;
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 7b834ee..f73fa047 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2712,6 +2712,37 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
return Plugin::success();
}
+ /// Choose the interop specification this device offers for \p InteropType.
+ /// \p NumPrefers and \p Prefers are not consulted.
+ interop_spec_t selectInteropPreference(int32_t InteropType,
+                                        int32_t NumPrefers,
+                                        interop_spec_t *Prefers) override {
+   // TODO: update once targetsync is supported
+   // Anything other than a plain target interop is answered with tgt_fr_none.
+   if (InteropType != kmp_interop_type_target)
+     return interop_spec_t{tgt_fr_none, {false, 0}, 0};
+   return interop_spec_t{tgt_fr_hsa, {false, 0}, 0};
+ }
+
+ /// Allocate an interop object for this device, tagged with the HSA foreign
+ /// runtime id and the AMD vendor id. Only the device field is populated (with
+ /// the agent handle); platform and context are left null. \p InteropSpec is
+ /// not consulted. The object is created with `new`; releaseInterop deletes
+ /// it.
+ Expected<omp_interop_val_t *>
+ createInterop(int32_t InteropType, interop_spec_t &InteropSpec) override {
+   const auto Kind = static_cast<kmp_interop_type_t>(InteropType);
+   auto *Interop = new omp_interop_val_t(DeviceId, Kind);
+   Interop->vendor_id = omp_vendor_amd;
+   Interop->fr_id = tgt_fr_hsa;
+
+   // TODO: implement targetsync support
+
+   auto &Info = Interop->device_info;
+   Info.Device = reinterpret_cast<void *>(Agent.handle);
+   Info.Platform = nullptr;
+   Info.Context = nullptr;
+
+   return Interop;
+ }
+
+ /// Destroy an interop object. A null \p Interop is accepted and ignored.
+ /// Always reports success.
+ Error releaseInterop(omp_interop_val_t *Interop) override {
+   // Deleting a null pointer is a no-op, so no explicit guard is needed.
+   delete Interop;
+   return Plugin::success();
+ }
+
Error enqueueHostCallImpl(void (*Callback)(void *), void *UserData,
AsyncInfoWrapperTy &AsyncInfo) override {
AMDGPUStreamTy *Stream = nullptr;
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index b30c651..e5c4a1b 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -917,6 +917,50 @@ struct CUDADeviceTy : public GenericDeviceTy {
return Plugin::success();
}
+ /// Report the interop specification this device offers. The same CUDA
+ /// specification is returned for every request; none of the arguments are
+ /// consulted.
+ interop_spec_t selectInteropPreference(int32_t InteropType,
+                                        int32_t NumPrefers,
+                                        interop_spec_t *Prefers) override {
+   // NOTE(review): the `true` flag presumably advertises targetsync support;
+   // confirm against the interop_spec_t definition.
+   interop_spec_t Spec{tgt_fr_cuda, {true, 0}, 0};
+   return Spec;
+ }
+
+ /// Create an interop object for this CUDA device.
+ ///
+ /// For target and targetsync interops the device handle and context are
+ /// recorded. For targetsync a stream is additionally taken from
+ /// CUDAStreamManager and stored as the queue of a freshly allocated
+ /// async-info record. \p InteropSpec is not consulted.
+ ///
+ /// \returns the new object, or the error reported by setContext() or by the
+ /// stream manager. Both of those calls are made before anything is
+ /// allocated, so an early error return does not leak the interop object or
+ /// its async-info record.
+ Expected<omp_interop_val_t *>
+ createInterop(int32_t InteropType, interop_spec_t &InteropSpec) override {
+   const bool NeedsQueue = InteropType == kmp_interop_type_targetsync;
+
+   // Do everything that can fail first, while there is nothing to clean up.
+   CUstream Stream = nullptr;
+   if (NeedsQueue) {
+     if (auto Err = setContext())
+       return Err;
+     if (auto Err = CUDAStreamManager.getResource(Stream))
+       return Err;
+   }
+
+   auto *Ret = new omp_interop_val_t(
+       DeviceId, static_cast<kmp_interop_type_t>(InteropType));
+   Ret->fr_id = tgt_fr_cuda;
+   Ret->vendor_id = omp_vendor_nvidia;
+
+   if (InteropType == kmp_interop_type_target || NeedsQueue) {
+     Ret->device_info.Platform = nullptr;
+     Ret->device_info.Device = reinterpret_cast<void *>(Device);
+     Ret->device_info.Context = Context;
+   }
+
+   if (NeedsQueue) {
+     Ret->async_info = new __tgt_async_info();
+     Ret->async_info->Queue = Stream;
+   }
+   return Ret;
+ }
+
+ /// Free an interop object together with its async-info record, if one is
+ /// attached. A null \p Interop is accepted and ignored. Always reports
+ /// success.
+ Error releaseInterop(omp_interop_val_t *Interop) override {
+   if (Interop) {
+     // NOTE(review): the stream stored in async_info->Queue is not handed
+     // back to CUDAStreamManager here; confirm whether that happens elsewhere.
+     // Deleting a null async_info is a no-op, so it needs no separate guard.
+     delete Interop->async_info;
+     delete Interop;
+   }
+   return Plugin::success();
+ }
+
Error enqueueHostCallImpl(void (*Callback)(void *), void *UserData,
AsyncInfoWrapperTy &AsyncInfo) override {
if (auto Err = setContext())
diff --git a/offload/test/offloading/fortran/target-declare-mapper-parent-allocatable.f90 b/offload/test/offloading/fortran/target-declare-mapper-parent-allocatable.f90
new file mode 100644
index 0000000..65e04af
--- /dev/null
+++ b/offload/test/offloading/fortran/target-declare-mapper-parent-allocatable.f90
@@ -0,0 +1,43 @@
+! This test validates that declare mapper for a derived type that extends
+! a parent type with an allocatable component correctly maps the nested
+! allocatable payload via the mapper when the whole object is mapped on
+! target.
+
+! REQUIRES: flang, amdgpu
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+
+! Maps an object of an extended derived type through a named declare mapper
+! and prints both allocatable components after a target region has written
+! to them.
+program target_declare_mapper_parent_allocatable
+ implicit none
+
+ ! Abstract parent type with one allocatable array component.
+ type, abstract :: base_t
+ real, allocatable :: base_arr(:)
+ end type base_t
+
+ ! Extension of base_t that adds a second allocatable array component.
+ type, extends(base_t) :: real_t
+ real, allocatable :: real_arr(:)
+ end type real_t
+ ! Named mapper for real_t listing both the inherited and the own component.
+ !$omp declare mapper(custommapper: real_t :: t) map(t%base_arr, t%real_arr)
+
+ type(real_t) :: r
+ integer :: i
+ ! Both arrays start out as ten elements of 1.0.
+ allocate(r%base_arr(10), source=1.0)
+ allocate(r%real_arr(10), source=1.0)
+
+ ! Map r in both directions through the named mapper.
+ !$omp target map(mapper(custommapper), tofrom: r)
+ do i = 1, size(r%base_arr)
+ r%base_arr(i) = 2.0
+ ! real_arr(i) is assigned twice; the second assignment wins. base_arr(1) is
+ ! already 2.0 after the first iteration, so every real_arr element ends up
+ ! as 2.0.
+ r%real_arr(i) = 3.0
+ r%real_arr(i) = r%base_arr(1)
+ end do
+ !$omp end target
+
+
+ ! Each print below is preceded by a directive comment giving the expected
+ ! output line: ten values of 2.0 for both arrays.
+ !CHECK: base_arr: 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
+ print*, "base_arr: ", r%base_arr
+ !CHECK: real_arr: 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
+ print*, "real_arr: ", r%real_arr
+
+ deallocate(r%real_arr)
+ deallocate(r%base_arr)
+end program target_declare_mapper_parent_allocatable