diff options
Diffstat (limited to 'offload')
-rw-r--r-- | offload/libomptarget/OpenMP/InteropAPI.cpp | 41 | ||||
-rw-r--r-- | offload/libomptarget/exports | 5 | ||||
-rw-r--r-- | offload/plugins-nextgen/amdgpu/src/rtl.cpp | 31 | ||||
-rw-r--r-- | offload/plugins-nextgen/cuda/src/rtl.cpp | 44 | ||||
-rw-r--r-- | offload/test/offloading/fortran/target-declare-mapper-parent-allocatable.f90 | 43 |
5 files changed, 159 insertions, 5 deletions
diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp
index eb5425e..c55ef2c 100644
--- a/offload/libomptarget/OpenMP/InteropAPI.cpp
+++ b/offload/libomptarget/OpenMP/InteropAPI.cpp
@@ -124,7 +124,7 @@ void *getProperty<void *>(omp_interop_val_t &InteropVal,
   case omp_ipr_device_context:
     return InteropVal.device_info.Context;
   case omp_ipr_targetsync:
-    return InteropVal.async_info->Queue;
+    return InteropVal.async_info ? InteropVal.async_info->Queue : nullptr;
   default:;
   }
   getTypeMismatch(Property, Err);
@@ -167,7 +167,6 @@ bool getPropertyCheck(omp_interop_val_t **InteropPtr,
                                        omp_interop_property_t property_id, \
                                        int *err) { \
     omp_interop_val_t *interop_val = (omp_interop_val_t *)interop; \
-    assert((interop_val)->interop_type == kmp_interop_type_targetsync); \
     if (!getPropertyCheck(&interop_val, property_id, err)) { \
       return (RETURN_TYPE)(0); \
     } \
@@ -275,8 +274,8 @@ omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType,
   return Interop;
 }
 
-int __tgt_interop_use(ident_t *LocRef, omp_interop_val_t *Interop,
-                      interop_ctx_t *Ctx, dep_pack_t *Deps) {
+int __tgt_interop_use60(ident_t *LocRef, omp_interop_val_t *Interop,
+                        interop_ctx_t *Ctx, dep_pack_t *Deps) {
   bool Nowait = Ctx->flags.nowait;
   DP("Call to %s with interop " DPxMOD ", nowait %" PRId32 "\n", __func__,
      DPxPTR(Interop), Nowait);
@@ -359,6 +358,40 @@ EXTERN int ompx_interop_add_completion_callback(omp_interop_val_t *Interop,
   return omp_irc_success;
 }
 
+// Backwards compatibility wrappers
+void __tgt_interop_init(ident_t *LocRef, int32_t Gtid,
+                        omp_interop_val_t *&InteropPtr, int32_t InteropType,
+                        int32_t DeviceId, int32_t Ndeps,
+                        kmp_depend_info_t *DepList, int32_t HaveNowait) {
+  constexpr int32_t old_kmp_interop_type_targetsync = 2;
+  interop_ctx_t Ctx = {0, {false, (bool)HaveNowait, 0}, Gtid};
+  dep_pack_t Deps = {Ndeps, 0, DepList, nullptr};
+  InteropPtr =
+      __tgt_interop_get(LocRef,
+                        InteropType == old_kmp_interop_type_targetsync
+                            ? kmp_interop_type_targetsync
+                            : kmp_interop_type_target,
+                        DeviceId, 0, nullptr, &Ctx, Ndeps ? &Deps : nullptr);
+}
+
+void __tgt_interop_use(ident_t *LocRef, int32_t Gtid,
+                       omp_interop_val_t *&InteropPtr, int32_t DeviceId,
+                       int32_t Ndeps, kmp_depend_info_t *DepList,
+                       int32_t HaveNowait) {
+  interop_ctx_t Ctx = {0, {false, (bool)HaveNowait, 0}, Gtid};
+  dep_pack_t Deps = {Ndeps, 0, DepList, nullptr};
+  __tgt_interop_use60(LocRef, InteropPtr, &Ctx, Ndeps ? &Deps : nullptr);
+}
+
+void __tgt_interop_destroy(ident_t *LocRef, int32_t Gtid,
+                           omp_interop_val_t *&InteropPtr, int32_t DeviceId,
+                           int32_t Ndeps, kmp_depend_info_t *DepList,
+                           int32_t HaveNowait) {
+  interop_ctx_t Ctx = {0, {false, (bool)HaveNowait, 0}, Gtid};
+  dep_pack_t Deps = {Ndeps, 0, DepList, nullptr};
+  __tgt_interop_release(LocRef, InteropPtr, &Ctx, Ndeps ? &Deps : nullptr);
+}
+
 } // extern "C"
 
 llvm::Expected<DeviceTy &> omp_interop_val_t::getDevice() const {
diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports
index 8e2db6b..1374bfe 100644
--- a/offload/libomptarget/exports
+++ b/offload/libomptarget/exports
@@ -68,8 +68,11 @@ VERS1.0 {
     omp_get_interop_int;
     omp_get_interop_name;
     omp_get_interop_type_desc;
-    __tgt_interop_get;
+    __tgt_interop_init;
     __tgt_interop_use;
+    __tgt_interop_destroy;
+    __tgt_interop_get;
+    __tgt_interop_use60;
     __tgt_interop_release;
     __tgt_target_sync;
     __llvmPushCallConfiguration;
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 7b834ee..f73fa047 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2712,6 +2712,37 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
     return Plugin::success();
   }
 
+  interop_spec_t selectInteropPreference(int32_t InteropType,
+                                         int32_t NumPrefers,
+                                         interop_spec_t *Prefers) override {
+    // TODO: update once targetsync is supported
+    if (InteropType == kmp_interop_type_target)
+      return interop_spec_t{tgt_fr_hsa, {false, 0}, 0};
+    return interop_spec_t{tgt_fr_none, {false, 0}, 0};
+  }
+
+  Expected<omp_interop_val_t *>
+  createInterop(int32_t InteropType, interop_spec_t &InteropSpec) override {
+    auto *Ret = new omp_interop_val_t(
+        DeviceId, static_cast<kmp_interop_type_t>(InteropType));
+    Ret->fr_id = tgt_fr_hsa;
+    Ret->vendor_id = omp_vendor_amd;
+
+    // TODO: implement targetsync support
+
+    Ret->device_info.Platform = nullptr;
+    Ret->device_info.Device = reinterpret_cast<void *>(Agent.handle);
+    Ret->device_info.Context = nullptr;
+
+    return Ret;
+  }
+
+  Error releaseInterop(omp_interop_val_t *Interop) override {
+    if (Interop)
+      delete Interop;
+    return Plugin::success();
+  }
+
   Error enqueueHostCallImpl(void (*Callback)(void *), void *UserData,
                             AsyncInfoWrapperTy &AsyncInfo) override {
     AMDGPUStreamTy *Stream = nullptr;
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index b30c651..e5c4a1b 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -917,6 +917,50 @@ struct CUDADeviceTy : public GenericDeviceTy {
     return Plugin::success();
   }
 
+  interop_spec_t selectInteropPreference(int32_t InteropType,
+                                         int32_t NumPrefers,
+                                         interop_spec_t *Prefers) override {
+    return interop_spec_t{tgt_fr_cuda, {true, 0}, 0};
+  }
+
+  Expected<omp_interop_val_t *>
+  createInterop(int32_t InteropType, interop_spec_t &InteropSpec) override {
+    auto *Ret = new omp_interop_val_t(
+        DeviceId, static_cast<kmp_interop_type_t>(InteropType));
+    Ret->fr_id = tgt_fr_cuda;
+    Ret->vendor_id = omp_vendor_nvidia;
+
+    if (InteropType == kmp_interop_type_target ||
+        InteropType == kmp_interop_type_targetsync) {
+      Ret->device_info.Platform = nullptr;
+      Ret->device_info.Device = reinterpret_cast<void *>(Device);
+      Ret->device_info.Context = Context;
+    }
+
+    if (InteropType == kmp_interop_type_targetsync) {
+      Ret->async_info = new __tgt_async_info();
+      if (auto Err = setContext())
+        return Err;
+      CUstream Stream;
+      if (auto Err = CUDAStreamManager.getResource(Stream))
+        return Err;
+
+      Ret->async_info->Queue = Stream;
+    }
+    return Ret;
+  }
+
+  Error releaseInterop(omp_interop_val_t *Interop) override {
+    if (!Interop)
+      return Plugin::success();
+
+    if (Interop->async_info)
+      delete Interop->async_info;
+
+    delete Interop;
+    return Plugin::success();
+  }
+
   Error enqueueHostCallImpl(void (*Callback)(void *), void *UserData,
                             AsyncInfoWrapperTy &AsyncInfo) override {
     if (auto Err = setContext())
diff --git a/offload/test/offloading/fortran/target-declare-mapper-parent-allocatable.f90 b/offload/test/offloading/fortran/target-declare-mapper-parent-allocatable.f90
new file mode 100644
index 0000000..65e04af
--- /dev/null
+++ b/offload/test/offloading/fortran/target-declare-mapper-parent-allocatable.f90
@@ -0,0 +1,43 @@
+! This test validates that declare mapper for a derived type that extends
+! a parent type with an allocatable component correctly maps the nested
+! allocatable payload via the mapper when the whole object is mapped on
+! target.
+
+! REQUIRES: flang, amdgpu
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+
+program target_declare_mapper_parent_allocatable
+  implicit none
+
+  type, abstract :: base_t
+    real, allocatable :: base_arr(:)
+  end type base_t
+
+  type, extends(base_t) :: real_t
+    real, allocatable :: real_arr(:)
+  end type real_t
+  !$omp declare mapper(custommapper: real_t :: t) map(t%base_arr, t%real_arr)
+
+  type(real_t) :: r
+  integer :: i
+  allocate(r%base_arr(10), source=1.0)
+  allocate(r%real_arr(10), source=1.0)
+
+  !$omp target map(mapper(custommapper), tofrom: r)
+  do i = 1, size(r%base_arr)
+    r%base_arr(i) = 2.0
+    r%real_arr(i) = 3.0
+    r%real_arr(i) = r%base_arr(1)
+  end do
+  !$omp end target
+
+
+  !CHECK: base_arr: 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
+  print*, "base_arr: ", r%base_arr
+  !CHECK: real_arr: 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
+  print*, "real_arr: ", r%real_arr
+
+  deallocate(r%real_arr)
+  deallocate(r%base_arr)
+end program target_declare_mapper_parent_allocatable