aboutsummaryrefslogtreecommitdiff
path: root/offload
diff options
context:
space:
mode:
Diffstat (limited to 'offload')
-rw-r--r--offload/ci/.gitignore1
-rwxr-xr-xoffload/ci/openmp-offload-amdgpu-clang-flang.py71
-rwxr-xr-xoffload/ci/openmp-offload-amdgpu-runtime.py60
-rw-r--r--offload/cmake/caches/FlangOffload.cmake13
-rw-r--r--offload/include/Shared/Debug.h87
-rw-r--r--offload/include/omptarget.h4
-rw-r--r--offload/liboffload/API/Queue.td12
-rw-r--r--offload/liboffload/src/OffloadImpl.cpp32
-rw-r--r--offload/libomptarget/OpenMP/InteropAPI.cpp2
-rw-r--r--offload/libomptarget/OpenMP/Mapping.cpp20
-rw-r--r--offload/libomptarget/PluginManager.cpp6
-rw-r--r--offload/libomptarget/omptarget.cpp40
-rw-r--r--offload/plugins-nextgen/amdgpu/src/rtl.cpp14
-rw-r--r--offload/plugins-nextgen/common/include/PluginInterface.h6
-rw-r--r--offload/plugins-nextgen/common/src/PluginInterface.cpp23
-rw-r--r--offload/plugins-nextgen/cuda/src/rtl.cpp15
-rw-r--r--offload/plugins-nextgen/host/src/rtl.cpp5
-rw-r--r--offload/plugins-nextgen/level_zero/include/L0Device.h3
-rw-r--r--offload/plugins-nextgen/level_zero/src/L0Device.cpp21
-rw-r--r--offload/test/mapping/declare_mapper_target_checks.cpp145
-rw-r--r--offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_fallback.cpp32
-rw-r--r--offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_fallback_nullify.cpp28
-rw-r--r--offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_fallback_preserve.cpp28
-rw-r--r--offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_ref_fallback.cpp33
-rw-r--r--offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_ref_fallback_nullify.cpp29
-rw-r--r--offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_ref_fallback_preserve.cpp29
-rw-r--r--offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_fallback.cpp (renamed from offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_fallback.c)6
-rw-r--r--offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_fallback_nullify.cpp21
-rw-r--r--offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_fallback_preserve.cpp21
-rw-r--r--offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_ref_fallback.cpp26
-rw-r--r--offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_ref_fallback_nullify.cpp24
-rw-r--r--offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_ref_fallback_preserve.cpp22
-rw-r--r--offload/test/offloading/fortran/default-mapper-derived-enter-data-teams-collapse.f9053
-rw-r--r--offload/test/offloading/fortran/recursive-default-mapper.f9040
-rw-r--r--offload/test/offloading/fortran/target-defaultmap-implicit-mapper.f9049
-rw-r--r--offload/test/offloading/fortran/target-parameter-array.f90131
-rw-r--r--offload/test/offloading/strided_multiple_update_from.c (renamed from offload/test/offloading/strided_multiple_update.c)0
-rw-r--r--offload/test/offloading/strided_multiple_update_to.c124
-rw-r--r--offload/test/offloading/strided_partial_update_from.c (renamed from offload/test/offloading/strided_partial_update.c)0
-rw-r--r--offload/test/offloading/strided_partial_update_to.c74
-rw-r--r--offload/test/offloading/strided_update_from.c (renamed from offload/test/offloading/strided_update.c)0
-rw-r--r--offload/test/offloading/strided_update_to.c74
-rw-r--r--offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp16
-rw-r--r--offload/unittests/OffloadAPI/CMakeLists.txt3
-rw-r--r--offload/unittests/OffloadAPI/queue/olQueryQueue.cpp24
45 files changed, 1380 insertions, 87 deletions
diff --git a/offload/ci/.gitignore b/offload/ci/.gitignore
new file mode 100644
index 0000000..f2198f0
--- /dev/null
+++ b/offload/ci/.gitignore
@@ -0,0 +1 @@
+/*.workdir
diff --git a/offload/ci/openmp-offload-amdgpu-clang-flang.py b/offload/ci/openmp-offload-amdgpu-clang-flang.py
new file mode 100755
index 0000000..f55ae02
--- /dev/null
+++ b/offload/ci/openmp-offload-amdgpu-clang-flang.py
@@ -0,0 +1,71 @@
+#! /usr/bin/env python3
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+import os
+import sys
+
+# Adapt to location in source tree
+llvmsrcroot = os.path.normpath(f"{__file__}/../../..")
+
+sys.path.insert(0, os.path.join(llvmsrcroot, ".ci/buildbot"))
+import worker
+
+llvmbuilddir = "llvm.build"
+llvminstalldir = "llvm.inst"
+
+with worker.run(
+ __file__,
+ llvmsrcroot,
+ clobberpaths=[llvmbuilddir, llvminstalldir],
+ workerjobs=64,
+) as w:
+ with w.step("configure-openmp", halt_on_fail=True):
+ w.run_command(
+ [
+ "cmake",
+ f"-S{w.in_llvmsrc('llvm')}",
+ f"-B{llvmbuilddir}",
+ "-GNinja",
+ "-DCMAKE_BUILD_TYPE=Release",
+ "-DLLVM_ENABLE_ASSERTIONS=ON",
+ f"-DLLVM_LIT_ARGS=-vv --show-unsupported --show-xfail -j {w.jobs} --time-tests --timeout 100",
+ f"-DCMAKE_INSTALL_PREFIX={w.in_workdir(llvminstalldir)}",
+ "-DCLANG_DEFAULT_LINKER=lld",
+ "-DLLVM_TARGETS_TO_BUILD=X86;AMDGPU",
+ "-DLLVM_ENABLE_ASSERTIONS=ON",
+ "-DCMAKE_C_COMPILER_LAUNCHER=ccache",
+ "-DCMAKE_CXX_COMPILER_LAUNCHER=ccache",
+ "-DFLANG_RUNTIME_F128_MATH_LIB=libquadmath",
+ "-DLLVM_ENABLE_PER_TARGET_RUNTIME_DIR=ON",
+ "-DCMAKE_CXX_STANDARD=17",
+ "-DBUILD_SHARED_LIBS=ON",
+ "-DLIBOMPTARGET_PLUGINS_TO_BUILD=amdgpu;host",
+ "-DRUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES=compiler-rt;openmp",
+ "-DLLVM_RUNTIME_TARGETS=default;amdgcn-amd-amdhsa",
+ "-DCOMPILER_RT_BUILD_ORC=OFF",
+ "-DCOMPILER_RT_BUILD_XRAY=OFF",
+ "-DCOMPILER_RT_BUILD_MEMPROF=OFF",
+ "-DCOMPILER_RT_BUILD_LIBFUZZER=OFF",
+ "-DCOMPILER_RT_BUILD_SANITIZERS=ON",
+ "-DLLVM_ENABLE_PROJECTS=clang;lld;mlir;flang;llvm",
+ "-DLLVM_ENABLE_RUNTIMES=flang-rt;offload;compiler-rt;openmp",
+ ]
+ )
+
+ with w.step("compile-openmp", halt_on_fail=True):
+ w.run_ninja(builddir=llvmbuilddir, ccache_stats=True)
+
+ with w.step("test-openmp"):
+ w.run_ninja(
+ ["check-openmp"], add_env={"HSA_ENABLE_SDMA": "0"}, builddir=llvmbuilddir
+ )
+
+ with w.step("Add check check-offload"):
+ w.run_ninja(
+ ["check-offload"], add_env={"HSA_ENABLE_SDMA": "0"}, builddir=llvmbuilddir
+ )
+
+ with w.step("LLVM: Install", halt_on_fail=True):
+ w.run_ninja(["install"], builddir=llvmbuilddir)
diff --git a/offload/ci/openmp-offload-amdgpu-runtime.py b/offload/ci/openmp-offload-amdgpu-runtime.py
new file mode 100755
index 0000000..e93f54f
--- /dev/null
+++ b/offload/ci/openmp-offload-amdgpu-runtime.py
@@ -0,0 +1,60 @@
+#! /usr/bin/env python3
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+import os
+import sys
+
+# Adapt to location in source tree
+llvmsrcroot = os.path.normpath(f"{__file__}/../../..")
+
+sys.path.insert(0, os.path.join(llvmsrcroot, ".ci/buildbot"))
+import worker
+
+llvmbuilddir = "llvm.build"
+llvminstalldir = "llvm.inst"
+
+with worker.run(
+ __file__,
+ llvmsrcroot,
+ clobberpaths=[llvmbuilddir, llvminstalldir],
+ workerjobs=32,
+) as w:
+ with w.step("configure-openmp", halt_on_fail=True):
+ w.run_command(
+ [
+ "cmake",
+ f"-S{w.in_llvmsrc('llvm')}",
+ f"-B{llvmbuilddir}",
+ "-GNinja",
+ "-DCMAKE_BUILD_TYPE=Release",
+ "-DLLVM_ENABLE_ASSERTIONS=ON",
+ f"-DLLVM_LIT_ARGS=-vv --show-unsupported --show-xfail -j {w.jobs} --time-tests --timeout 100",
+ f"-DCMAKE_INSTALL_PREFIX={w.in_workdir(llvminstalldir)}",
+ "-DCLANG_DEFAULT_LINKER=lld",
+ "-DLLVM_TARGETS_TO_BUILD=X86;AMDGPU",
+ "-DCMAKE_C_COMPILER_LAUNCHER=ccache",
+ "-DCMAKE_CXX_COMPILER_LAUNCHER=ccache",
+ "-DRUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES=compiler-rt;openmp",
+ "-DLLVM_RUNTIME_TARGETS=default;amdgcn-amd-amdhsa",
+ "-DLLVM_ENABLE_PROJECTS=clang;lld;llvm",
+ "-DLLVM_ENABLE_RUNTIMES=offload;compiler-rt;openmp",
+ ]
+ )
+
+ with w.step("compile-openmp", halt_on_fail=True):
+ w.run_ninja(builddir=llvmbuilddir, ccache_stats=True)
+
+ with w.step("test-openmp"):
+ w.run_ninja(
+ ["check-openmp"], add_env={"HSA_ENABLE_SDMA": "0"}, builddir=llvmbuilddir
+ )
+
+ with w.step("Add check check-offload"):
+ w.run_ninja(
+ ["check-offload"], add_env={"HSA_ENABLE_SDMA": "0"}, builddir=llvmbuilddir
+ )
+
+ with w.step("LLVM: Install", halt_on_fail=True):
+ w.run_ninja(["install"], builddir=llvmbuilddir)
diff --git a/offload/cmake/caches/FlangOffload.cmake b/offload/cmake/caches/FlangOffload.cmake
new file mode 100644
index 0000000..ed75df6
--- /dev/null
+++ b/offload/cmake/caches/FlangOffload.cmake
@@ -0,0 +1,13 @@
+set(LLVM_ENABLE_PROJECTS "clang;flang;mlir;clang-tools-extra;lld" CACHE STRING "")
+set(LLVM_ENABLE_RUNTIMES "compiler-rt;flang-rt;libunwind;libcxx;libcxxabi;openmp;offload" CACHE STRING "")
+set(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR ON CACHE BOOL "")
+
+set(LLVM_RUNTIME_TARGETS default;amdgcn-amd-amdhsa;nvptx64-nvidia-cuda CACHE STRING "")
+set(RUNTIMES_nvptx64-nvidia-cuda_CACHE_FILES "${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/NVPTX.cmake" CACHE STRING "")
+set(RUNTIMES_amdgcn-amd-amdhsa_CACHE_FILES "${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/AMDGPU.cmake" CACHE STRING "")
+set(RUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES "compiler-rt;libc;openmp;libcxx;libcxxabi;flang-rt" CACHE STRING "")
+set(RUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES "compiler-rt;libc;openmp;libcxx;libcxxabi;flang-rt" CACHE STRING "")
+set(RUNTIMES_nvptx64-nvidia-cuda_FLANG_RT_LIBC_PROVIDER llvm CACHE STRING "")
+set(RUNTIMES_nvptx64-nvidia-cuda_FLANG_RT_LIBCXX_PROVIDER llvm CACHE STRING "")
+set(RUNTIMES_amdgcn-amd-amdhsa_FLANG_RT_LIBC_PROVIDER llvm CACHE STRING "")
+set(RUNTIMES_amdgcn-amd-amdhsa_FLANG_RT_LIBCXX_PROVIDER llvm CACHE STRING "")
diff --git a/offload/include/Shared/Debug.h b/offload/include/Shared/Debug.h
index f1c3386..0f98a44 100644
--- a/offload/include/Shared/Debug.h
+++ b/offload/include/Shared/Debug.h
@@ -45,7 +45,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/circular_raw_ostream.h"
+#include "llvm/Support/raw_ostream.h"
/// 32-Bit field data attributes controlling information presented to the user.
enum OpenMPInfoType : uint32_t {
@@ -142,7 +142,7 @@ inline uint32_t getInfoLevel() { return getInfoLevelInternal().load(); }
#define INFO(_flags, _id, ...) \
do { \
if (::llvm::offload::debug::isDebugEnabled()) { \
- DP(__VA_ARGS__); \
+ INFO_DEBUG_INT(_flags, _id, __VA_ARGS__); \
} else if (getInfoLevel() & _flags) { \
INFO_MESSAGE(_id, __VA_ARGS__); \
} \
@@ -174,6 +174,14 @@ private:
bool ShouldEmitNewLineOnDestruction;
bool NeedEndNewLine = false;
+ /// Buffer to reduce interference between different threads
+ /// writing at the same time to the underlying stream.
+ static constexpr size_t BufferSize = 256;
+ llvm::SmallString<BufferSize> Buffer;
+
+ // Stream to write into Buffer. Its flushed to Os upon destruction.
+ llvm::raw_svector_ostream BufferStrm;
+
/// If the stream is muted, writes to it are ignored
bool Muted = false;
@@ -203,13 +211,13 @@ private:
NeedEndNewLine = true;
}
}
- void emitPrefix() { Os.write(Prefix.c_str(), Prefix.size()); }
+ void emitPrefix() { BufferStrm.write(Prefix.c_str(), Prefix.size()); }
void writeWithPrefix(StringRef Str) {
if (ShouldPrefixNextString) {
emitPrefix();
ShouldPrefixNextString = false;
}
- Os.write(Str.data(), Str.size());
+ BufferStrm.write(Str.data(), Str.size());
}
public:
@@ -218,26 +226,29 @@ public:
bool ShouldEmitNewLineOnDestruction = true)
: Prefix(std::move(Prefix)), Os(Os), BaseLevel(BaseLevel),
ShouldPrefixNextString(ShouldPrefixNextString),
- ShouldEmitNewLineOnDestruction(ShouldEmitNewLineOnDestruction) {
+ ShouldEmitNewLineOnDestruction(ShouldEmitNewLineOnDestruction),
+ BufferStrm(Buffer) {
SetUnbuffered();
}
~odbg_ostream() final {
if (ShouldEmitNewLineOnDestruction && NeedEndNewLine)
- Os << '\n';
+ BufferStrm << '\n';
+ Os << BufferStrm.str();
}
odbg_ostream(const odbg_ostream &) = delete;
odbg_ostream &operator=(const odbg_ostream &) = delete;
- odbg_ostream(odbg_ostream &&other) : Os(other.Os) {
+ odbg_ostream(odbg_ostream &&other) : Os(other.Os), BufferStrm(Buffer) {
Prefix = std::move(other.Prefix);
BaseLevel = other.BaseLevel;
ShouldPrefixNextString = other.ShouldPrefixNextString;
ShouldEmitNewLineOnDestruction = other.ShouldEmitNewLineOnDestruction;
NeedEndNewLine = other.NeedEndNewLine;
Muted = other.Muted;
+ BufferStrm << other.BufferStrm.str();
}
/// Forward the current_pos method to the underlying stream.
- uint64_t current_pos() const final { return Os.tell(); }
+ uint64_t current_pos() const final { return BufferStrm.tell(); }
/// Some of the `<<` operators expect an lvalue, so we trick the type
/// system.
@@ -247,17 +258,8 @@ public:
void shouldMute(const OnlyLevel Filter) { Muted = BaseLevel != Filter; }
};
-/// dbgs - Return a circular-buffered debug stream.
-[[maybe_unused]] static llvm::raw_ostream &dbgs() {
- // Do one-time initialization in a thread-safe way.
- static struct dbgstream {
- llvm::circular_raw_ostream strm;
-
- dbgstream() : strm(llvm::errs(), "*** Debug Log Output ***\n", 0) {}
- } thestrm;
-
- return thestrm.strm;
-}
+/// dbgs - Return the debug stream for offload debugging (just llvm::errs()).
+[[maybe_unused]] static llvm::raw_ostream &dbgs() { return llvm::errs(); }
#ifdef OMPTARGET_DEBUG
@@ -306,8 +308,14 @@ struct DebugSettings {
return;
Settings.Enabled = true;
- if (EnvRef.equals_insensitive("all"))
- return;
+
+ if (EnvRef.starts_with_insensitive("all")) {
+ auto Spec = parseDebugFilter(EnvRef);
+ if (Spec.Type.equals_insensitive("all")) {
+ Settings.DefaultLevel = Spec.Level;
+ return;
+ }
+ }
if (!EnvRef.getAsInteger(10, Settings.DefaultLevel))
return;
@@ -618,7 +626,38 @@ static inline odbg_ostream reportErrorStream() {
#define FORMAT_TO_STR(Format, ...) \
::llvm::omp::target::debug::formatToStr(Format __VA_OPT__(, ) __VA_ARGS__)
-#define DP(...) ODBG() << FORMAT_TO_STR(__VA_ARGS__);
+template <uint32_t InfoId> static constexpr const char *InfoIdToODT() {
+ constexpr auto getId = []() {
+ switch (InfoId) {
+ case OMP_INFOTYPE_KERNEL_ARGS:
+ return "KernelArgs";
+ case OMP_INFOTYPE_MAPPING_EXISTS:
+ return "MappingExists";
+ case OMP_INFOTYPE_DUMP_TABLE:
+ return "DumpTable";
+ case OMP_INFOTYPE_MAPPING_CHANGED:
+ return "MappingChanged";
+ case OMP_INFOTYPE_PLUGIN_KERNEL:
+ return "PluginKernel";
+ case OMP_INFOTYPE_DATA_TRANSFER:
+ return "DataTransfer";
+ case OMP_INFOTYPE_EMPTY_MAPPING:
+ return "EmptyMapping";
+ case OMP_INFOTYPE_ALL:
+ return "Default";
+ }
+ return static_cast<const char *>(nullptr);
+ };
+
+ constexpr const char *result = getId();
+ static_assert(result != nullptr, "Unknown InfoId being used");
+ return result;
+}
+
+// Transform the INFO id to the corresponding debug type and print the message
+#define INFO_DEBUG_INT(_flags, _id, ...) \
+ ODBG(::llvm::omp::target::debug::InfoIdToODT<_flags>()) \
+ << FORMAT_TO_STR(__VA_ARGS__);
// Define default format for pointers
static inline raw_ostream &operator<<(raw_ostream &Os, void *Ptr) {
@@ -627,9 +666,11 @@ static inline raw_ostream &operator<<(raw_ostream &Os, void *Ptr) {
}
#else
-#define DP(...) \
+
+#define INFO_DEBUG_INT(_flags, _id, ...) \
{ \
}
+
#endif // OMPTARGET_DEBUG
// New REPORT macro in the same style as ODBG
diff --git a/offload/include/omptarget.h b/offload/include/omptarget.h
index fd458fa0..5dddd00 100644
--- a/offload/include/omptarget.h
+++ b/offload/include/omptarget.h
@@ -80,6 +80,10 @@ enum tgt_map_type {
// Attach pointer and pointee, after processing all other maps.
// Applicable to map-entering directives. Does not change ref-count.
OMP_TGT_MAPTYPE_ATTACH = 0x4000,
+ // When a lookup fails, fall back to using null as the translated pointer,
+ // instead of preserving the original pointer's value. Currently only
+ // useful in conjunction with RETURN_PARAM.
+ OMP_TGT_MAPTYPE_FB_NULLIFY = 0x8000,
// descriptor for non-contiguous target-update
OMP_TGT_MAPTYPE_NON_CONTIG = 0x100000000000,
// member of struct, member given by [16 MSBs] - 1
diff --git a/offload/liboffload/API/Queue.td b/offload/liboffload/API/Queue.td
index ededa9c..4008432 100644
--- a/offload/liboffload/API/Queue.td
+++ b/offload/liboffload/API/Queue.td
@@ -125,3 +125,15 @@ def olLaunchHostFunction : Function {
];
let returns = [];
}
+
+def olQueryQueue : Function {
+ let desc = "Query for queue work completion in a non-blocking manner.";
+ let details = [
+ "The function checks if a queue work has completed enqueued work without blocking the calling thread."
+ ];
+ let params = [
+ Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>,
+ Param<"bool *", "IsQueueWorkCompleted", " A flag indicating if the queue work has completed", PARAM_OUT_OPTIONAL>
+ ];
+ let returns = [];
+}
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 4a4f665..6c37f5b 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -1005,10 +1005,28 @@ Error olMemcpy_impl(ol_queue_handle_t Queue, void *DstPtr,
if (auto Res =
DstDevice->Device->dataSubmit(DstPtr, SrcPtr, Size, QueueImpl))
return Res;
- } else {
+ } else if (SrcDevice->Platform.Plugin == DstDevice->Platform.Plugin &&
+ SrcDevice->Platform.Plugin->isDataExchangable(
+ SrcDevice->Device->getDeviceId(),
+ DstDevice->Device->getDeviceId())) {
if (auto Res = SrcDevice->Device->dataExchange(SrcPtr, *DstDevice->Device,
DstPtr, Size, QueueImpl))
return Res;
+ } else {
+ if (Queue)
+ if (auto Res = olSyncQueue_impl(Queue))
+ return Res;
+
+ void *Buffer = malloc(Size);
+ if (!Buffer)
+ return createOffloadError(ErrorCode::OUT_OF_RESOURCES,
+ "Couldn't allocate a buffer for transfer");
+ Error Res = SrcDevice->Device->dataRetrieve(Buffer, SrcPtr, Size, nullptr);
+ if (!Res)
+ Res = DstDevice->Device->dataSubmit(DstPtr, Buffer, Size, nullptr);
+
+ free(Buffer);
+ return Res;
}
return Error::success();
@@ -1231,5 +1249,17 @@ Error olMemUnregister_impl(ol_device_handle_t Device, void *Ptr) {
return Device->Device->dataUnlock(Ptr);
}
+Error olQueryQueue_impl(ol_queue_handle_t Queue, bool *IsQueueWorkCompleted) {
+ if (Queue->AsyncInfo->Queue) {
+ if (auto Err = Queue->Device->Device->queryAsync(Queue->AsyncInfo, false,
+ IsQueueWorkCompleted))
+ return Err;
+ } else if (IsQueueWorkCompleted) {
+ // No underlying queue means there's no work to complete.
+ *IsQueueWorkCompleted = true;
+ }
+ return Error::success();
+}
+
} // namespace offload
} // namespace llvm
diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp
index a694af1..2eaf376 100644
--- a/offload/libomptarget/OpenMP/InteropAPI.cpp
+++ b/offload/libomptarget/OpenMP/InteropAPI.cpp
@@ -219,7 +219,7 @@ omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType,
auto DeviceOrErr = PM->getDevice(DeviceNum);
if (!DeviceOrErr) {
- [[maybe_unused]] std::string ErrStr = toString(DeviceOrErr.takeError());
+ std::string ErrStr = toString(DeviceOrErr.takeError());
ODBG(ODT_Interface) << "Couldn't find device " << DeviceNum
<< " while constructing interop object: " << ErrStr;
return omp_interop_none;
diff --git a/offload/libomptarget/OpenMP/Mapping.cpp b/offload/libomptarget/OpenMP/Mapping.cpp
index c71728c..b8edd79 100644
--- a/offload/libomptarget/OpenMP/Mapping.cpp
+++ b/offload/libomptarget/OpenMP/Mapping.cpp
@@ -83,10 +83,14 @@ int MappingInfoTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin,
/*UseHoldRefCount=*/false, /*Name=*/nullptr,
/*IsRefCountINF=*/true))
.first->HDTT;
- ODBG(ODT_Mapping) << "Creating new map entry: HstBase=" << NewEntry.HstPtrBase
- << ", HstBegin=" << NewEntry.HstPtrBegin
- << ", HstEnd=" << NewEntry.HstPtrEnd
- << ", TgtBegin=" << NewEntry.TgtPtrBegin
+ ODBG(ODT_Mapping) << "Creating new map entry: HstBase="
+ << reinterpret_cast<void *>(NewEntry.HstPtrBase)
+ << ", HstBegin="
+ << reinterpret_cast<void *>(NewEntry.HstPtrBegin)
+ << ", HstEnd="
+ << reinterpret_cast<void *>(NewEntry.HstPtrEnd)
+ << ", TgtBegin="
+ << reinterpret_cast<void *>(NewEntry.TgtPtrBegin)
<< ", DynRefCount=" << NewEntry.dynRefCountToStr()
<< ", HoldRefCount=" << NewEntry.holdRefCountToStr();
(void)NewEntry;
@@ -502,9 +506,11 @@ int MappingInfoTy::deallocTgtPtrAndEntry(HostDataToTargetTy *Entry,
int64_t Size) {
assert(Entry && "Trying to deallocate a null entry.");
- ODBG(ODT_Mapping) << "Deleting tgt data " << Entry->TgtPtrBegin << " of size "
- << Size << " by freeing allocation "
- << "starting at " << Entry->TgtAllocBegin;
+ ODBG(ODT_Mapping) << "Deleting tgt data "
+ << reinterpret_cast<void *>(Entry->TgtPtrBegin)
+ << " of size " << Size << " by freeing allocation "
+ << "starting at "
+ << reinterpret_cast<void *>(Entry->TgtAllocBegin);
void *Event = Entry->getEvent();
if (Event && Device.destroyEvent(Event) != OFFLOAD_SUCCESS) {
diff --git a/offload/libomptarget/PluginManager.cpp b/offload/libomptarget/PluginManager.cpp
index 924a986..41b653a 100644
--- a/offload/libomptarget/PluginManager.cpp
+++ b/offload/libomptarget/PluginManager.cpp
@@ -59,7 +59,7 @@ void PluginManager::deinit() {
continue;
if (auto Err = Plugin->deinit()) {
- [[maybe_unused]] std::string InfoMsg = toString(std::move(Err));
+ std::string InfoMsg = toString(std::move(Err));
ODBG(ODT_Deinit) << "Failed to deinit plugin: " << InfoMsg;
}
Plugin.release();
@@ -73,7 +73,7 @@ bool PluginManager::initializePlugin(GenericPluginTy &Plugin) {
return true;
if (auto Err = Plugin.init()) {
- [[maybe_unused]] std::string InfoMsg = toString(std::move(Err));
+ std::string InfoMsg = toString(std::move(Err));
ODBG(ODT_Init) << "Failed to init plugin: " << InfoMsg;
return false;
}
@@ -106,7 +106,7 @@ bool PluginManager::initializeDevice(GenericPluginTy &Plugin,
auto Device = std::make_unique<DeviceTy>(&Plugin, UserId, DeviceId);
if (auto Err = Device->init()) {
- [[maybe_unused]] std::string InfoMsg = toString(std::move(Err));
+ std::string InfoMsg = toString(std::move(Err));
ODBG(ODT_Init) << "Failed to init device " << DeviceId << ": " << InfoMsg;
return false;
}
diff --git a/offload/libomptarget/omptarget.cpp b/offload/libomptarget/omptarget.cpp
index 676fda5..bd99ede 100644
--- a/offload/libomptarget/omptarget.cpp
+++ b/offload/libomptarget/omptarget.cpp
@@ -600,8 +600,7 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
// then no argument is marked as TARGET_PARAM ("omp target data map" is not
// associated with a target region, so there are no target parameters). This
// may be considered a hack, we could revise the scheme in the future.
- bool UpdateRef =
- !(ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF) && !(FromMapper && I == 0);
+ bool UpdateRef = !(ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF);
MappingInfoTy::HDTTMapAccessorTy HDTTMap =
Device.getMappingInfo().HostDataToTargetMap.getExclusiveAccessor();
@@ -707,14 +706,20 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
// to references to a local device pointer that refers to this device
// address.
//
- // TODO: Add a new map-type bit to support OpenMP 6.1's `fb_nullify`
- // and set the result to `nullptr - Delta`. Note that `fb_nullify` is
- // already the default for `need_device_ptr`, but clang/flang do not
- // support its codegen yet.
- TgtPtrBase = reinterpret_cast<void *>(
- reinterpret_cast<uintptr_t>(HstPtrBegin) - Delta);
- ODBG(ODT_Mapping) << "Returning host pointer " << TgtPtrBase
- << " as fallback (lookup failed)";
+ // OpenMP 6.1's `fb_nullify` fallback behavior: when the FB_NULLIFY bit
+ // is set by the compiler, e.g. for `use/need_device_ptr(fb_nullify)`),
+ // return `nullptr - Delta` when lookup fails.
+ if (ArgTypes[I] & OMP_TGT_MAPTYPE_FB_NULLIFY) {
+ TgtPtrBase = reinterpret_cast<void *>(
+ reinterpret_cast<uintptr_t>(nullptr) - Delta);
+ ODBG(ODT_Mapping) << "Returning offsetted null pointer " << TgtPtrBase
+ << " as fallback (lookup failed)";
+ } else {
+ TgtPtrBase = reinterpret_cast<void *>(
+ reinterpret_cast<uintptr_t>(HstPtrBegin) - Delta);
+ ODBG(ODT_Mapping) << "Returning host pointer " << TgtPtrBase
+ << " as fallback (lookup failed)";
+ }
}
ArgsBase[I] = TgtPtrBase;
}
@@ -1104,9 +1109,8 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
void *HstPtrBegin = Args[I];
int64_t DataSize = ArgSizes[I];
bool IsImplicit = ArgTypes[I] & OMP_TGT_MAPTYPE_IMPLICIT;
- bool UpdateRef = (!(ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF) ||
- (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) &&
- !(FromMapper && I == 0);
+ bool UpdateRef = !(ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF) ||
+ (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ);
bool ForceDelete = ArgTypes[I] & OMP_TGT_MAPTYPE_DELETE;
bool HasPresentModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_PRESENT;
bool HasHoldModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_OMPX_HOLD;
@@ -1254,12 +1258,12 @@ static int targetDataContiguous(ident_t *Loc, DeviceTy &Device, void *ArgsBase,
<< "Restoring target descriptor " << ShadowPtr.TgtPtrAddr
<< " to its original content (" << ShadowPtr.PtrSize
<< " bytes), containing pointee address "
- << ShadowPtr.TgtPtrContent.data();
+ << static_cast<const void *>(ShadowPtr.TgtPtrContent.data());
} else {
ODBG(ODT_Mapping)
<< "Restoring target pointer " << ShadowPtr.TgtPtrAddr
<< " to its original value "
- << ShadowPtr.TgtPtrContent.data();
+ << static_cast<const void *>(ShadowPtr.TgtPtrContent.data());
}
Ret = Device.submitData(ShadowPtr.TgtPtrAddr,
ShadowPtr.TgtPtrContent.data(),
@@ -1299,12 +1303,14 @@ static int targetDataContiguous(ident_t *Loc, DeviceTy &Device, void *ArgsBase,
<< "Restoring host descriptor " << ShadowPtr.HstPtrAddr
<< " to its original content (" << ShadowPtr.PtrSize
<< " bytes), containing pointee address "
- << ShadowPtr.HstPtrContent.data();
+ << static_cast<const void *>(
+ ShadowPtr.HstPtrContent.data());
} else {
ODBG(ODT_Mapping)
<< "Restoring host pointer " << ShadowPtr.HstPtrAddr
<< " to its original value "
- << ShadowPtr.HstPtrContent.data();
+ << static_cast<const void *>(
+ ShadowPtr.HstPtrContent.data());
}
std::memcpy(ShadowPtr.HstPtrAddr, ShadowPtr.HstPtrContent.data(),
ShadowPtr.PtrSize);
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 287bb14..379c8ec 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2431,7 +2431,10 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
}
/// Query for the completion of the pending operations on the async info.
- Error queryAsyncImpl(__tgt_async_info &AsyncInfo) override {
+ Error queryAsyncImpl(__tgt_async_info &AsyncInfo, bool ReleaseQueue,
+ bool *IsQueueWorkCompleted) override {
+ if (IsQueueWorkCompleted)
+ *IsQueueWorkCompleted = false;
AMDGPUStreamTy *Stream =
reinterpret_cast<AMDGPUStreamTy *>(AsyncInfo.Queue);
assert(Stream && "Invalid stream");
@@ -2444,11 +2447,16 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
if (!(*CompletedOrErr))
return Plugin::success();
+ if (IsQueueWorkCompleted)
+ *IsQueueWorkCompleted = true;
// Once the stream is completed, return it to stream pool and reset
// AsyncInfo. This is to make sure the synchronization only works for its
// own tasks.
- AsyncInfo.Queue = nullptr;
- return AMDGPUStreamManager.returnResource(Stream);
+ if (ReleaseQueue) {
+ AsyncInfo.Queue = nullptr;
+ return AMDGPUStreamManager.returnResource(Stream);
+ }
+ return Plugin::success();
}
/// Pin the host buffer and return the device pointer that should be used for
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index caf86a9..19db44c 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -854,8 +854,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
/// Query for the completion of the pending operations on the __tgt_async_info
/// structure in a non-blocking manner.
- Error queryAsync(__tgt_async_info *AsyncInfo);
- virtual Error queryAsyncImpl(__tgt_async_info &AsyncInfo) = 0;
+ Error queryAsync(__tgt_async_info *AsyncInfo, bool ReleaseQueue = true,
+ bool *IsQueueWorkCompleted = nullptr);
+ virtual Error queryAsyncImpl(__tgt_async_info &AsyncInfo, bool ReleaseQueue,
+ bool *IsQueueWorkCompleted) = 0;
/// Check whether the architecture supports VA management
virtual bool supportVAManagement() const { return false; }
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index 4ec8366..807df0f 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -849,7 +849,8 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
}
Expected<DeviceImageTy *> GenericDeviceTy::loadBinary(GenericPluginTy &Plugin,
StringRef InputTgtImage) {
- ODBG(OLDT_Init) << "Load data from image " << InputTgtImage.bytes_begin();
+ ODBG(OLDT_Init) << "Load data from image "
+ << static_cast<const void *>(InputTgtImage.bytes_begin());
std::unique_ptr<MemoryBuffer> Buffer;
if (identify_magic(InputTgtImage) == file_magic::bitcode) {
@@ -1198,12 +1199,14 @@ Error GenericDeviceTy::synchronize(__tgt_async_info *AsyncInfo,
return Plugin::success();
}
-Error GenericDeviceTy::queryAsync(__tgt_async_info *AsyncInfo) {
+Error GenericDeviceTy::queryAsync(__tgt_async_info *AsyncInfo,
+ bool ReleaseQueue,
+ bool *IsQueueWorkCompleted) {
if (!AsyncInfo || !AsyncInfo->Queue)
return Plugin::error(ErrorCode::INVALID_ARGUMENT,
"invalid async info queue");
- return queryAsyncImpl(*AsyncInfo);
+ return queryAsyncImpl(*AsyncInfo, ReleaseQueue, IsQueueWorkCompleted);
}
Error GenericDeviceTy::memoryVAMap(void **Addr, void *VAddr, size_t *RSize) {
@@ -1656,9 +1659,10 @@ int32_t GenericPluginTy::is_initialized() const { return Initialized; }
int32_t GenericPluginTy::isPluginCompatible(StringRef Image) {
auto HandleError = [&](Error Err) -> bool {
- [[maybe_unused]] std::string ErrStr = toString(std::move(Err));
- ODBG(OLDT_Init) << "Failure to check validity of image " << Image.data()
- << ": " << ErrStr;
+ std::string ErrStr = toString(std::move(Err));
+ ODBG(OLDT_Init) << "Failure to check validity of image "
+ << static_cast<const void *>(Image.data()) << ": "
+ << ErrStr;
return false;
};
switch (identify_magic(Image)) {
@@ -1685,8 +1689,9 @@ int32_t GenericPluginTy::isPluginCompatible(StringRef Image) {
int32_t GenericPluginTy::isDeviceCompatible(int32_t DeviceId, StringRef Image) {
auto HandleError = [&](Error Err) -> bool {
- [[maybe_unused]] std::string ErrStr = toString(std::move(Err));
- ODBG(OLDT_Init) << "Failure to check validity of image " << Image << ": "
+ std::string ErrStr = toString(std::move(Err));
+ ODBG(OLDT_Init) << "Failure to check validity of image "
+ << static_cast<const void *>(Image.data()) << ": "
<< ErrStr;
return false;
};
@@ -2069,7 +2074,7 @@ int32_t GenericPluginTy::use_auto_zero_copy(int32_t DeviceId) {
int32_t GenericPluginTy::is_accessible_ptr(int32_t DeviceId, const void *Ptr,
size_t Size) {
auto HandleError = [&](Error Err) -> bool {
- [[maybe_unused]] std::string ErrStr = toString(std::move(Err));
+ std::string ErrStr = toString(std::move(Err));
ODBG(OLDT_Device) << "Failure while checking accessibility of pointer "
<< Ptr << " for device " << DeviceId << ": " << ErrStr;
return false;
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index 621c90e..d5ab0b3 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -793,7 +793,10 @@ struct CUDADeviceTy : public GenericDeviceTy {
}
/// Query for the completion of the pending operations on the async info.
- Error queryAsyncImpl(__tgt_async_info &AsyncInfo) override {
+ Error queryAsyncImpl(__tgt_async_info &AsyncInfo, bool ReleaseQueue,
+ bool *IsQueueWorkCompleted) override {
+ if (IsQueueWorkCompleted)
+ *IsQueueWorkCompleted = false;
CUstream Stream = reinterpret_cast<CUstream>(AsyncInfo.Queue);
CUresult Res = cuStreamQuery(Stream);
@@ -801,12 +804,16 @@ struct CUDADeviceTy : public GenericDeviceTy {
if (Res == CUDA_ERROR_NOT_READY)
return Plugin::success();
+ if (IsQueueWorkCompleted)
+ *IsQueueWorkCompleted = true;
// Once the stream is synchronized and the operations completed (or an error
// occurs), return it to stream pool and reset AsyncInfo. This is to make
// sure the synchronization only works for its own tasks.
- AsyncInfo.Queue = nullptr;
- if (auto Err = CUDAStreamManager.returnResource(Stream))
- return Err;
+ if (ReleaseQueue) {
+ AsyncInfo.Queue = nullptr;
+ if (auto Err = CUDAStreamManager.returnResource(Stream))
+ return Err;
+ }
return Plugin::check(Res, "error in cuStreamQuery: %s");
}
diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp
index 81fbb67..6033796 100644
--- a/offload/plugins-nextgen/host/src/rtl.cpp
+++ b/offload/plugins-nextgen/host/src/rtl.cpp
@@ -336,7 +336,10 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
/// All functions are already synchronous. No need to do anything on this
/// query function.
- Error queryAsyncImpl(__tgt_async_info &AsyncInfo) override {
+ Error queryAsyncImpl(__tgt_async_info &AsyncInfo, bool ReleaseQueue,
+ bool *IsQueueWorkCompleted) override {
+ if (IsQueueWorkCompleted)
+ *IsQueueWorkCompleted = true;
return Plugin::success();
}
diff --git a/offload/plugins-nextgen/level_zero/include/L0Device.h b/offload/plugins-nextgen/level_zero/include/L0Device.h
index d14e710..001a41b 100644
--- a/offload/plugins-nextgen/level_zero/include/L0Device.h
+++ b/offload/plugins-nextgen/level_zero/include/L0Device.h
@@ -576,7 +576,8 @@ public:
AsyncInfoWrapperTy &AsyncInfoWrapper) override;
Error synchronizeImpl(__tgt_async_info &AsyncInfo,
bool ReleaseQueue) override;
- Error queryAsyncImpl(__tgt_async_info &AsyncInfo) override;
+ Error queryAsyncImpl(__tgt_async_info &AsyncInfo, bool ReleaseQueue,
+ bool *IsQueueWorkCompleted) override;
Error dataSubmitImpl(void *TgtPtr, const void *HstPtr, int64_t Size,
AsyncInfoWrapperTy &AsyncInfoWrapper) override;
Error dataRetrieveImpl(void *HstPtr, const void *TgtPtr, int64_t Size,
diff --git a/offload/plugins-nextgen/level_zero/src/L0Device.cpp b/offload/plugins-nextgen/level_zero/src/L0Device.cpp
index 2cae1e4..4db3c4e 100644
--- a/offload/plugins-nextgen/level_zero/src/L0Device.cpp
+++ b/offload/plugins-nextgen/level_zero/src/L0Device.cpp
@@ -192,8 +192,7 @@ Error L0DeviceTy::initImpl(GenericPluginTy &Plugin) {
CALL_ZE_RET_ERROR(zeDeviceGetCacheProperties, zeDevice, &Count,
&CacheProperties);
- DeviceName =
- std::string(DeviceProperties.name, sizeof(DeviceProperties.name));
+ DeviceName = std::string(DeviceProperties.name);
ODBG(OLDT_Device) << "Found a GPU device, Name = " << DeviceProperties.name;
@@ -356,10 +355,15 @@ L0DeviceTy::hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfoWrapper) {
return true;
}
-Error L0DeviceTy::queryAsyncImpl(__tgt_async_info &AsyncInfo) {
+Error L0DeviceTy::queryAsyncImpl(__tgt_async_info &AsyncInfo, bool ReleaseQueue,
+ bool *IsQueueWorkCompleted) {
+ if (IsQueueWorkCompleted)
+ *IsQueueWorkCompleted = true;
const bool IsAsync = AsyncInfo.Queue && asyncEnabled();
if (!IsAsync)
return Plugin::success();
+ if (IsQueueWorkCompleted)
+ *IsQueueWorkCompleted = false;
auto &Plugin = getPlugin();
auto *AsyncQueue = static_cast<AsyncQueueTy *>(AsyncInfo.Queue);
@@ -367,6 +371,9 @@ Error L0DeviceTy::queryAsyncImpl(__tgt_async_info &AsyncInfo) {
if (!AsyncQueue->WaitEvents.empty())
return Plugin::success();
+ if (IsQueueWorkCompleted)
+ *IsQueueWorkCompleted = true;
+
// Commit delayed USM2M copies.
for (auto &USM2M : AsyncQueue->USM2MList) {
std::copy_n(static_cast<const char *>(std::get<0>(USM2M)),
@@ -377,9 +384,11 @@ Error L0DeviceTy::queryAsyncImpl(__tgt_async_info &AsyncInfo) {
std::copy_n(static_cast<char *>(std::get<0>(H2M)), std::get<2>(H2M),
static_cast<char *>(std::get<1>(H2M)));
}
- Plugin.releaseAsyncQueue(AsyncQueue);
- getStagingBuffer().reset();
- AsyncInfo.Queue = nullptr;
+ if (ReleaseQueue) {
+ Plugin.releaseAsyncQueue(AsyncQueue);
+ getStagingBuffer().reset();
+ AsyncInfo.Queue = nullptr;
+ }
return Plugin::success();
}
diff --git a/offload/test/mapping/declare_mapper_target_checks.cpp b/offload/test/mapping/declare_mapper_target_checks.cpp
new file mode 100644
index 0000000..562e283
--- /dev/null
+++ b/offload/test/mapping/declare_mapper_target_checks.cpp
@@ -0,0 +1,145 @@
+// RUN: %libomptarget-compilexx-run-and-check-generic
+// XFAIL: intelgpu
+
+#include <omp.h>
+#include <stdio.h>
+
+#define TRUE 1
+#define FALSE 0
+
+struct TY1 {
+ int i1, i2, i3;
+ static constexpr auto name = "TY1";
+};
+struct TY2 {
+ int i1, i2, i3;
+ static constexpr auto name = "TY2";
+};
+
+// TY1 is not mapped, TY2 is
+#pragma omp declare mapper(TY2 t) map(to : t.i1) map(from : t.i3)
+
+struct TY3 {
+ TY2 n;
+ static constexpr auto name = "TY3";
+};
+struct TY4 {
+ int a;
+ TY2 n;
+ int b;
+ static constexpr auto name = "TY4";
+};
+
+template <typename T> int testType() {
+ T t1[2], t2[3], t3[4];
+ for (int i = 0; i < 2; i++)
+ t1[i].i1 = t3[i].i1 = 1;
+
+#pragma omp target map(tofrom : t1, t2, t3)
+ for (int i = 0; i < 2; i++) {
+ t1[i].i3 = t3[i].i3 = t1[i].i1;
+ t1[i].i1 = t3[i].i1 = 7;
+ }
+
+ for (int i = 0; i < 2; i++) {
+ if (t1[i].i3 != 1) {
+ printf("failed %s. t1[%d].i3 (%d) != t1[%d].i1 (%d)\n", T::name, i,
+ t1[i].i3, i, t1[i].i1);
+ return 1;
+ }
+ if (t3[i].i3 != 1) {
+ printf("failed %s. t3[%d].i3 (%d) != t3[%d].i1 (%d)\n", T::name, i,
+ t3[i].i3, i, t3[i].i1);
+ return 1;
+ }
+ }
+
+ int pt0 = omp_target_is_present(&t1[0], omp_get_default_device());
+ int pt1 = omp_target_is_present(&t2[1], omp_get_default_device());
+ int pt2 = omp_target_is_present(&t3[2], omp_get_default_device());
+
+ printf("present check for %s: t1 %i, t2 %i, t3 %i, expected 3x 0\n", T::name,
+ pt0, pt1, pt2);
+ return pt0 + pt1 + pt2;
+}
+
+template <typename T> int testTypeNestedPtr(T t1[2], T t2[3], T t3[4]) {
+ for (int i = 0; i < 2; i++)
+ t1[i].n.i1 = t3[i].n.i1 = 1;
+
+#pragma omp target map(tofrom : t1[0 : 2], t2[0 : 3], t3[0 : 4])
+ for (int i = 0; i < 2; i++) {
+ t1[i].n.i3 = t3[i].n.i3 = t1[i].n.i1;
+ t1[i].n.i1 = t3[i].n.i1 = 7;
+ }
+
+ for (int i = 0; i < 2; i++) {
+ if (t1[i].n.i3 != t1[i].n.i1) {
+ printf("failed %s-ptr. t1[%d].i3 (%d) != t1[%d].i1 (%d)\n", T::name, i,
+ t1[i].n.i3, i, t1[i].n.i1);
+ return 1;
+ }
+ if (t3[i].n.i3 != t3[i].n.i1) {
+ printf("failed %s-ptr. t3[%d].i3 (%d) != t3[%d].i1 (%d)\n", T::name, i,
+ t3[i].n.i3, i, t3[i].n.i1);
+ return 1;
+ }
+ }
+
+ int pt0 = omp_target_is_present(&t1[0], omp_get_default_device());
+ int pt1 = omp_target_is_present(&t2[1], omp_get_default_device());
+ int pt2 = omp_target_is_present(&t3[2], omp_get_default_device());
+
+ printf("present check for %s-ptr: t1 %i, t2 %i, t3 %i, expected 3x 0\n",
+ T::name, pt0, pt1, pt2);
+ return pt0 + pt1 + pt2;
+}
+
+template <typename T> int testTypeNested() {
+ T t1[2], t2[3], t3[4];
+ testTypeNestedPtr(t1, t2, t3);
+ for (int i = 0; i < 2; i++)
+ t1[i].n.i1 = t3[i].n.i1 = 1;
+
+#pragma omp target map(tofrom : t1, t2, t3)
+ for (int i = 0; i < 2; i++) {
+ t1[i].n.i3 = t3[i].n.i3 = t1[i].n.i1;
+ t1[i].n.i1 = t3[i].n.i1 = 7;
+ }
+
+ for (int i = 0; i < 2; i++) {
+ if (t1[i].n.i3 != t1[i].n.i1) {
+ printf("failed %s. t1[%d].i3 (%d) != t1[%d].i1 (%d)\n", T::name, i,
+ t1[i].n.i3, i, t1[i].n.i1);
+ return 1;
+ }
+ if (t3[i].n.i3 != t3[i].n.i1) {
+ printf("failed %s. t3[%d].i3 (%d) != t3[%d].i1 (%d)\n", T::name, i,
+ t3[i].n.i3, i, t3[i].n.i1);
+ return 1;
+ }
+ }
+
+ int pt0 = omp_target_is_present(&t1[0], omp_get_default_device());
+ int pt1 = omp_target_is_present(&t2[1], omp_get_default_device());
+ int pt2 = omp_target_is_present(&t3[2], omp_get_default_device());
+
+ printf("present check for %s: t1 %i, t2 %i, t3 %i, expected 3x 0\n", T::name,
+ pt0, pt1, pt2);
+ return pt0 + pt1 + pt2;
+}
+
+int main(int argc, char **argv) {
+ int r = 0;
+ r += testType<TY1>();
+ // CHECK: present check for TY1: t1 0, t2 0, t3 0, expected 3x 0
+ r += testType<TY2>();
+ // CHECK: present check for TY2: t1 0, t2 0, t3 0, expected 3x 0
+ r += testTypeNested<TY3>();
+ // CHECK: present check for TY3-ptr: t1 0, t2 0, t3 0, expected 3x 0
+ // CHECK: present check for TY3: t1 0, t2 0, t3 0, expected 3x 0
+ r += testTypeNested<TY4>();
+ // CHECK: present check for TY4-ptr: t1 0, t2 0, t3 0, expected 3x 0
+ // CHECK: present check for TY4: t1 0, t2 0, t3 0, expected 3x 0
+ return r;
+}
diff --git a/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_fallback.cpp b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_fallback.cpp
new file mode 100644
index 0000000..5c232d5
--- /dev/null
+++ b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_fallback.cpp
@@ -0,0 +1,32 @@
+// RUN: %libomptarget-compilexx-generic
+// RUN: %libomptarget-run-generic 2>&1 \
+// RUN: | %fcheck-generic
+// RUN: env OMP_TARGET_OFFLOAD=disabled %libomptarget-run-generic 2>&1 \
+// RUN: | %fcheck-generic
+
+// Test that when a use_device_ptr lookup fails, the
+// privatized pointer retains its original value by
+// default.
+//
+// This is necessary because we must assume that the
+// pointee is device-accessible, even if it was not
+// previously mapped.
+
+#include <stdio.h>
+
+int x = 0;
+
+struct ST {
+ int *a = &x;
+
+ void f1() {
+ printf("%p\n", a); // CHECK: 0x[[#%x,ADDR:]]
+#pragma omp target data use_device_ptr(a)
+ printf("%p\n", a); // CHECK-NEXT: 0x{{0*}}[[#ADDR]]
+ }
+};
+
+int main() {
+ ST s;
+ s.f1();
+}
diff --git a/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_fallback_nullify.cpp b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_fallback_nullify.cpp
new file mode 100644
index 0000000..fca0eee
--- /dev/null
+++ b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_fallback_nullify.cpp
@@ -0,0 +1,28 @@
+// RUN: %libomptarget-compilexx-generic -fopenmp-version=61
+// RUN: %libomptarget-run-generic 2>&1 \
+// RUN: | %fcheck-generic -check-prefixes=CHECK,OFFLOAD
+// RUN: env OMP_TARGET_OFFLOAD=disabled %libomptarget-run-generic 2>&1 \
+// RUN: | %fcheck-generic -check-prefixes=CHECK,NOOFFLOAD
+
+// Test that when a use_device_ptr lookup fails, the
+// privatized pointer is set to null because of fb_nullify.
+
+#include <stdio.h>
+
+int x = 0;
+
+struct ST {
+ int *a = &x;
+
+ void f1() {
+ printf("%p\n", a); // CHECK: 0x[[#%x,ADDR:]]
+#pragma omp target data use_device_ptr(fb_nullify : a)
+ printf("%p\n", a); // OFFLOAD-NEXT: (nil)
+ // NOOFFLOAD-NEXT: 0x{{0*}}[[#ADDR]]
+ }
+};
+
+int main() {
+ ST s;
+ s.f1();
+}
diff --git a/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_fallback_preserve.cpp b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_fallback_preserve.cpp
new file mode 100644
index 0000000..51944c5
--- /dev/null
+++ b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_fallback_preserve.cpp
@@ -0,0 +1,28 @@
+// RUN: %libomptarget-compilexx-generic -fopenmp-version=61
+// RUN: %libomptarget-run-generic 2>&1 \
+// RUN: | %fcheck-generic
+// RUN: env OMP_TARGET_OFFLOAD=disabled %libomptarget-run-generic 2>&1 \
+// RUN: | %fcheck-generic
+
+// Test that when a use_device_ptr lookup fails, the
+// privatized pointer retains its original value
+// because of fb_preserve.
+
+#include <stdio.h>
+
+int x = 0;
+
+struct ST {
+ int *a = &x;
+
+ void f1() {
+ printf("%p\n", a); // CHECK: 0x[[#%x,ADDR:]]
+#pragma omp target data use_device_ptr(fb_preserve : a)
+ printf("%p\n", a); // CHECK-NEXT: 0x{{0*}}[[#ADDR]]
+ }
+};
+
+int main() {
+ ST s;
+ s.f1();
+}
diff --git a/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_ref_fallback.cpp b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_ref_fallback.cpp
new file mode 100644
index 0000000..59a8fac
--- /dev/null
+++ b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_ref_fallback.cpp
@@ -0,0 +1,33 @@
+// RUN: %libomptarget-compilexx-generic
+// RUN: %libomptarget-run-generic 2>&1 \
+// RUN: | %fcheck-generic
+// RUN: env OMP_TARGET_OFFLOAD=disabled %libomptarget-run-generic 2>&1 \
+// RUN: | %fcheck-generic
+
+// Test that when a use_device_ptr lookup fails, the
+// privatized pointer retains its original value by
+// default.
+//
+// This is necessary because we must assume that the
+// pointee is device-accessible, even if it was not
+// previously mapped.
+
+#include <stdio.h>
+
+int x = 0;
+int *y = &x;
+
+struct ST {
+ int *&b = y;
+
+ void f2() {
+ printf("%p\n", b); // CHECK: 0x[[#%x,ADDR:]]
+#pragma omp target data use_device_ptr(b)
+ printf("%p\n", b); // CHECK-NEXT: 0x{{0*}}[[#ADDR]]
+ }
+};
+
+int main() {
+ ST s;
+ s.f2();
+}
diff --git a/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_ref_fallback_nullify.cpp b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_ref_fallback_nullify.cpp
new file mode 100644
index 0000000..65c7173
--- /dev/null
+++ b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_ref_fallback_nullify.cpp
@@ -0,0 +1,29 @@
+// RUN: %libomptarget-compilexx-generic -fopenmp-version=61
+// RUN: %libomptarget-run-generic 2>&1 \
+// RUN: | %fcheck-generic -check-prefixes=CHECK,OFFLOAD
+// RUN: env OMP_TARGET_OFFLOAD=disabled %libomptarget-run-generic 2>&1 \
+// RUN: | %fcheck-generic -check-prefixes=CHECK,NOOFFLOAD
+
+// Test that when a use_device_ptr lookup fails, the
+// privatized pointer is set to null because of fb_nullify.
+
+#include <stdio.h>
+
+int x = 0;
+int *y = &x;
+
+struct ST {
+ int *&b = y;
+
+ void f2() {
+ printf("%p\n", b); // CHECK: 0x[[#%x,ADDR:]]
+#pragma omp target data use_device_ptr(fb_nullify : b)
+ printf("%p\n", b); // OFFLOAD-NEXT: (nil)
+ // NOOFFLOAD-NEXT: 0x{{0*}}[[#ADDR]]
+ }
+};
+
+int main() {
+ ST s;
+ s.f2();
+}
diff --git a/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_ref_fallback_preserve.cpp b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_ref_fallback_preserve.cpp
new file mode 100644
index 0000000..beeb752
--- /dev/null
+++ b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_ref_fallback_preserve.cpp
@@ -0,0 +1,29 @@
+// RUN: %libomptarget-compilexx-generic -fopenmp-version=61
+// RUN: %libomptarget-run-generic 2>&1 \
+// RUN: | %fcheck-generic
+// RUN: env OMP_TARGET_OFFLOAD=disabled %libomptarget-run-generic 2>&1 \
+// RUN: | %fcheck-generic
+
+// Test that when a use_device_ptr lookup fails, the
+// privatized pointer retains its original value
+// because of fb_preserve.
+
+#include <stdio.h>
+
+int x = 0;
+int *y = &x;
+
+struct ST {
+ int *&b = y;
+
+ void f2() {
+ printf("%p\n", b); // CHECK: 0x[[#%x,ADDR:]]
+#pragma omp target data use_device_ptr(fb_preserve : b)
+ printf("%p\n", b); // CHECK-NEXT: 0x{{0*}}[[#ADDR]]
+ }
+};
+
+int main() {
+ ST s;
+ s.f2();
+}
diff --git a/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_fallback.c b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_fallback.cpp
index 33a3634..5be209a 100644
--- a/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_fallback.c
+++ b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_fallback.cpp
@@ -1,4 +1,8 @@
-// RUN: %libomptarget-compilexx-run-and-check-generic
+// RUN: %libomptarget-compilexx-generic
+// RUN: %libomptarget-run-generic 2>&1 \
+// RUN: | %fcheck-generic
+// RUN: env OMP_TARGET_OFFLOAD=disabled %libomptarget-run-generic 2>&1 \
+// RUN: | %fcheck-generic
// Test that when a use_device_ptr lookup fails, the
// privatized pointer retains its original value by
diff --git a/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_fallback_nullify.cpp b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_fallback_nullify.cpp
new file mode 100644
index 0000000..984744cd
--- /dev/null
+++ b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_fallback_nullify.cpp
@@ -0,0 +1,21 @@
+// RUN: %libomptarget-compilexx-generic -fopenmp-version=61
+// RUN: %libomptarget-run-generic 2>&1 \
+// RUN: | %fcheck-generic -check-prefixes=CHECK,OFFLOAD
+// RUN: env OMP_TARGET_OFFLOAD=disabled %libomptarget-run-generic 2>&1 \
+// RUN: | %fcheck-generic -check-prefixes=CHECK,NOOFFLOAD
+
+// Test that when a use_device_ptr lookup fails, the
+// privatized pointer is set to null because of fb_nullify.
+
+#include <stdio.h>
+int x;
+int *xp = &x;
+
+void f1() {
+ printf("%p\n", xp); // CHECK: 0x[[#%x,ADDR:]]
+#pragma omp target data use_device_ptr(fb_nullify : xp)
+ printf("%p\n", xp); // OFFLOAD-NEXT: (nil)
+ // NOOFFLOAD-NEXT: 0x{{0*}}[[#ADDR]]
+}
+
+int main() { f1(); }
diff --git a/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_fallback_preserve.cpp b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_fallback_preserve.cpp
new file mode 100644
index 0000000..197704f
--- /dev/null
+++ b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_fallback_preserve.cpp
@@ -0,0 +1,21 @@
+// RUN: %libomptarget-compilexx-generic -fopenmp-version=61
+// RUN: %libomptarget-run-generic 2>&1 \
+// RUN: | %fcheck-generic
+// RUN: env OMP_TARGET_OFFLOAD=disabled %libomptarget-run-generic 2>&1 \
+// RUN: | %fcheck-generic
+
+// Test that when a use_device_ptr lookup fails, the
+// privatized pointer retains its original value
+// because of fb_preserve.
+
+#include <stdio.h>
+int x;
+int *xp = &x;
+
+void f1() {
+ printf("%p\n", xp); // CHECK: 0x[[#%x,ADDR:]]
+#pragma omp target data use_device_ptr(fb_preserve : xp)
+ printf("%p\n", xp); // CHECK-NEXT: 0x{{0*}}[[#ADDR]]
+}
+
+int main() { f1(); }
diff --git a/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_ref_fallback.cpp b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_ref_fallback.cpp
new file mode 100644
index 0000000..1060ed9
--- /dev/null
+++ b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_ref_fallback.cpp
@@ -0,0 +1,26 @@
+// RUN: %libomptarget-compilexx-generic
+// RUN: %libomptarget-run-generic 2>&1 \
+// RUN: | %fcheck-generic
+// RUN: env OMP_TARGET_OFFLOAD=disabled %libomptarget-run-generic 2>&1 \
+// RUN: | %fcheck-generic
+
+// Test that when a use_device_ptr lookup fails, the
+// privatized pointer retains its original value by
+// default.
+//
+// This is necessary because we must assume that the
+// pointee is device-accessible, even if it was not
+// previously mapped.
+
+#include <stdio.h>
+int x;
+int *xp = &x;
+int *&xpr = xp;
+
+void f2() {
+ printf("%p\n", xpr); // CHECK: 0x[[#%x,ADDR:]]
+#pragma omp target data use_device_ptr(xpr)
+ printf("%p\n", xpr); // CHECK-NEXT: 0x{{0*}}[[#ADDR]]
+}
+
+int main() { f2(); }
diff --git a/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_ref_fallback_nullify.cpp b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_ref_fallback_nullify.cpp
new file mode 100644
index 0000000..7fa76dd6
--- /dev/null
+++ b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_ref_fallback_nullify.cpp
@@ -0,0 +1,24 @@
+// RUN: %libomptarget-compilexx-generic -fopenmp-version=61
+// RUN: %libomptarget-run-generic 2>&1 \
+// RUN: | %fcheck-generic -check-prefixes=CHECK,OFFLOAD
+// RUN: env OMP_TARGET_OFFLOAD=disabled %libomptarget-run-generic 2>&1 \
+// RUN: | %fcheck-generic -check-prefixes=CHECK,NOOFFLOAD
+
+// Test that when a use_device_ptr lookup fails, the
+// privatized pointer is set to null because of fb_nullify.
+
+#include <stdio.h>
+int x;
+int *xp = &x;
+int *&xpr = xp;
+
+void f2() {
+ printf("%p\n", xpr); // CHECK: 0x[[#%x,ADDR:]]
+ // FIXME: We won't get "nil" until we start privatizing xpr.
+#pragma omp target data use_device_ptr(fb_nullify : xpr)
+ printf("%p\n", xpr); // EXPECTED-OFFLOAD-NEXT: (nil)
+ // OFFLOAD-NEXT: 0x{{0*}}[[#ADDR]]
+ // NOOFFLOAD-NEXT: 0x{{0*}}[[#ADDR]]
+}
+
+int main() { f2(); }
diff --git a/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_ref_fallback_preserve.cpp b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_ref_fallback_preserve.cpp
new file mode 100644
index 0000000..e7f8bd4
--- /dev/null
+++ b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_ref_fallback_preserve.cpp
@@ -0,0 +1,22 @@
+// RUN: %libomptarget-compilexx-generic -fopenmp-version=61
+// RUN: %libomptarget-run-generic 2>&1 \
+// RUN: | %fcheck-generic
+// RUN: env OMP_TARGET_OFFLOAD=disabled %libomptarget-run-generic 2>&1 \
+// RUN: | %fcheck-generic
+
+// Test that when a use_device_ptr lookup fails, the
+// privatized pointer retains its original value
+// because of fb_preserve.
+
+#include <stdio.h>
+int x;
+int *xp = &x;
+int *&xpr = xp;
+
+void f2() {
+ printf("%p\n", xpr); // CHECK: 0x[[#%x,ADDR:]]
+#pragma omp target data use_device_ptr(fb_preserve : xpr)
+ printf("%p\n", xpr); // CHECK-NEXT: 0x{{0*}}[[#ADDR]]
+}
+
+int main() { f2(); }
diff --git a/offload/test/offloading/fortran/default-mapper-derived-enter-data-teams-collapse.f90 b/offload/test/offloading/fortran/default-mapper-derived-enter-data-teams-collapse.f90
new file mode 100644
index 0000000..6b87e81
--- /dev/null
+++ b/offload/test/offloading/fortran/default-mapper-derived-enter-data-teams-collapse.f90
@@ -0,0 +1,53 @@
+! Regression test for default mappers on nested derived types with allocatable
+! members when mapping a parent object and running an optimized target region.
+
+! REQUIRES: flang, amdgpu
+
+! RUN: %libomptarget-compile-fortran-generic -O3
+! RUN: %libomptarget-run-generic | %fcheck-generic
+
+program test_default_mapper_enter_data_teams_collapse
+ implicit none
+
+ type inner_type
+ real, allocatable :: data(:)
+ end type inner_type
+
+ type outer_type
+ type(inner_type) :: inner
+ character(len=19) :: desc = ' '
+ end type outer_type
+
+ type(outer_type) :: obj
+ integer, parameter :: n = 10
+ integer :: i, j
+ real :: expected, actual
+
+ allocate(obj%inner%data(n))
+ obj%inner%data = 0.0
+
+ !$omp target enter data map(to: obj)
+
+ !$omp target teams distribute parallel do collapse(2)
+ do i = 1, n
+ do j = 1, n
+ obj%inner%data(i) = real(i)
+ end do
+ end do
+ !$omp end target teams distribute parallel do
+
+ !$omp target exit data map(from: obj)
+
+ expected = real(n * (n + 1)) / 2.0
+ actual = sum(obj%inner%data)
+
+ if (abs(actual - expected) < 1.0e-6) then
+ print *, "PASS"
+ else
+ print *, "FAIL", actual, expected
+ end if
+
+ deallocate(obj%inner%data)
+end program test_default_mapper_enter_data_teams_collapse
+
+! CHECK: PASS
diff --git a/offload/test/offloading/fortran/recursive-default-mapper.f90 b/offload/test/offloading/fortran/recursive-default-mapper.f90
new file mode 100644
index 0000000..47b706d
--- /dev/null
+++ b/offload/test/offloading/fortran/recursive-default-mapper.f90
@@ -0,0 +1,40 @@
+! Offloading test for recursive default mapper emission
+! REQUIRES: flang, amdgpu
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+
+module recursive_mapper_mod
+ implicit none
+
+ type :: inner
+ integer :: value
+ type(inner), pointer :: next
+ end type inner
+
+ type :: outer
+ integer, allocatable :: arr(:)
+ type(inner), pointer :: head
+ end type outer
+
+contains
+
+end module recursive_mapper_mod
+
+program main
+ use recursive_mapper_mod
+ implicit none
+
+ type(outer) :: o
+
+ allocate(o%arr(2))
+ o%arr = [1, 2]
+
+ !$omp target map(tofrom: o)
+ o%arr(1) = o%arr(1) + 1
+ o%arr(2) = o%arr(2) + 1
+ !$omp end target
+
+ print *, o%arr(1), o%arr(2)
+end program main
+
+! CHECK: 2 3
diff --git a/offload/test/offloading/fortran/target-defaultmap-implicit-mapper.f90 b/offload/test/offloading/fortran/target-defaultmap-implicit-mapper.f90
new file mode 100644
index 0000000..d2d8f7a
--- /dev/null
+++ b/offload/test/offloading/fortran/target-defaultmap-implicit-mapper.f90
@@ -0,0 +1,49 @@
+! Offload test that ensures defaultmap(tofrom: scalar) does not suppress
+! implicit default mapper generation for allocatable derived types.
+! REQUIRES: flang, amdgpu
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+program defaultmap_implicit_mapper
+ implicit none
+
+ type :: payload_t
+ integer, allocatable :: arr(:)
+ end type payload_t
+
+ type(payload_t), allocatable :: obj
+ integer, parameter :: n = 8
+ integer :: i
+ integer :: scalar
+ logical :: ok
+
+ allocate(obj)
+ allocate(obj%arr(n))
+ obj%arr = 1
+ scalar = 2
+
+ !$omp target defaultmap(tofrom: scalar)
+ do i = 1, n
+ obj%arr(i) = obj%arr(i) + scalar
+ end do
+ scalar = 7
+ !$omp end target
+
+ ok = .true.
+ do i = 1, n
+ if (obj%arr(i) /= 3) ok = .false.
+ end do
+ if (scalar /= 7) ok = .false.
+
+ if (ok) then
+ print *, "Test passed!"
+ else
+ print *, "Test failed!"
+ print *, obj%arr
+ print *, scalar
+ end if
+
+ deallocate(obj%arr)
+ deallocate(obj)
+end program defaultmap_implicit_mapper
+
+! CHECK: Test passed!
diff --git a/offload/test/offloading/fortran/target-parameter-array.f90 b/offload/test/offloading/fortran/target-parameter-array.f90
new file mode 100644
index 0000000..b85fb06
--- /dev/null
+++ b/offload/test/offloading/fortran/target-parameter-array.f90
@@ -0,0 +1,131 @@
+! Offload test for parameter (constant) arrays and character scalars accessed
+! with dynamic indices/substrings in OpenMP target regions.
+
+! REQUIRES: flang, amdgpu
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+
+program test_parameter_mapping
+ implicit none
+ integer, parameter :: dp = selected_real_kind(15, 307)
+ logical :: all_tests_pass
+
+ all_tests_pass = .true.
+
+ ! Test 1: Parameter array with dynamic index
+ call test_param_array_dynamic_index(all_tests_pass)
+
+ ! Test 2: Integer parameter array
+ call test_int_param_array(all_tests_pass)
+
+ ! Test 3: Character scalar with dynamic substring
+ call test_char_substring(all_tests_pass)
+
+ ! Test 4: Verify scalar parameters work (inlined)
+ call test_scalar_param(all_tests_pass)
+
+ if (all_tests_pass) then
+ print *, "PASS"
+ else
+ print *, "FAIL"
+ endif
+
+contains
+
+! Test 1: Parameter array with dynamic index in target region
+subroutine test_param_array_dynamic_index(test_pass)
+ logical, intent(inout) :: test_pass
+ real(dp), parameter :: const_array(3) = [1.0_dp, 2.0_dp, 3.0_dp]
+ integer :: idx
+ real(dp) :: result
+ real(dp), parameter :: expected = 2.0_dp
+ real(dp), parameter :: tolerance = 1.0e-10_dp
+
+ idx = 2
+ result = 0.0_dp
+
+ !$omp target map(tofrom:result) map(to:idx)
+ ! Access parameter array with dynamic index
+ result = const_array(idx)
+ !$omp end target
+
+ if (abs(result - expected) > tolerance) then
+ print *, "Test 1 FAILED: expected", expected, "got", result
+ test_pass = .false.
+ endif
+end subroutine test_param_array_dynamic_index
+
+! Test 2: Integer parameter array with different indices
+subroutine test_int_param_array(test_pass)
+ logical, intent(inout) :: test_pass
+ integer, parameter :: int_array(4) = [10, 20, 30, 40]
+ integer :: idx1, idx2
+ integer :: result1, result2
+
+ idx1 = 1
+ idx2 = 4
+ result1 = 0
+ result2 = 0
+
+ !$omp target map(tofrom:result1, result2) map(to:idx1, idx2)
+ ! Access parameter array with different dynamic indices
+ result1 = int_array(idx1)
+ result2 = int_array(idx2)
+ !$omp end target
+
+ if (result1 /= 10 .or. result2 /= 40) then
+ print *, "Test 2 FAILED: expected 10, 40 got", result1, result2
+ test_pass = .false.
+ endif
+end subroutine test_int_param_array
+
+! Test 3: Character scalar parameter with dynamic substring access
+subroutine test_char_substring(test_pass)
+ logical, intent(inout) :: test_pass
+ character(len=20), parameter :: char_scalar = "constant_string_data"
+ integer :: start_idx, end_idx
+ character(len=8) :: result
+ character(len=8), parameter :: expected = "string_d"
+
+ start_idx = 10
+ end_idx = 17
+ result = ""
+
+ !$omp target map(tofrom:result) map(to:start_idx, end_idx)
+ ! Dynamic substring access - character scalar must be mapped
+ result = char_scalar(start_idx:end_idx)
+ !$omp end target
+
+ if (result /= expected) then
+ print *, "Test 3 FAILED: expected '", expected, "' got '", result, "'"
+ test_pass = .false.
+ endif
+end subroutine test_char_substring
+
+! Test 4: Scalar parameter (can be inlined, no mapping needed)
+subroutine test_scalar_param(test_pass)
+ logical, intent(inout) :: test_pass
+ integer, parameter :: scalar_const = 42
+ real(dp), parameter :: real_const = 3.14159_dp
+ integer :: int_result
+ real(dp) :: real_result
+ real(dp), parameter :: tolerance = 1.0e-5_dp
+
+ int_result = 0
+ real_result = 0.0_dp
+
+ !$omp target map(tofrom:int_result, real_result)
+ ! Scalar parameters should be inlined (no mapping needed)
+ int_result = scalar_const
+ real_result = real_const
+ !$omp end target
+
+ if (int_result /= 42 .or. abs(real_result - real_const) > tolerance) then
+ print *, "Test 4 FAILED: expected 42, 3.14159 got", int_result, real_result
+ test_pass = .false.
+ endif
+end subroutine test_scalar_param
+
+end program test_parameter_mapping
+
+! CHECK: PASS
diff --git a/offload/test/offloading/strided_multiple_update.c b/offload/test/offloading/strided_multiple_update_from.c
index 4f2df81..4f2df81 100644
--- a/offload/test/offloading/strided_multiple_update.c
+++ b/offload/test/offloading/strided_multiple_update_from.c
diff --git a/offload/test/offloading/strided_multiple_update_to.c b/offload/test/offloading/strided_multiple_update_to.c
new file mode 100644
index 0000000..bb16d7a
--- /dev/null
+++ b/offload/test/offloading/strided_multiple_update_to.c
@@ -0,0 +1,124 @@
+// This test checks that #pragma omp target update to(data1[0:3:4],
+// data2[0:2:5]) correctly updates disjoint strided sections of multiple arrays
+// from the host to the device.
+
+// RUN: %libomptarget-compile-run-and-check-generic
+// XFAIL: intelgpu
+
+#include <omp.h>
+#include <stdio.h>
+
+int main() {
+ int len = 12;
+ double data1[len], data2[len];
+
+ // Initialize host arrays
+ for (int i = 0; i < len; i++) {
+ data1[i] = i;
+ data2[i] = i * 10;
+ }
+
+ printf("original host array values:\n");
+ printf("data1:\n");
+ for (int i = 0; i < len; i++)
+ printf("%.1f\n", data1[i]);
+ printf("data2:\n");
+ for (int i = 0; i < len; i++)
+ printf("%.1f\n", data2[i]);
+
+ // CHECK: original host array values:
+ // CHECK-NEXT: data1:
+ // CHECK-NEXT: 0.0
+ // CHECK-NEXT: 1.0
+ // CHECK-NEXT: 2.0
+ // CHECK-NEXT: 3.0
+ // CHECK-NEXT: 4.0
+ // CHECK-NEXT: 5.0
+ // CHECK-NEXT: 6.0
+ // CHECK-NEXT: 7.0
+ // CHECK-NEXT: 8.0
+ // CHECK-NEXT: 9.0
+ // CHECK-NEXT: 10.0
+ // CHECK-NEXT: 11.0
+ // CHECK-NEXT: data2:
+ // CHECK-NEXT: 0.0
+ // CHECK-NEXT: 10.0
+ // CHECK-NEXT: 20.0
+ // CHECK-NEXT: 30.0
+ // CHECK-NEXT: 40.0
+ // CHECK-NEXT: 50.0
+ // CHECK-NEXT: 60.0
+ // CHECK-NEXT: 70.0
+ // CHECK-NEXT: 80.0
+ // CHECK-NEXT: 90.0
+ // CHECK-NEXT: 100.0
+ // CHECK-NEXT: 110.0
+
+#pragma omp target data map(tofrom : data1[0 : len], data2[0 : len])
+ {
+ // Initialize device arrays to 20
+#pragma omp target
+ {
+ for (int i = 0; i < len; i++) {
+ data1[i] = 20.0;
+ data2[i] = 20.0;
+ }
+ }
+
+ // Modify host arrays for strided elements
+ data1[0] = 10.0;
+ data1[4] = 10.0;
+ data1[8] = 10.0;
+ data2[0] = 10.0;
+ data2[5] = 10.0;
+
+ // data1[0:3:4] // indices 0,4,8
+ // data2[0:2:5] // indices 0,5
+#pragma omp target update to(data1[0 : 3 : 4], data2[0 : 2 : 5])
+
+ // Verify on device by adding 5
+#pragma omp target
+ {
+ for (int i = 0; i < len; i++)
+ data1[i] += 5.0;
+ for (int i = 0; i < len; i++)
+ data2[i] += 5.0;
+ }
+ }
+
+ printf("device array values after update to:\n");
+ printf("data1:\n");
+ for (int i = 0; i < len; i++)
+ printf("%.1f\n", data1[i]);
+ printf("data2:\n");
+ for (int i = 0; i < len; i++)
+ printf("%.1f\n", data2[i]);
+
+ // CHECK: device array values after update to:
+ // CHECK-NEXT: data1:
+ // CHECK-NEXT: 15.0
+ // CHECK-NEXT: 25.0
+ // CHECK-NEXT: 25.0
+ // CHECK-NEXT: 25.0
+ // CHECK-NEXT: 15.0
+ // CHECK-NEXT: 25.0
+ // CHECK-NEXT: 25.0
+ // CHECK-NEXT: 25.0
+ // CHECK-NEXT: 15.0
+ // CHECK-NEXT: 25.0
+ // CHECK-NEXT: 25.0
+ // CHECK-NEXT: 25.0
+ // CHECK-NEXT: data2:
+ // CHECK-NEXT: 15.0
+ // CHECK-NEXT: 25.0
+ // CHECK-NEXT: 25.0
+ // CHECK-NEXT: 25.0
+ // CHECK-NEXT: 25.0
+ // CHECK-NEXT: 15.0
+ // CHECK-NEXT: 25.0
+ // CHECK-NEXT: 25.0
+ // CHECK-NEXT: 25.0
+ // CHECK-NEXT: 25.0
+ // CHECK-NEXT: 25.0
+ // CHECK-NEXT: 25.0
+}
diff --git a/offload/test/offloading/strided_partial_update.c b/offload/test/offloading/strided_partial_update_from.c
index 4a2977a..4a2977a 100644
--- a/offload/test/offloading/strided_partial_update.c
+++ b/offload/test/offloading/strided_partial_update_from.c
diff --git a/offload/test/offloading/strided_partial_update_to.c b/offload/test/offloading/strided_partial_update_to.c
new file mode 100644
index 0000000..f9c960f
--- /dev/null
+++ b/offload/test/offloading/strided_partial_update_to.c
@@ -0,0 +1,74 @@
+// This test checks that #pragma omp target update to(data[0:4:3]) correctly
+// updates every third element (stride 3) from the host to the device, partially
+// across the array
+
+// RUN: %libomptarget-compile-run-and-check-generic
+// XFAIL: intelgpu
+
+#include <omp.h>
+#include <stdio.h>
+
+int main() {
+ int len = 11;
+ double data[len];
+
+ // Initialize on host
+ for (int i = 0; i < len; i++)
+ data[i] = i;
+
+ // Initial values
+ printf("original host array values:\n");
+ for (int i = 0; i < len; i++)
+ printf("%f\n", data[i]);
+ printf("\n");
+
+ // CHECK: 0.000000
+ // CHECK: 1.000000
+ // CHECK: 2.000000
+ // CHECK: 3.000000
+ // CHECK: 4.000000
+ // CHECK: 5.000000
+ // CHECK: 6.000000
+ // CHECK: 7.000000
+ // CHECK: 8.000000
+ // CHECK: 9.000000
+ // CHECK: 10.000000
+
+#pragma omp target data map(tofrom : data[0 : len])
+ {
+ // Initialize device array to 20
+#pragma omp target
+ for (int i = 0; i < len; i++)
+ data[i] = 20.0;
+
+ // Modify host data for strided elements
+ data[0] = 10.0;
+ data[3] = 10.0;
+ data[6] = 10.0;
+ data[9] = 10.0;
+
+#pragma omp target update to(data[0 : 4 : 3]) // indices 0,3,6,9
+
+ // Verify on device by adding 5
+#pragma omp target
+ for (int i = 0; i < len; i++)
+ data[i] += 5.0;
+ }
+
+ printf("device array values after update to:\n");
+ for (int i = 0; i < len; i++)
+ printf("%f\n", data[i]);
+ printf("\n");
+
+ // CHECK: 15.000000
+ // CHECK: 25.000000
+ // CHECK: 25.000000
+ // CHECK: 15.000000
+ // CHECK: 25.000000
+ // CHECK: 25.000000
+ // CHECK: 15.000000
+ // CHECK: 25.000000
+ // CHECK: 25.000000
+ // CHECK: 15.000000
+ // CHECK: 25.000000
+}
diff --git a/offload/test/offloading/strided_update.c b/offload/test/offloading/strided_update_from.c
index 9910bed..9910bed 100644
--- a/offload/test/offloading/strided_update.c
+++ b/offload/test/offloading/strided_update_from.c
diff --git a/offload/test/offloading/strided_update_to.c b/offload/test/offloading/strided_update_to.c
new file mode 100644
index 0000000..eca20ab
--- /dev/null
+++ b/offload/test/offloading/strided_update_to.c
@@ -0,0 +1,74 @@
+// This test checks that "update to" clause in OpenMP is supported when the
+// elements are updated in a non-contiguous manner. This test checks that
+// #pragma omp target update to(data[0:4:2]) correctly updates only every
+// other element (stride 2) from the host to the device
+
+// RUN: %libomptarget-compile-run-and-check-generic
+// XFAIL: intelgpu
+
+#include <omp.h>
+#include <stdio.h>
+
+int main() {
+ int len = 8;
+ double data[len];
+
+ // Initialize on host
+ for (int i = 0; i < len; i++) {
+ data[i] = i;
+ }
+
+ // Initial values
+ printf("original host array values:\n");
+ for (int i = 0; i < len; i++)
+ printf("%f\n", data[i]);
+ printf("\n");
+
+#pragma omp target data map(tofrom : len, data[0 : len])
+ {
+ // Initialize device to 20
+#pragma omp target
+ for (int i = 0; i < len; i++) {
+ data[i] = 20.0;
+ }
+
+ // Modify host for strided elements
+ data[0] = 10.0;
+ data[2] = 10.0;
+ data[4] = 10.0;
+ data[6] = 10.0;
+
+#pragma omp target update to(data[0 : 4 : 2])
+
+ // Verify on device by adding 5
+#pragma omp target
+ for (int i = 0; i < len; i++) {
+ data[i] += 5.0;
+ }
+ }
+
+ // CHECK: 0.000000
+ // CHECK: 1.000000
+ // CHECK: 2.000000
+ // CHECK: 3.000000
+ // CHECK: 4.000000
+ // CHECK: 5.000000
+ // CHECK: 6.000000
+ // CHECK: 7.000000
+
+ printf("device array values after update to:\n");
+ for (int i = 0; i < len; i++)
+ printf("%f\n", data[i]);
+ printf("\n");
+
+ // CHECK: 15.000000
+ // CHECK: 25.000000
+ // CHECK: 15.000000
+ // CHECK: 25.000000
+ // CHECK: 15.000000
+ // CHECK: 25.000000
+ // CHECK: 15.000000
+ // CHECK: 25.000000
+
+ return 0;
+}
diff --git a/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp b/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp
index 47a299f..718e9e7 100644
--- a/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp
+++ b/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp
@@ -15,6 +15,7 @@
#include "llvm/Frontend/Offloading/Utility.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -60,11 +61,11 @@ int main(int argc, char **argv) {
MemoryBuffer::getFile(InputFilename, /*isText=*/true,
/*RequiresNullTerminator=*/true);
if (!KernelInfoMB)
- report_fatal_error("Error reading the kernel info json file");
+ reportFatalUsageError("Error reading the kernel info json file");
Expected<json::Value> JsonKernelInfo =
json::parse(KernelInfoMB.get()->getBuffer());
if (auto Err = JsonKernelInfo.takeError())
- report_fatal_error("Cannot parse the kernel info json file");
+ reportFatalUsageError("Cannot parse the kernel info json file");
auto NumTeamsJson =
JsonKernelInfo->getAsObject()->getInteger("NumTeamsClause");
@@ -104,7 +105,7 @@ int main(int argc, char **argv) {
MemoryBuffer::getFile(KernelEntryName + ".image", /*isText=*/false,
/*RequiresNullTerminator=*/false);
if (!ImageMB)
- report_fatal_error("Error reading the kernel image.");
+ reportFatalUsageError("Error reading the kernel image.");
__tgt_device_image DeviceImage;
DeviceImage.ImageStart = const_cast<char *>(ImageMB.get()->getBufferStart());
@@ -145,7 +146,7 @@ int main(int argc, char **argv) {
/*RequiresNullTerminator=*/false);
if (!DeviceMemoryMB)
- report_fatal_error("Error reading the kernel input device memory.");
+ reportFatalUsageError("Error reading the kernel input device memory.");
// On AMD for currently unknown reasons we cannot copy memory mapped data to
// device. This is a work-around.
@@ -178,14 +179,15 @@ int main(int argc, char **argv) {
/*isText=*/false,
/*RequiresNullTerminator=*/false);
if (!OriginalOutputMB)
- report_fatal_error("Error reading the kernel original output file, make "
- "sure LIBOMPTARGET_SAVE_OUTPUT is set when recording");
+ reportFatalUsageError(
+ "Error reading the kernel original output file, make sure "
+ "LIBOMPTARGET_SAVE_OUTPUT is set when recording");
ErrorOr<std::unique_ptr<MemoryBuffer>> ReplayOutputMB =
MemoryBuffer::getFile(KernelEntryName + ".replay.output",
/*isText=*/false,
/*RequiresNullTerminator=*/false);
if (!ReplayOutputMB)
- report_fatal_error("Error reading the kernel replay output file");
+ reportFatalUsageError("Error reading the kernel replay output file");
StringRef OriginalOutput = OriginalOutputMB.get()->getBuffer();
StringRef ReplayOutput = ReplayOutputMB.get()->getBuffer();
diff --git a/offload/unittests/OffloadAPI/CMakeLists.txt b/offload/unittests/OffloadAPI/CMakeLists.txt
index 546d7da..fce38c5 100644
--- a/offload/unittests/OffloadAPI/CMakeLists.txt
+++ b/offload/unittests/OffloadAPI/CMakeLists.txt
@@ -48,7 +48,8 @@ add_offload_unittest("queue"
queue/olGetQueueInfo.cpp
queue/olGetQueueInfoSize.cpp
queue/olWaitEvents.cpp
- queue/olLaunchHostFunction.cpp)
+ queue/olLaunchHostFunction.cpp
+ queue/olQueryQueue.cpp)
add_offload_unittest("symbol"
symbol/olGetSymbol.cpp
diff --git a/offload/unittests/OffloadAPI/queue/olQueryQueue.cpp b/offload/unittests/OffloadAPI/queue/olQueryQueue.cpp
new file mode 100644
index 0000000..4efd939
--- /dev/null
+++ b/offload/unittests/OffloadAPI/queue/olQueryQueue.cpp
@@ -0,0 +1,24 @@
+//===------- Offload API tests - olQueryQueue ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--------------------------------------------------------------------===//
+
+#include "../common/Fixtures.hpp"
+#include <OffloadAPI.h>
+#include <gtest/gtest.h>
+
+using olQueryQueueTest = OffloadQueueTest;
+OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olQueryQueueTest);
+
+TEST_P(olQueryQueueTest, SuccessEmptyAsyncQueue) {
+ ASSERT_SUCCESS(olQueryQueue(Queue, nullptr));
+}
+
+TEST_P(olQueryQueueTest, SuccessEmptyAsyncQueueCheckResult) {
+ bool IsQueueWorkCompleted;
+ ASSERT_SUCCESS(olQueryQueue(Queue, &IsQueueWorkCompleted));
+ ASSERT_TRUE(IsQueueWorkCompleted);
+}