diff options
| author | Peter Collingbourne <peter@pcc.me.uk> | 2026-01-29 14:39:34 -0800 |
|---|---|---|
| committer | Peter Collingbourne <peter@pcc.me.uk> | 2026-01-29 14:39:34 -0800 |
| commit | 7b3f189a1369f9348c007730ddea953b1e68acb1 (patch) | |
| tree | 7db8969ee8a34a10b6c8ae033c939c9d653376f6 /offload | |
| parent | f3d6dae13ae710323a2ddbaf87af71b1abcbfada (diff) | |
| parent | 0893b70ecfc4f4aca0a20a078476d191edc1e623 (diff) | |
| download | llvm-users/pcc/spr/codegen-introduce-machinefunctiongetpreferredalignment.zip llvm-users/pcc/spr/codegen-introduce-machinefunctiongetpreferredalignment.tar.gz llvm-users/pcc/spr/codegen-introduce-machinefunctiongetpreferredalignment.tar.bz2 | |
Created using spr 1.3.6-beta.1
Diffstat (limited to 'offload')
45 files changed, 1380 insertions, 87 deletions
diff --git a/offload/ci/.gitignore b/offload/ci/.gitignore new file mode 100644 index 0000000..f2198f0 --- /dev/null +++ b/offload/ci/.gitignore @@ -0,0 +1 @@ +/*.workdir diff --git a/offload/ci/openmp-offload-amdgpu-clang-flang.py b/offload/ci/openmp-offload-amdgpu-clang-flang.py new file mode 100755 index 0000000..f55ae02 --- /dev/null +++ b/offload/ci/openmp-offload-amdgpu-clang-flang.py @@ -0,0 +1,71 @@ +#! /usr/bin/env python3 +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +import os +import sys + +# Adapt to location in source tree +llvmsrcroot = os.path.normpath(f"{__file__}/../../..") + +sys.path.insert(0, os.path.join(llvmsrcroot, ".ci/buildbot")) +import worker + +llvmbuilddir = "llvm.build" +llvminstalldir = "llvm.inst" + +with worker.run( + __file__, + llvmsrcroot, + clobberpaths=[llvmbuilddir, llvminstalldir], + workerjobs=64, +) as w: + with w.step("configure-openmp", halt_on_fail=True): + w.run_command( + [ + "cmake", + f"-S{w.in_llvmsrc('llvm')}", + f"-B{llvmbuilddir}", + "-GNinja", + "-DCMAKE_BUILD_TYPE=Release", + "-DLLVM_ENABLE_ASSERTIONS=ON", + f"-DLLVM_LIT_ARGS=-vv --show-unsupported --show-xfail -j {w.jobs} --time-tests --timeout 100", + f"-DCMAKE_INSTALL_PREFIX={w.in_workdir(llvminstalldir)}", + "-DCLANG_DEFAULT_LINKER=lld", + "-DLLVM_TARGETS_TO_BUILD=X86;AMDGPU", + "-DLLVM_ENABLE_ASSERTIONS=ON", + "-DCMAKE_C_COMPILER_LAUNCHER=ccache", + "-DCMAKE_CXX_COMPILER_LAUNCHER=ccache", + "-DFLANG_RUNTIME_F128_MATH_LIB=libquadmath", + "-DLLVM_ENABLE_PER_TARGET_RUNTIME_DIR=ON", + "-DCMAKE_CXX_STANDARD=17", + "-DBUILD_SHARED_LIBS=ON", + "-DLIBOMPTARGET_PLUGINS_TO_BUILD=amdgpu;host", + "-DRUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES=compiler-rt;openmp", + "-DLLVM_RUNTIME_TARGETS=default;amdgcn-amd-amdhsa", + "-DCOMPILER_RT_BUILD_ORC=OFF", + "-DCOMPILER_RT_BUILD_XRAY=OFF", + "-DCOMPILER_RT_BUILD_MEMPROF=OFF", + "-DCOMPILER_RT_BUILD_LIBFUZZER=OFF", + "-DCOMPILER_RT_BUILD_SANITIZERS=ON", + "-DLLVM_ENABLE_PROJECTS=clang;lld;mlir;flang;llvm", + "-DLLVM_ENABLE_RUNTIMES=flang-rt;offload;compiler-rt;openmp", + ] + ) + + with w.step("compile-openmp", halt_on_fail=True): + w.run_ninja(builddir=llvmbuilddir, ccache_stats=True) + + with w.step("test-openmp"): + w.run_ninja( + ["check-openmp"], add_env={"HSA_ENABLE_SDMA": "0"}, builddir=llvmbuilddir + ) + + with w.step("Add check check-offload"): + w.run_ninja( + ["check-offload"], add_env={"HSA_ENABLE_SDMA": "0"}, builddir=llvmbuilddir + ) + + with w.step("LLVM: Install", halt_on_fail=True): + w.run_ninja(["install"], builddir=llvmbuilddir) diff --git a/offload/ci/openmp-offload-amdgpu-runtime.py b/offload/ci/openmp-offload-amdgpu-runtime.py new file mode 100755 index 0000000..e93f54f --- /dev/null +++ b/offload/ci/openmp-offload-amdgpu-runtime.py @@ -0,0 +1,60 @@ +#! /usr/bin/env python3 +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +import os +import sys + +# Adapt to location in source tree +llvmsrcroot = os.path.normpath(f"{__file__}/../../..") + +sys.path.insert(0, os.path.join(llvmsrcroot, ".ci/buildbot")) +import worker + +llvmbuilddir = "llvm.build" +llvminstalldir = "llvm.inst" + +with worker.run( + __file__, + llvmsrcroot, + clobberpaths=[llvmbuilddir, llvminstalldir], + workerjobs=32, +) as w: + with w.step("configure-openmp", halt_on_fail=True): + w.run_command( + [ + "cmake", + f"-S{w.in_llvmsrc('llvm')}", + f"-B{llvmbuilddir}", + "-GNinja", + "-DCMAKE_BUILD_TYPE=Release", + "-DLLVM_ENABLE_ASSERTIONS=ON", + f"-DLLVM_LIT_ARGS=-vv --show-unsupported --show-xfail -j {w.jobs} --time-tests --timeout 100", + f"-DCMAKE_INSTALL_PREFIX={w.in_workdir(llvminstalldir)}", + "-DCLANG_DEFAULT_LINKER=lld", + "-DLLVM_TARGETS_TO_BUILD=X86;AMDGPU", + "-DCMAKE_C_COMPILER_LAUNCHER=ccache", + "-DCMAKE_CXX_COMPILER_LAUNCHER=ccache", + "-DRUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES=compiler-rt;openmp", + "-DLLVM_RUNTIME_TARGETS=default;amdgcn-amd-amdhsa", + "-DLLVM_ENABLE_PROJECTS=clang;lld;llvm", + "-DLLVM_ENABLE_RUNTIMES=offload;compiler-rt;openmp", + ] + ) + + with w.step("compile-openmp", halt_on_fail=True): + w.run_ninja(builddir=llvmbuilddir, ccache_stats=True) + + with w.step("test-openmp"): + w.run_ninja( + ["check-openmp"], add_env={"HSA_ENABLE_SDMA": "0"}, builddir=llvmbuilddir + ) + + with w.step("Add check check-offload"): + w.run_ninja( + ["check-offload"], add_env={"HSA_ENABLE_SDMA": "0"}, builddir=llvmbuilddir + ) + + with w.step("LLVM: Install", halt_on_fail=True): + w.run_ninja(["install"], builddir=llvmbuilddir) diff --git a/offload/cmake/caches/FlangOffload.cmake b/offload/cmake/caches/FlangOffload.cmake new file mode 100644 index 0000000..ed75df6 --- /dev/null +++ b/offload/cmake/caches/FlangOffload.cmake @@ -0,0 +1,13 @@ +set(LLVM_ENABLE_PROJECTS "clang;flang;mlir;clang-tools-extra;lld" CACHE STRING "") +set(LLVM_ENABLE_RUNTIMES "compiler-rt;flang-rt;libunwind;libcxx;libcxxabi;openmp;offload" CACHE STRING "") +set(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR ON CACHE BOOL "") + +set(LLVM_RUNTIME_TARGETS default;amdgcn-amd-amdhsa;nvptx64-nvidia-cuda CACHE STRING "") +set(RUNTIMES_nvptx64-nvidia-cuda_CACHE_FILES "${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/NVPTX.cmake" CACHE STRING "") +set(RUNTIMES_amdgcn-amd-amdhsa_CACHE_FILES "${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/AMDGPU.cmake" CACHE STRING "") +set(RUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES "compiler-rt;libc;openmp;libcxx;libcxxabi;flang-rt" CACHE STRING "") +set(RUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES "compiler-rt;libc;openmp;libcxx;libcxxabi;flang-rt" CACHE STRING "") +set(RUNTIMES_nvptx64-nvidia-cuda_FLANG_RT_LIBC_PROVIDER llvm CACHE STRING "") +set(RUNTIMES_nvptx64-nvidia-cuda_FLANG_RT_LIBCXX_PROVIDER llvm CACHE STRING "") +set(RUNTIMES_amdgcn-amd-amdhsa_FLANG_RT_LIBC_PROVIDER llvm CACHE STRING "") +set(RUNTIMES_amdgcn-amd-amdhsa_FLANG_RT_LIBCXX_PROVIDER llvm CACHE STRING "") diff --git a/offload/include/Shared/Debug.h b/offload/include/Shared/Debug.h index f1c3386..0f98a44 100644 --- a/offload/include/Shared/Debug.h +++ b/offload/include/Shared/Debug.h @@ -45,7 +45,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Support/Format.h" -#include "llvm/Support/circular_raw_ostream.h" +#include "llvm/Support/raw_ostream.h" /// 32-Bit field data attributes controlling information presented to the user. enum OpenMPInfoType : uint32_t { @@ -142,7 +142,7 @@ inline uint32_t getInfoLevel() { return getInfoLevelInternal().load(); } #define INFO(_flags, _id, ...) \ do { \ if (::llvm::offload::debug::isDebugEnabled()) { \ - DP(__VA_ARGS__); \ + INFO_DEBUG_INT(_flags, _id, __VA_ARGS__); \ } else if (getInfoLevel() & _flags) { \ INFO_MESSAGE(_id, __VA_ARGS__); \ } \ @@ -174,6 +174,14 @@ private: bool ShouldEmitNewLineOnDestruction; bool NeedEndNewLine = false; + /// Buffer to reduce interference between different threads + /// writing at the same time to the underlying stream. + static constexpr size_t BufferSize = 256; + llvm::SmallString<BufferSize> Buffer; + + // Stream to write into Buffer. Its flushed to Os upon destruction. + llvm::raw_svector_ostream BufferStrm; + /// If the stream is muted, writes to it are ignored bool Muted = false; @@ -203,13 +211,13 @@ private: NeedEndNewLine = true; } } - void emitPrefix() { Os.write(Prefix.c_str(), Prefix.size()); } + void emitPrefix() { BufferStrm.write(Prefix.c_str(), Prefix.size()); } void writeWithPrefix(StringRef Str) { if (ShouldPrefixNextString) { emitPrefix(); ShouldPrefixNextString = false; } - Os.write(Str.data(), Str.size()); + BufferStrm.write(Str.data(), Str.size()); } public: @@ -218,26 +226,29 @@ public: bool ShouldEmitNewLineOnDestruction = true) : Prefix(std::move(Prefix)), Os(Os), BaseLevel(BaseLevel), ShouldPrefixNextString(ShouldPrefixNextString), - ShouldEmitNewLineOnDestruction(ShouldEmitNewLineOnDestruction) { + ShouldEmitNewLineOnDestruction(ShouldEmitNewLineOnDestruction), + BufferStrm(Buffer) { SetUnbuffered(); } ~odbg_ostream() final { if (ShouldEmitNewLineOnDestruction && NeedEndNewLine) - Os << '\n'; + BufferStrm << '\n'; + Os << BufferStrm.str(); } odbg_ostream(const odbg_ostream &) = delete; odbg_ostream &operator=(const odbg_ostream &) = delete; - odbg_ostream(odbg_ostream &&other) : Os(other.Os) { + odbg_ostream(odbg_ostream &&other) : Os(other.Os), BufferStrm(Buffer) { Prefix = std::move(other.Prefix); BaseLevel = other.BaseLevel; ShouldPrefixNextString = other.ShouldPrefixNextString; ShouldEmitNewLineOnDestruction = other.ShouldEmitNewLineOnDestruction; NeedEndNewLine = other.NeedEndNewLine; Muted = other.Muted; + BufferStrm << other.BufferStrm.str(); } /// Forward the current_pos method to the underlying stream. - uint64_t current_pos() const final { return Os.tell(); } + uint64_t current_pos() const final { return BufferStrm.tell(); } /// Some of the `<<` operators expect an lvalue, so we trick the type /// system. @@ -247,17 +258,8 @@ public: void shouldMute(const OnlyLevel Filter) { Muted = BaseLevel != Filter; } }; -/// dbgs - Return a circular-buffered debug stream. -[[maybe_unused]] static llvm::raw_ostream &dbgs() { - // Do one-time initialization in a thread-safe way. - static struct dbgstream { - llvm::circular_raw_ostream strm; - - dbgstream() : strm(llvm::errs(), "*** Debug Log Output ***\n", 0) {} - } thestrm; - - return thestrm.strm; -} +/// dbgs - Return the debug stream for offload debugging (just llvm::errs()). +[[maybe_unused]] static llvm::raw_ostream &dbgs() { return llvm::errs(); } #ifdef OMPTARGET_DEBUG @@ -306,8 +308,14 @@ struct DebugSettings { return; Settings.Enabled = true; - if (EnvRef.equals_insensitive("all")) - return; + + if (EnvRef.starts_with_insensitive("all")) { + auto Spec = parseDebugFilter(EnvRef); + if (Spec.Type.equals_insensitive("all")) { + Settings.DefaultLevel = Spec.Level; + return; + } + } if (!EnvRef.getAsInteger(10, Settings.DefaultLevel)) return; @@ -618,7 +626,38 @@ static inline odbg_ostream reportErrorStream() { #define FORMAT_TO_STR(Format, ...) \ ::llvm::omp::target::debug::formatToStr(Format __VA_OPT__(, ) __VA_ARGS__) -#define DP(...) ODBG() << FORMAT_TO_STR(__VA_ARGS__); +template <uint32_t InfoId> static constexpr const char *InfoIdToODT() { + constexpr auto getId = []() { + switch (InfoId) { + case OMP_INFOTYPE_KERNEL_ARGS: + return "KernelArgs"; + case OMP_INFOTYPE_MAPPING_EXISTS: + return "MappingExists"; + case OMP_INFOTYPE_DUMP_TABLE: + return "DumpTable"; + case OMP_INFOTYPE_MAPPING_CHANGED: + return "MappingChanged"; + case OMP_INFOTYPE_PLUGIN_KERNEL: + return "PluginKernel"; + case OMP_INFOTYPE_DATA_TRANSFER: + return "DataTransfer"; + case OMP_INFOTYPE_EMPTY_MAPPING: + return "EmptyMapping"; + case OMP_INFOTYPE_ALL: + return "Default"; + } + return static_cast<const char *>(nullptr); + }; + + constexpr const char *result = getId(); + static_assert(result != nullptr, "Unknown InfoId being used"); + return result; +} + +// Transform the INFO id to the corresponding debug type and print the message +#define INFO_DEBUG_INT(_flags, _id, ...) \ + ODBG(::llvm::omp::target::debug::InfoIdToODT<_flags>()) \ + << FORMAT_TO_STR(__VA_ARGS__); // Define default format for pointers static inline raw_ostream &operator<<(raw_ostream &Os, void *Ptr) { @@ -627,9 +666,11 @@ static inline raw_ostream &operator<<(raw_ostream &Os, void *Ptr) { } #else -#define DP(...) \ + +#define INFO_DEBUG_INT(_flags, _id, ...) \ { \ } + #endif // OMPTARGET_DEBUG // New REPORT macro in the same style as ODBG diff --git a/offload/include/omptarget.h b/offload/include/omptarget.h index fd458fa0..5dddd00 100644 --- a/offload/include/omptarget.h +++ b/offload/include/omptarget.h @@ -80,6 +80,10 @@ enum tgt_map_type { // Attach pointer and pointee, after processing all other maps. // Applicable to map-entering directives. Does not change ref-count. OMP_TGT_MAPTYPE_ATTACH = 0x4000, + // When a lookup fails, fall back to using null as the translated pointer, + // instead of preserving the original pointer's value. Currently only + // useful in conjunction with RETURN_PARAM. + OMP_TGT_MAPTYPE_FB_NULLIFY = 0x8000, // descriptor for non-contiguous target-update OMP_TGT_MAPTYPE_NON_CONTIG = 0x100000000000, // member of struct, member given by [16 MSBs] - 1 diff --git a/offload/liboffload/API/Queue.td b/offload/liboffload/API/Queue.td index ededa9c..4008432 100644 --- a/offload/liboffload/API/Queue.td +++ b/offload/liboffload/API/Queue.td @@ -125,3 +125,15 @@ def olLaunchHostFunction : Function { ]; let returns = []; } + +def olQueryQueue : Function { + let desc = "Query for queue work completion in a non-blocking manner."; + let details = [ + "The function checks if a queue work has completed enqueued work without blocking the calling thread." + ]; + let params = [ + Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN>, + Param<"bool *", "IsQueueWorkCompleted", " A flag indicating if the queue work has completed", PARAM_OUT_OPTIONAL> + ]; + let returns = []; +} diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp index 4a4f665..6c37f5b 100644 --- a/offload/liboffload/src/OffloadImpl.cpp +++ b/offload/liboffload/src/OffloadImpl.cpp @@ -1005,10 +1005,28 @@ Error olMemcpy_impl(ol_queue_handle_t Queue, void *DstPtr, if (auto Res = DstDevice->Device->dataSubmit(DstPtr, SrcPtr, Size, QueueImpl)) return Res; - } else { + } else if (SrcDevice->Platform.Plugin == DstDevice->Platform.Plugin && + SrcDevice->Platform.Plugin->isDataExchangable( + SrcDevice->Device->getDeviceId(), + DstDevice->Device->getDeviceId())) { if (auto Res = SrcDevice->Device->dataExchange(SrcPtr, *DstDevice->Device, DstPtr, Size, QueueImpl)) return Res; + } else { + if (Queue) + if (auto Res = olSyncQueue_impl(Queue)) + return Res; + + void *Buffer = malloc(Size); + if (!Buffer) + return createOffloadError(ErrorCode::OUT_OF_RESOURCES, + "Couldn't allocate a buffer for transfer"); + Error Res = SrcDevice->Device->dataRetrieve(Buffer, SrcPtr, Size, nullptr); + if (!Res) + Res = DstDevice->Device->dataSubmit(DstPtr, Buffer, Size, nullptr); + + free(Buffer); + return Res; } return Error::success(); @@ -1231,5 +1249,17 @@ Error olMemUnregister_impl(ol_device_handle_t Device, void *Ptr) { return Device->Device->dataUnlock(Ptr); } +Error olQueryQueue_impl(ol_queue_handle_t Queue, bool *IsQueueWorkCompleted) { + if (Queue->AsyncInfo->Queue) { + if (auto Err = Queue->Device->Device->queryAsync(Queue->AsyncInfo, false, + IsQueueWorkCompleted)) + return Err; + } else if (IsQueueWorkCompleted) { + // No underlying queue means there's no work to complete. + *IsQueueWorkCompleted = true; + } + return Error::success(); +} + } // namespace offload } // namespace llvm diff --git a/offload/libomptarget/OpenMP/InteropAPI.cpp b/offload/libomptarget/OpenMP/InteropAPI.cpp index a694af1..2eaf376 100644 --- a/offload/libomptarget/OpenMP/InteropAPI.cpp +++ b/offload/libomptarget/OpenMP/InteropAPI.cpp @@ -219,7 +219,7 @@ omp_interop_val_t *__tgt_interop_get(ident_t *LocRef, int32_t InteropType, auto DeviceOrErr = PM->getDevice(DeviceNum); if (!DeviceOrErr) { - [[maybe_unused]] std::string ErrStr = toString(DeviceOrErr.takeError()); + std::string ErrStr = toString(DeviceOrErr.takeError()); ODBG(ODT_Interface) << "Couldn't find device " << DeviceNum << " while constructing interop object: " << ErrStr; return omp_interop_none; diff --git a/offload/libomptarget/OpenMP/Mapping.cpp b/offload/libomptarget/OpenMP/Mapping.cpp index c71728c..b8edd79 100644 --- a/offload/libomptarget/OpenMP/Mapping.cpp +++ b/offload/libomptarget/OpenMP/Mapping.cpp @@ -83,10 +83,14 @@ int MappingInfoTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, /*UseHoldRefCount=*/false, /*Name=*/nullptr, /*IsRefCountINF=*/true)) .first->HDTT; - ODBG(ODT_Mapping) << "Creating new map entry: HstBase=" << NewEntry.HstPtrBase - << ", HstBegin=" << NewEntry.HstPtrBegin - << ", HstEnd=" << NewEntry.HstPtrEnd - << ", TgtBegin=" << NewEntry.TgtPtrBegin + ODBG(ODT_Mapping) << "Creating new map entry: HstBase=" + << reinterpret_cast<void *>(NewEntry.HstPtrBase) + << ", HstBegin=" + << reinterpret_cast<void *>(NewEntry.HstPtrBegin) + << ", HstEnd=" + << reinterpret_cast<void *>(NewEntry.HstPtrEnd) + << ", TgtBegin=" + << reinterpret_cast<void *>(NewEntry.TgtPtrBegin) << ", DynRefCount=" << NewEntry.dynRefCountToStr() << ", HoldRefCount=" << NewEntry.holdRefCountToStr(); (void)NewEntry; @@ -502,9 +506,11 @@ int MappingInfoTy::deallocTgtPtrAndEntry(HostDataToTargetTy *Entry, int64_t Size) { assert(Entry && "Trying to deallocate a null entry."); - ODBG(ODT_Mapping) << "Deleting tgt data " << Entry->TgtPtrBegin << " of size " - << Size << " by freeing allocation " - << "starting at " << Entry->TgtAllocBegin; + ODBG(ODT_Mapping) << "Deleting tgt data " + << reinterpret_cast<void *>(Entry->TgtPtrBegin) + << " of size " << Size << " by freeing allocation " + << "starting at " + << reinterpret_cast<void *>(Entry->TgtAllocBegin); void *Event = Entry->getEvent(); if (Event && Device.destroyEvent(Event) != OFFLOAD_SUCCESS) { diff --git a/offload/libomptarget/PluginManager.cpp b/offload/libomptarget/PluginManager.cpp index 924a986..41b653a 100644 --- a/offload/libomptarget/PluginManager.cpp +++ b/offload/libomptarget/PluginManager.cpp @@ -59,7 +59,7 @@ void PluginManager::deinit() { continue; if (auto Err = Plugin->deinit()) { - [[maybe_unused]] std::string InfoMsg = toString(std::move(Err)); + std::string InfoMsg = toString(std::move(Err)); ODBG(ODT_Deinit) << "Failed to deinit plugin: " << InfoMsg; } Plugin.release(); @@ -73,7 +73,7 @@ bool PluginManager::initializePlugin(GenericPluginTy &Plugin) { return true; if (auto Err = Plugin.init()) { - [[maybe_unused]] std::string InfoMsg = toString(std::move(Err)); + std::string InfoMsg = toString(std::move(Err)); ODBG(ODT_Init) << "Failed to init plugin: " << InfoMsg; return false; } @@ -106,7 +106,7 @@ bool PluginManager::initializeDevice(GenericPluginTy &Plugin, auto Device = std::make_unique<DeviceTy>(&Plugin, UserId, DeviceId); if (auto Err = Device->init()) { - [[maybe_unused]] std::string InfoMsg = toString(std::move(Err)); + std::string InfoMsg = toString(std::move(Err)); ODBG(ODT_Init) << "Failed to init device " << DeviceId << ": " << InfoMsg; return false; } diff --git a/offload/libomptarget/omptarget.cpp b/offload/libomptarget/omptarget.cpp index 676fda5..bd99ede 100644 --- a/offload/libomptarget/omptarget.cpp +++ b/offload/libomptarget/omptarget.cpp @@ -600,8 +600,7 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, // then no argument is marked as TARGET_PARAM ("omp target data map" is not // associated with a target region, so there are no target parameters). This // may be considered a hack, we could revise the scheme in the future. - bool UpdateRef = - !(ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF) && !(FromMapper && I == 0); + bool UpdateRef = !(ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF); MappingInfoTy::HDTTMapAccessorTy HDTTMap = Device.getMappingInfo().HostDataToTargetMap.getExclusiveAccessor(); @@ -707,14 +706,20 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, // to references to a local device pointer that refers to this device // address. // - // TODO: Add a new map-type bit to support OpenMP 6.1's `fb_nullify` - // and set the result to `nullptr - Delta`. Note that `fb_nullify` is - // already the default for `need_device_ptr`, but clang/flang do not - // support its codegen yet. - TgtPtrBase = reinterpret_cast<void *>( - reinterpret_cast<uintptr_t>(HstPtrBegin) - Delta); - ODBG(ODT_Mapping) << "Returning host pointer " << TgtPtrBase - << " as fallback (lookup failed)"; + // OpenMP 6.1's `fb_nullify` fallback behavior: when the FB_NULLIFY bit + // is set by the compiler, e.g. for `use/need_device_ptr(fb_nullify)`), + // return `nullptr - Delta` when lookup fails. + if (ArgTypes[I] & OMP_TGT_MAPTYPE_FB_NULLIFY) { + TgtPtrBase = reinterpret_cast<void *>( + reinterpret_cast<uintptr_t>(nullptr) - Delta); + ODBG(ODT_Mapping) << "Returning offsetted null pointer " << TgtPtrBase + << " as fallback (lookup failed)"; + } else { + TgtPtrBase = reinterpret_cast<void *>( + reinterpret_cast<uintptr_t>(HstPtrBegin) - Delta); + ODBG(ODT_Mapping) << "Returning host pointer " << TgtPtrBase + << " as fallback (lookup failed)"; + } } ArgsBase[I] = TgtPtrBase; } @@ -1104,9 +1109,8 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, void *HstPtrBegin = Args[I]; int64_t DataSize = ArgSizes[I]; bool IsImplicit = ArgTypes[I] & OMP_TGT_MAPTYPE_IMPLICIT; - bool UpdateRef = (!(ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF) || - (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) && - !(FromMapper && I == 0); + bool UpdateRef = !(ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF) || + (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ); bool ForceDelete = ArgTypes[I] & OMP_TGT_MAPTYPE_DELETE; bool HasPresentModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_PRESENT; bool HasHoldModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_OMPX_HOLD; @@ -1254,12 +1258,12 @@ static int targetDataContiguous(ident_t *Loc, DeviceTy &Device, void *ArgsBase, << "Restoring target descriptor " << ShadowPtr.TgtPtrAddr << " to its original content (" << ShadowPtr.PtrSize << " bytes), containing pointee address " - << ShadowPtr.TgtPtrContent.data(); + << static_cast<const void *>(ShadowPtr.TgtPtrContent.data()); } else { ODBG(ODT_Mapping) << "Restoring target pointer " << ShadowPtr.TgtPtrAddr << " to its original value " - << ShadowPtr.TgtPtrContent.data(); + << static_cast<const void *>(ShadowPtr.TgtPtrContent.data()); } Ret = Device.submitData(ShadowPtr.TgtPtrAddr, ShadowPtr.TgtPtrContent.data(), @@ -1299,12 +1303,14 @@ static int targetDataContiguous(ident_t *Loc, DeviceTy &Device, void *ArgsBase, << "Restoring host descriptor " << ShadowPtr.HstPtrAddr << " to its original content (" << ShadowPtr.PtrSize << " bytes), containing pointee address " - << ShadowPtr.HstPtrContent.data(); + << static_cast<const void *>( + ShadowPtr.HstPtrContent.data()); } else { ODBG(ODT_Mapping) << "Restoring host pointer " << ShadowPtr.HstPtrAddr << " to its original value " - << ShadowPtr.HstPtrContent.data(); + << static_cast<const void *>( + ShadowPtr.HstPtrContent.data()); } std::memcpy(ShadowPtr.HstPtrAddr, ShadowPtr.HstPtrContent.data(), ShadowPtr.PtrSize); diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp index 287bb14..379c8ec 100644 --- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp @@ -2431,7 +2431,10 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { } /// Query for the completion of the pending operations on the async info. - Error queryAsyncImpl(__tgt_async_info &AsyncInfo) override { + Error queryAsyncImpl(__tgt_async_info &AsyncInfo, bool ReleaseQueue, + bool *IsQueueWorkCompleted) override { + if (IsQueueWorkCompleted) + *IsQueueWorkCompleted = false; AMDGPUStreamTy *Stream = reinterpret_cast<AMDGPUStreamTy *>(AsyncInfo.Queue); assert(Stream && "Invalid stream"); @@ -2444,11 +2447,16 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { if (!(*CompletedOrErr)) return Plugin::success(); + if (IsQueueWorkCompleted) + *IsQueueWorkCompleted = true; // Once the stream is completed, return it to stream pool and reset // AsyncInfo. This is to make sure the synchronization only works for its // own tasks. - AsyncInfo.Queue = nullptr; - return AMDGPUStreamManager.returnResource(Stream); + if (ReleaseQueue) { + AsyncInfo.Queue = nullptr; + return AMDGPUStreamManager.returnResource(Stream); + } + return Plugin::success(); } /// Pin the host buffer and return the device pointer that should be used for diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index caf86a9..19db44c 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -854,8 +854,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy { /// Query for the completion of the pending operations on the __tgt_async_info /// structure in a non-blocking manner. - Error queryAsync(__tgt_async_info *AsyncInfo); - virtual Error queryAsyncImpl(__tgt_async_info &AsyncInfo) = 0; + Error queryAsync(__tgt_async_info *AsyncInfo, bool ReleaseQueue = true, + bool *IsQueueWorkCompleted = nullptr); + virtual Error queryAsyncImpl(__tgt_async_info &AsyncInfo, bool ReleaseQueue, + bool *IsQueueWorkCompleted) = 0; /// Check whether the architecture supports VA management virtual bool supportVAManagement() const { return false; } diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp index 4ec8366..807df0f 100644 --- a/offload/plugins-nextgen/common/src/PluginInterface.cpp +++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp @@ -849,7 +849,8 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) { } Expected<DeviceImageTy *> GenericDeviceTy::loadBinary(GenericPluginTy &Plugin, StringRef InputTgtImage) { - ODBG(OLDT_Init) << "Load data from image " << InputTgtImage.bytes_begin(); + ODBG(OLDT_Init) << "Load data from image " + << static_cast<const void *>(InputTgtImage.bytes_begin()); std::unique_ptr<MemoryBuffer> Buffer; if (identify_magic(InputTgtImage) == file_magic::bitcode) { @@ -1198,12 +1199,14 @@ Error GenericDeviceTy::synchronize(__tgt_async_info *AsyncInfo, return Plugin::success(); } -Error GenericDeviceTy::queryAsync(__tgt_async_info *AsyncInfo) { +Error GenericDeviceTy::queryAsync(__tgt_async_info *AsyncInfo, + bool ReleaseQueue, + bool *IsQueueWorkCompleted) { if (!AsyncInfo || !AsyncInfo->Queue) return Plugin::error(ErrorCode::INVALID_ARGUMENT, "invalid async info queue"); - return queryAsyncImpl(*AsyncInfo); + return queryAsyncImpl(*AsyncInfo, ReleaseQueue, IsQueueWorkCompleted); } Error GenericDeviceTy::memoryVAMap(void **Addr, void *VAddr, size_t *RSize) { @@ -1656,9 +1659,10 @@ int32_t GenericPluginTy::is_initialized() const { return Initialized; } int32_t GenericPluginTy::isPluginCompatible(StringRef Image) { auto HandleError = [&](Error Err) -> bool { - [[maybe_unused]] std::string ErrStr = toString(std::move(Err)); - ODBG(OLDT_Init) << "Failure to check validity of image " << Image.data() - << ": " << ErrStr; + std::string ErrStr = toString(std::move(Err)); + ODBG(OLDT_Init) << "Failure to check validity of image " + << static_cast<const void *>(Image.data()) << ": " + << ErrStr; return false; }; switch (identify_magic(Image)) { @@ -1685,8 +1689,9 @@ int32_t GenericPluginTy::isPluginCompatible(StringRef Image) { int32_t GenericPluginTy::isDeviceCompatible(int32_t DeviceId, StringRef Image) { auto HandleError = [&](Error Err) -> bool { - [[maybe_unused]] std::string ErrStr = toString(std::move(Err)); - ODBG(OLDT_Init) << "Failure to check validity of image " << Image << ": " + std::string ErrStr = toString(std::move(Err)); + ODBG(OLDT_Init) << "Failure to check validity of image " + << static_cast<const void *>(Image.data()) << ": " << ErrStr; return false; }; @@ -2069,7 +2074,7 @@ int32_t GenericPluginTy::use_auto_zero_copy(int32_t DeviceId) { int32_t GenericPluginTy::is_accessible_ptr(int32_t DeviceId, const void *Ptr, size_t Size) { auto HandleError = [&](Error Err) -> bool { - [[maybe_unused]] std::string ErrStr = toString(std::move(Err)); + std::string ErrStr = toString(std::move(Err)); ODBG(OLDT_Device) << "Failure while checking accessibility of pointer " << Ptr << " for device " << DeviceId << ": " << ErrStr; return false; diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp index 621c90e..d5ab0b3 100644 --- a/offload/plugins-nextgen/cuda/src/rtl.cpp +++ b/offload/plugins-nextgen/cuda/src/rtl.cpp @@ -793,7 +793,10 @@ struct CUDADeviceTy : public GenericDeviceTy { } /// Query for the completion of the pending operations on the async info. - Error queryAsyncImpl(__tgt_async_info &AsyncInfo) override { + Error queryAsyncImpl(__tgt_async_info &AsyncInfo, bool ReleaseQueue, + bool *IsQueueWorkCompleted) override { + if (IsQueueWorkCompleted) + *IsQueueWorkCompleted = false; CUstream Stream = reinterpret_cast<CUstream>(AsyncInfo.Queue); CUresult Res = cuStreamQuery(Stream); @@ -801,12 +804,16 @@ struct CUDADeviceTy : public GenericDeviceTy { if (Res == CUDA_ERROR_NOT_READY) return Plugin::success(); + if (IsQueueWorkCompleted) + *IsQueueWorkCompleted = true; // Once the stream is synchronized and the operations completed (or an error // occurs), return it to stream pool and reset AsyncInfo. This is to make // sure the synchronization only works for its own tasks. - AsyncInfo.Queue = nullptr; - if (auto Err = CUDAStreamManager.returnResource(Stream)) - return Err; + if (ReleaseQueue) { + AsyncInfo.Queue = nullptr; + if (auto Err = CUDAStreamManager.returnResource(Stream)) + return Err; + } return Plugin::check(Res, "error in cuStreamQuery: %s"); } diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp index 81fbb67..6033796 100644 --- a/offload/plugins-nextgen/host/src/rtl.cpp +++ b/offload/plugins-nextgen/host/src/rtl.cpp @@ -336,7 +336,10 @@ struct GenELF64DeviceTy : public GenericDeviceTy { /// All functions are already synchronous. No need to do anything on this /// query function. - Error queryAsyncImpl(__tgt_async_info &AsyncInfo) override { + Error queryAsyncImpl(__tgt_async_info &AsyncInfo, bool ReleaseQueue, + bool *IsQueueWorkCompleted) override { + if (IsQueueWorkCompleted) + *IsQueueWorkCompleted = true; return Plugin::success(); } diff --git a/offload/plugins-nextgen/level_zero/include/L0Device.h b/offload/plugins-nextgen/level_zero/include/L0Device.h index d14e710..001a41b 100644 --- a/offload/plugins-nextgen/level_zero/include/L0Device.h +++ b/offload/plugins-nextgen/level_zero/include/L0Device.h @@ -576,7 +576,8 @@ public: AsyncInfoWrapperTy &AsyncInfoWrapper) override; Error synchronizeImpl(__tgt_async_info &AsyncInfo, bool ReleaseQueue) override; - Error queryAsyncImpl(__tgt_async_info &AsyncInfo) override; + Error queryAsyncImpl(__tgt_async_info &AsyncInfo, bool ReleaseQueue, + bool *IsQueueWorkCompleted) override; Error dataSubmitImpl(void *TgtPtr, const void *HstPtr, int64_t Size, AsyncInfoWrapperTy &AsyncInfoWrapper) override; Error dataRetrieveImpl(void *HstPtr, const void *TgtPtr, int64_t Size, diff --git a/offload/plugins-nextgen/level_zero/src/L0Device.cpp b/offload/plugins-nextgen/level_zero/src/L0Device.cpp index 2cae1e4..4db3c4e 100644 --- a/offload/plugins-nextgen/level_zero/src/L0Device.cpp +++ b/offload/plugins-nextgen/level_zero/src/L0Device.cpp @@ -192,8 +192,7 @@ Error L0DeviceTy::initImpl(GenericPluginTy &Plugin) { CALL_ZE_RET_ERROR(zeDeviceGetCacheProperties, zeDevice, &Count, &CacheProperties); - DeviceName = - std::string(DeviceProperties.name, sizeof(DeviceProperties.name)); + DeviceName = std::string(DeviceProperties.name); ODBG(OLDT_Device) << "Found a GPU device, Name = " << DeviceProperties.name; @@ -356,10 +355,15 @@ L0DeviceTy::hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfoWrapper) { return true; } -Error L0DeviceTy::queryAsyncImpl(__tgt_async_info &AsyncInfo) { +Error L0DeviceTy::queryAsyncImpl(__tgt_async_info &AsyncInfo, bool ReleaseQueue, + bool *IsQueueWorkCompleted) { + if (IsQueueWorkCompleted) + *IsQueueWorkCompleted = true; const bool IsAsync = AsyncInfo.Queue && asyncEnabled(); if (!IsAsync) return Plugin::success(); + if (IsQueueWorkCompleted) + *IsQueueWorkCompleted = false; auto &Plugin = getPlugin(); auto *AsyncQueue = static_cast<AsyncQueueTy *>(AsyncInfo.Queue); @@ -367,6 +371,9 @@ Error L0DeviceTy::queryAsyncImpl(__tgt_async_info &AsyncInfo) { if (!AsyncQueue->WaitEvents.empty()) return Plugin::success(); + if (IsQueueWorkCompleted) + *IsQueueWorkCompleted = true; + // Commit delayed USM2M copies. for (auto &USM2M : AsyncQueue->USM2MList) { std::copy_n(static_cast<const char *>(std::get<0>(USM2M)), @@ -377,9 +384,11 @@ Error L0DeviceTy::queryAsyncImpl(__tgt_async_info &AsyncInfo) { std::copy_n(static_cast<char *>(std::get<0>(H2M)), std::get<2>(H2M), static_cast<char *>(std::get<1>(H2M))); } - Plugin.releaseAsyncQueue(AsyncQueue); - getStagingBuffer().reset(); - AsyncInfo.Queue = nullptr; + if (ReleaseQueue) { + Plugin.releaseAsyncQueue(AsyncQueue); + getStagingBuffer().reset(); + AsyncInfo.Queue = nullptr; + } return Plugin::success(); } diff --git a/offload/test/mapping/declare_mapper_target_checks.cpp b/offload/test/mapping/declare_mapper_target_checks.cpp new file mode 100644 index 0000000..562e283 --- /dev/null +++ b/offload/test/mapping/declare_mapper_target_checks.cpp @@ -0,0 +1,145 @@ +// RUN: %libomptarget-compilexx-run-and-check-generic +// XFAIL: intelgpu + +#include <omp.h> +#include <stdio.h> + +#define TRUE 1 +#define FALSE 0 + +struct TY1 { + int i1, i2, i3; + static constexpr auto name = "TY1"; +}; +struct TY2 { + int i1, i2, i3; + static constexpr auto name = "TY2"; +}; + +// TY1 is not mapped, TY2 is +#pragma omp declare mapper(TY2 t) map(to : t.i1) map(from : t.i3) + +struct TY3 { + TY2 n; + static constexpr auto name = "TY3"; +}; +struct TY4 { + int a; + TY2 n; + int b; + static constexpr auto name = "TY4"; +}; + +template <typename T> int testType() { + T t1[2], t2[3], t3[4]; + for (int i = 0; i < 2; i++) + t1[i].i1 = t3[i].i1 = 1; + +#pragma omp target map(tofrom : t1, t2, t3) + for (int i = 0; i < 2; i++) { + t1[i].i3 = t3[i].i3 = t1[i].i1; + t1[i].i1 = t3[i].i1 = 7; + } + + for (int i = 0; i < 2; i++) { + if (t1[i].i3 != 1) { + printf("failed %s. t1[%d].i3 (%d) != t1[%d].i1 (%d)\n", T::name, i, + t1[i].i3, i, t1[i].i1); + return 1; + } + if (t3[i].i3 != 1) { + printf("failed %s. t3[%d].i3 (%d) != t3[%d].i1 (%d)\n", T::name, i, + t3[i].i3, i, t3[i].i1); + return 1; + } + } + + int pt0 = omp_target_is_present(&t1[0], omp_get_default_device()); + int pt1 = omp_target_is_present(&t2[1], omp_get_default_device()); + int pt2 = omp_target_is_present(&t3[2], omp_get_default_device()); + + printf("present check for %s: t1 %i, t2 %i, t3 %i, expected 3x 0\n", T::name, + pt0, pt1, pt2); + return pt0 + pt1 + pt2; +} + +template <typename T> int testTypeNestedPtr(T t1[2], T t2[3], T t3[4]) { + for (int i = 0; i < 2; i++) + t1[i].n.i1 = t3[i].n.i1 = 1; + +#pragma omp target map(tofrom : t1[0 : 2], t2[0 : 3], t3[0 : 4]) + for (int i = 0; i < 2; i++) { + t1[i].n.i3 = t3[i].n.i3 = t1[i].n.i1; + t1[i].n.i1 = t3[i].n.i1 = 7; + } + + for (int i = 0; i < 2; i++) { + if (t1[i].n.i3 != t1[i].n.i1) { + printf("failed %s-ptr. t1[%d].i3 (%d) != t1[%d].i1 (%d)\n", T::name, i, + t1[i].n.i3, i, t1[i].n.i1); + return 1; + } + if (t3[i].n.i3 != t3[i].n.i1) { + printf("failed %s-ptr. t3[%d].i3 (%d) != t3[%d].i1 (%d)\n", T::name, i, + t3[i].n.i3, i, t3[i].n.i1); + return 1; + } + } + + int pt0 = omp_target_is_present(&t1[0], omp_get_default_device()); + int pt1 = omp_target_is_present(&t2[1], omp_get_default_device()); + int pt2 = omp_target_is_present(&t3[2], omp_get_default_device()); + + printf("present check for %s-ptr: t1 %i, t2 %i, t3 %i, expected 3x 0\n", + T::name, pt0, pt1, pt2); + return pt0 + pt1 + pt2; +} + +template <typename T> int testTypeNested() { + T t1[2], t2[3], t3[4]; + testTypeNestedPtr(t1, t2, t3); + for (int i = 0; i < 2; i++) + t1[i].n.i1 = t3[i].n.i1 = 1; + +#pragma omp target map(tofrom : t1, t2, t3) + for (int i = 0; i < 2; i++) { + t1[i].n.i3 = t3[i].n.i3 = t1[i].n.i1; + t1[i].n.i1 = t3[i].n.i1 = 7; + } + + for (int i = 0; i < 2; i++) { + if (t1[i].n.i3 != t1[i].n.i1) { + printf("failed %s. t1[%d].i3 (%d) != t1[%d].i1 (%d)\n", T::name, i, + t1[i].n.i3, i, t1[i].n.i1); + return 1; + } + if (t3[i].n.i3 != t3[i].n.i1) { + printf("failed %s. t3[%d].i3 (%d) != t3[%d].i1 (%d)\n", T::name, i, + t3[i].n.i3, i, t3[i].n.i1); + return 1; + } + } + + int pt0 = omp_target_is_present(&t1[0], omp_get_default_device()); + int pt1 = omp_target_is_present(&t2[1], omp_get_default_device()); + int pt2 = omp_target_is_present(&t3[2], omp_get_default_device()); + + printf("present check for %s: t1 %i, t2 %i, t3 %i, expected 3x 0\n", T::name, + pt0, pt1, pt2); + return pt0 + pt1 + pt2; +} + +int main(int argc, char **argv) { + int r = 0; + r += testType<TY1>(); + // CHECK: present check for TY1: t1 0, t2 0, t3 0, expected 3x 0 + r += testType<TY2>(); + // CHECK: present check for TY2: t1 0, t2 0, t3 0, expected 3x 0 + r += testTypeNested<TY3>(); + // CHECK: present check for TY3-ptr: t1 0, t2 0, t3 0, expected 3x 0 + // CHECK: present check for TY3: t1 0, t2 0, t3 0, expected 3x 0 + r += testTypeNested<TY4>(); + // CHECK: present check for TY4-ptr: t1 0, t2 0, t3 0, expected 3x 0 + // CHECK: present check for TY4: t1 0, t2 0, t3 0, expected 3x 0 + return r; +} diff --git a/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_fallback.cpp b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_fallback.cpp new file mode 100644 index 0000000..5c232d5 --- /dev/null +++ b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_fallback.cpp @@ -0,0 +1,32 @@ +// RUN: %libomptarget-compilexx-generic +// RUN: %libomptarget-run-generic 2>&1 \ +// RUN: | %fcheck-generic +// RUN: env OMP_TARGET_OFFLOAD=disabled %libomptarget-run-generic 2>&1 \ +// RUN: | %fcheck-generic + +// Test that when a use_device_ptr lookup fails, the +// privatized pointer retains its original value by +// default. +// +// This is necessary because we must assume that the +// pointee is device-accessible, even if it was not +// previously mapped. + +#include <stdio.h> + +int x = 0; + +struct ST { + int *a = &x; + + void f1() { + printf("%p\n", a); // CHECK: 0x[[#%x,ADDR:]] +#pragma omp target data use_device_ptr(a) + printf("%p\n", a); // CHECK-NEXT: 0x{{0*}}[[#ADDR]] + } +}; + +int main() { + ST s; + s.f1(); +} diff --git a/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_fallback_nullify.cpp b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_fallback_nullify.cpp new file mode 100644 index 0000000..fca0eee --- /dev/null +++ b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_fallback_nullify.cpp @@ -0,0 +1,28 @@ +// RUN: %libomptarget-compilexx-generic -fopenmp-version=61 +// RUN: %libomptarget-run-generic 2>&1 \ +// RUN: | %fcheck-generic -check-prefixes=CHECK,OFFLOAD +// RUN: env OMP_TARGET_OFFLOAD=disabled %libomptarget-run-generic 2>&1 \ +// RUN: | %fcheck-generic -check-prefixes=CHECK,NOOFFLOAD + +// Test that when a use_device_ptr lookup fails, the +// privatized pointer is set to null because of fb_nullify. + +#include <stdio.h> + +int x = 0; + +struct ST { + int *a = &x; + + void f1() { + printf("%p\n", a); // CHECK: 0x[[#%x,ADDR:]] +#pragma omp target data use_device_ptr(fb_nullify : a) + printf("%p\n", a); // OFFLOAD-NEXT: (nil) + // NOOFFLOAD-NEXT: 0x{{0*}}[[#ADDR]] + } +}; + +int main() { + ST s; + s.f1(); +} diff --git a/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_fallback_preserve.cpp b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_fallback_preserve.cpp new file mode 100644 index 0000000..51944c5 --- /dev/null +++ b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_fallback_preserve.cpp @@ -0,0 +1,28 @@ +// RUN: %libomptarget-compilexx-generic -fopenmp-version=61 +// RUN: %libomptarget-run-generic 2>&1 \ +// RUN: | %fcheck-generic +// RUN: env OMP_TARGET_OFFLOAD=disabled %libomptarget-run-generic 2>&1 \ +// RUN: | %fcheck-generic + +// Test that when a use_device_ptr lookup fails, the +// privatized pointer retains its original value +// because of fb_preserve. + +#include <stdio.h> + +int x = 0; + +struct ST { + int *a = &x; + + void f1() { + printf("%p\n", a); // CHECK: 0x[[#%x,ADDR:]] +#pragma omp target data use_device_ptr(fb_preserve : a) + printf("%p\n", a); // CHECK-NEXT: 0x{{0*}}[[#ADDR]] + } +}; + +int main() { + ST s; + s.f1(); +} diff --git a/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_ref_fallback.cpp b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_ref_fallback.cpp new file mode 100644 index 0000000..59a8fac --- /dev/null +++ b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_ref_fallback.cpp @@ -0,0 +1,33 @@ +// RUN: %libomptarget-compilexx-generic +// RUN: %libomptarget-run-generic 2>&1 \ +// RUN: | %fcheck-generic +// RUN: env OMP_TARGET_OFFLOAD=disabled %libomptarget-run-generic 2>&1 \ +// RUN: | %fcheck-generic + +// Test that when a use_device_ptr lookup fails, the +// privatized pointer retains its original value by +// default. +// +// This is necessary because we must assume that the +// pointee is device-accessible, even if it was not +// previously mapped. + +#include <stdio.h> + +int x = 0; +int *y = &x; + +struct ST { + int *&b = y; + + void f2() { + printf("%p\n", b); // CHECK: 0x[[#%x,ADDR:]] +#pragma omp target data use_device_ptr(b) + printf("%p\n", b); // CHECK-NEXT: 0x{{0*}}[[#ADDR]] + } +}; + +int main() { + ST s; + s.f2(); +} diff --git a/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_ref_fallback_nullify.cpp b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_ref_fallback_nullify.cpp new file mode 100644 index 0000000..65c7173 --- /dev/null +++ b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_ref_fallback_nullify.cpp @@ -0,0 +1,29 @@ +// RUN: %libomptarget-compilexx-generic -fopenmp-version=61 +// RUN: %libomptarget-run-generic 2>&1 \ +// RUN: | %fcheck-generic -check-prefixes=CHECK,OFFLOAD +// RUN: env OMP_TARGET_OFFLOAD=disabled %libomptarget-run-generic 2>&1 \ +// RUN: | %fcheck-generic -check-prefixes=CHECK,NOOFFLOAD + +// Test that when a use_device_ptr lookup fails, the +// privatized pointer is set to null because of fb_nullify. + +#include <stdio.h> + +int x = 0; +int *y = &x; + +struct ST { + int *&b = y; + + void f2() { + printf("%p\n", b); // CHECK: 0x[[#%x,ADDR:]] +#pragma omp target data use_device_ptr(fb_nullify : b) + printf("%p\n", b); // OFFLOAD-NEXT: (nil) + // NOOFFLOAD-NEXT: 0x{{0*}}[[#ADDR]] + } +}; + +int main() { + ST s; + s.f2(); +} diff --git a/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_ref_fallback_preserve.cpp b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_ref_fallback_preserve.cpp new file mode 100644 index 0000000..beeb752 --- /dev/null +++ b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_class_member_ref_fallback_preserve.cpp @@ -0,0 +1,29 @@ +// RUN: %libomptarget-compilexx-generic -fopenmp-version=61 +// RUN: %libomptarget-run-generic 2>&1 \ +// RUN: | %fcheck-generic +// RUN: env OMP_TARGET_OFFLOAD=disabled %libomptarget-run-generic 2>&1 \ +// RUN: | %fcheck-generic + +// Test that when a use_device_ptr lookup fails, the +// privatized pointer retains its original value +// because of fb_preserve. + +#include <stdio.h> + +int x = 0; +int *y = &x; + +struct ST { + int *&b = y; + + void f2() { + printf("%p\n", b); // CHECK: 0x[[#%x,ADDR:]] +#pragma omp target data use_device_ptr(fb_preserve : b) + printf("%p\n", b); // CHECK-NEXT: 0x{{0*}}[[#ADDR]] + } +}; + +int main() { + ST s; + s.f2(); +} diff --git a/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_fallback.c b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_fallback.cpp index 33a3634..5be209a 100644 --- a/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_fallback.c +++ b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_fallback.cpp @@ -1,4 +1,8 @@ -// RUN: %libomptarget-compilexx-run-and-check-generic +// RUN: %libomptarget-compilexx-generic +// RUN: %libomptarget-run-generic 2>&1 \ +// RUN: | %fcheck-generic +// RUN: env OMP_TARGET_OFFLOAD=disabled %libomptarget-run-generic 2>&1 \ +// RUN: | %fcheck-generic // Test that when a use_device_ptr lookup fails, the // privatized pointer retains its original value by diff --git a/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_fallback_nullify.cpp b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_fallback_nullify.cpp new file mode 100644 index 0000000..984744cd --- /dev/null +++ b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_fallback_nullify.cpp @@ -0,0 +1,21 @@ +// RUN: %libomptarget-compilexx-generic -fopenmp-version=61 +// RUN: %libomptarget-run-generic 2>&1 \ +// RUN: | %fcheck-generic -check-prefixes=CHECK,OFFLOAD +// RUN: env OMP_TARGET_OFFLOAD=disabled %libomptarget-run-generic 2>&1 \ +// RUN: | %fcheck-generic -check-prefixes=CHECK,NOOFFLOAD + +// Test that when a use_device_ptr lookup fails, the +// privatized pointer is set to null because of fb_nullify. + +#include <stdio.h> +int x; +int *xp = &x; + +void f1() { + printf("%p\n", xp); // CHECK: 0x[[#%x,ADDR:]] +#pragma omp target data use_device_ptr(fb_nullify : xp) + printf("%p\n", xp); // OFFLOAD-NEXT: (nil) + // NOOFFLOAD-NEXT: 0x{{0*}}[[#ADDR]] +} + +int main() { f1(); } diff --git a/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_fallback_preserve.cpp b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_fallback_preserve.cpp new file mode 100644 index 0000000..197704f --- /dev/null +++ b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_fallback_preserve.cpp @@ -0,0 +1,21 @@ +// RUN: %libomptarget-compilexx-generic -fopenmp-version=61 +// RUN: %libomptarget-run-generic 2>&1 \ +// RUN: | %fcheck-generic +// RUN: env OMP_TARGET_OFFLOAD=disabled %libomptarget-run-generic 2>&1 \ +// RUN: | %fcheck-generic + +// Test that when a use_device_ptr lookup fails, the +// privatized pointer retains its original value +// because of fb_preserve. + +#include <stdio.h> +int x; +int *xp = &x; + +void f1() { + printf("%p\n", xp); // CHECK: 0x[[#%x,ADDR:]] +#pragma omp target data use_device_ptr(fb_preserve : xp) + printf("%p\n", xp); // CHECK-NEXT: 0x{{0*}}[[#ADDR]] +} + +int main() { f1(); } diff --git a/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_ref_fallback.cpp b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_ref_fallback.cpp new file mode 100644 index 0000000..1060ed9 --- /dev/null +++ b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_ref_fallback.cpp @@ -0,0 +1,26 @@ +// RUN: %libomptarget-compilexx-generic +// RUN: %libomptarget-run-generic 2>&1 \ +// RUN: | %fcheck-generic +// RUN: env OMP_TARGET_OFFLOAD=disabled %libomptarget-run-generic 2>&1 \ +// RUN: | %fcheck-generic + +// Test that when a use_device_ptr lookup fails, the +// privatized pointer retains its original value by +// default. +// +// This is necessary because we must assume that the +// pointee is device-accessible, even if it was not +// previously mapped. + +#include <stdio.h> +int x; +int *xp = &x; +int *&xpr = xp; + +void f2() { + printf("%p\n", xpr); // CHECK: 0x[[#%x,ADDR:]] +#pragma omp target data use_device_ptr(xpr) + printf("%p\n", xpr); // CHECK-NEXT: 0x{{0*}}[[#ADDR]] +} + +int main() { f2(); } diff --git a/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_ref_fallback_nullify.cpp b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_ref_fallback_nullify.cpp new file mode 100644 index 0000000..7fa76dd6 --- /dev/null +++ b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_ref_fallback_nullify.cpp @@ -0,0 +1,24 @@ +// RUN: %libomptarget-compilexx-generic -fopenmp-version=61 +// RUN: %libomptarget-run-generic 2>&1 \ +// RUN: | %fcheck-generic -check-prefixes=CHECK,OFFLOAD +// RUN: env OMP_TARGET_OFFLOAD=disabled %libomptarget-run-generic 2>&1 \ +// RUN: | %fcheck-generic -check-prefixes=CHECK,NOOFFLOAD + +// Test that when a use_device_ptr lookup fails, the +// privatized pointer is set to null because of fb_nullify. + +#include <stdio.h> +int x; +int *xp = &x; +int *&xpr = xp; + +void f2() { + printf("%p\n", xpr); // CHECK: 0x[[#%x,ADDR:]] + // FIXME: We won't get "nil" until we start privatizing xpr. +#pragma omp target data use_device_ptr(fb_nullify : xpr) + printf("%p\n", xpr); // EXPECTED-OFFLOAD-NEXT: (nil) + // OFFLOAD-NEXT: 0x{{0*}}[[#ADDR]] + // NOOFFLOAD-NEXT: 0x{{0*}}[[#ADDR]] +} + +int main() { f2(); } diff --git a/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_ref_fallback_preserve.cpp b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_ref_fallback_preserve.cpp new file mode 100644 index 0000000..e7f8bd4 --- /dev/null +++ b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_ref_fallback_preserve.cpp @@ -0,0 +1,22 @@ +// RUN: %libomptarget-compilexx-generic -fopenmp-version=61 +// RUN: %libomptarget-run-generic 2>&1 \ +// RUN: | %fcheck-generic +// RUN: env OMP_TARGET_OFFLOAD=disabled %libomptarget-run-generic 2>&1 \ +// RUN: | %fcheck-generic + +// Test that when a use_device_ptr lookup fails, the +// privatized pointer retains its original value +// because of fb_preserve. + +#include <stdio.h> +int x; +int *xp = &x; +int *&xpr = xp; + +void f2() { + printf("%p\n", xpr); // CHECK: 0x[[#%x,ADDR:]] +#pragma omp target data use_device_ptr(fb_preserve : xpr) + printf("%p\n", xpr); // CHECK-NEXT: 0x{{0*}}[[#ADDR]] +} + +int main() { f2(); } diff --git a/offload/test/offloading/fortran/default-mapper-derived-enter-data-teams-collapse.f90 b/offload/test/offloading/fortran/default-mapper-derived-enter-data-teams-collapse.f90 new file mode 100644 index 0000000..6b87e81 --- /dev/null +++ b/offload/test/offloading/fortran/default-mapper-derived-enter-data-teams-collapse.f90 @@ -0,0 +1,53 @@ +! Regression test for default mappers on nested derived types with allocatable +! members when mapping a parent object and running an optimized target region. + +! REQUIRES: flang, amdgpu + +! RUN: %libomptarget-compile-fortran-generic -O3 +! RUN: %libomptarget-run-generic | %fcheck-generic + +program test_default_mapper_enter_data_teams_collapse + implicit none + + type inner_type + real, allocatable :: data(:) + end type inner_type + + type outer_type + type(inner_type) :: inner + character(len=19) :: desc = ' ' + end type outer_type + + type(outer_type) :: obj + integer, parameter :: n = 10 + integer :: i, j + real :: expected, actual + + allocate(obj%inner%data(n)) + obj%inner%data = 0.0 + + !$omp target enter data map(to: obj) + + !$omp target teams distribute parallel do collapse(2) + do i = 1, n + do j = 1, n + obj%inner%data(i) = real(i) + end do + end do + !$omp end target teams distribute parallel do + + !$omp target exit data map(from: obj) + + expected = real(n * (n + 1)) / 2.0 + actual = sum(obj%inner%data) + + if (abs(actual - expected) < 1.0e-6) then + print *, "PASS" + else + print *, "FAIL", actual, expected + end if + + deallocate(obj%inner%data) +end program test_default_mapper_enter_data_teams_collapse + +! CHECK: PASS diff --git a/offload/test/offloading/fortran/recursive-default-mapper.f90 b/offload/test/offloading/fortran/recursive-default-mapper.f90 new file mode 100644 index 0000000..47b706d --- /dev/null +++ b/offload/test/offloading/fortran/recursive-default-mapper.f90 @@ -0,0 +1,40 @@ +! Offloading test for recursive default mapper emission +! REQUIRES: flang, amdgpu + +! RUN: %libomptarget-compile-fortran-run-and-check-generic + +module recursive_mapper_mod + implicit none + + type :: inner + integer :: value + type(inner), pointer :: next + end type inner + + type :: outer + integer, allocatable :: arr(:) + type(inner), pointer :: head + end type outer + +contains + +end module recursive_mapper_mod + +program main + use recursive_mapper_mod + implicit none + + type(outer) :: o + + allocate(o%arr(2)) + o%arr = [1, 2] + + !$omp target map(tofrom: o) + o%arr(1) = o%arr(1) + 1 + o%arr(2) = o%arr(2) + 1 + !$omp end target + + print *, o%arr(1), o%arr(2) +end program main + +! CHECK: 2 3 diff --git a/offload/test/offloading/fortran/target-defaultmap-implicit-mapper.f90 b/offload/test/offloading/fortran/target-defaultmap-implicit-mapper.f90 new file mode 100644 index 0000000..d2d8f7a --- /dev/null +++ b/offload/test/offloading/fortran/target-defaultmap-implicit-mapper.f90 @@ -0,0 +1,49 @@ +! Offload test that ensures defaultmap(tofrom: scalar) does not suppress +! implicit default mapper generation for allocatable derived types. +! REQUIRES: flang, amdgpu + +! RUN: %libomptarget-compile-fortran-run-and-check-generic +program defaultmap_implicit_mapper + implicit none + + type :: payload_t + integer, allocatable :: arr(:) + end type payload_t + + type(payload_t), allocatable :: obj + integer, parameter :: n = 8 + integer :: i + integer :: scalar + logical :: ok + + allocate(obj) + allocate(obj%arr(n)) + obj%arr = 1 + scalar = 2 + + !$omp target defaultmap(tofrom: scalar) + do i = 1, n + obj%arr(i) = obj%arr(i) + scalar + end do + scalar = 7 + !$omp end target + + ok = .true. + do i = 1, n + if (obj%arr(i) /= 3) ok = .false. + end do + if (scalar /= 7) ok = .false. + + if (ok) then + print *, "Test passed!" + else + print *, "Test failed!" + print *, obj%arr + print *, scalar + end if + + deallocate(obj%arr) + deallocate(obj) +end program defaultmap_implicit_mapper + +! CHECK: Test passed! diff --git a/offload/test/offloading/fortran/target-parameter-array.f90 b/offload/test/offloading/fortran/target-parameter-array.f90 new file mode 100644 index 0000000..b85fb06 --- /dev/null +++ b/offload/test/offloading/fortran/target-parameter-array.f90 @@ -0,0 +1,131 @@ +! Offload test for parameter (constant) arrays and character scalars accessed +! with dynamic indices/substrings in OpenMP target regions. + +! REQUIRES: flang, amdgpu + +! RUN: %libomptarget-compile-fortran-run-and-check-generic + +program test_parameter_mapping + implicit none + integer, parameter :: dp = selected_real_kind(15, 307) + logical :: all_tests_pass + + all_tests_pass = .true. + + ! Test 1: Parameter array with dynamic index + call test_param_array_dynamic_index(all_tests_pass) + + ! Test 2: Integer parameter array + call test_int_param_array(all_tests_pass) + + ! Test 3: Character scalar with dynamic substring + call test_char_substring(all_tests_pass) + + ! Test 4: Verify scalar parameters work (inlined) + call test_scalar_param(all_tests_pass) + + if (all_tests_pass) then + print *, "PASS" + else + print *, "FAIL" + endif + +contains + +! Test 1: Parameter array with dynamic index in target region +subroutine test_param_array_dynamic_index(test_pass) + logical, intent(inout) :: test_pass + real(dp), parameter :: const_array(3) = [1.0_dp, 2.0_dp, 3.0_dp] + integer :: idx + real(dp) :: result + real(dp), parameter :: expected = 2.0_dp + real(dp), parameter :: tolerance = 1.0e-10_dp + + idx = 2 + result = 0.0_dp + + !$omp target map(tofrom:result) map(to:idx) + ! Access parameter array with dynamic index + result = const_array(idx) + !$omp end target + + if (abs(result - expected) > tolerance) then + print *, "Test 1 FAILED: expected", expected, "got", result + test_pass = .false. + endif +end subroutine test_param_array_dynamic_index + +! Test 2: Integer parameter array with different indices +subroutine test_int_param_array(test_pass) + logical, intent(inout) :: test_pass + integer, parameter :: int_array(4) = [10, 20, 30, 40] + integer :: idx1, idx2 + integer :: result1, result2 + + idx1 = 1 + idx2 = 4 + result1 = 0 + result2 = 0 + + !$omp target map(tofrom:result1, result2) map(to:idx1, idx2) + ! Access parameter array with different dynamic indices + result1 = int_array(idx1) + result2 = int_array(idx2) + !$omp end target + + if (result1 /= 10 .or. result2 /= 40) then + print *, "Test 2 FAILED: expected 10, 40 got", result1, result2 + test_pass = .false. + endif +end subroutine test_int_param_array + +! Test 3: Character scalar parameter with dynamic substring access +subroutine test_char_substring(test_pass) + logical, intent(inout) :: test_pass + character(len=20), parameter :: char_scalar = "constant_string_data" + integer :: start_idx, end_idx + character(len=8) :: result + character(len=8), parameter :: expected = "string_d" + + start_idx = 10 + end_idx = 17 + result = "" + + !$omp target map(tofrom:result) map(to:start_idx, end_idx) + ! Dynamic substring access - character scalar must be mapped + result = char_scalar(start_idx:end_idx) + !$omp end target + + if (result /= expected) then + print *, "Test 3 FAILED: expected '", expected, "' got '", result, "'" + test_pass = .false. + endif +end subroutine test_char_substring + +! Test 4: Scalar parameter (can be inlined, no mapping needed) +subroutine test_scalar_param(test_pass) + logical, intent(inout) :: test_pass + integer, parameter :: scalar_const = 42 + real(dp), parameter :: real_const = 3.14159_dp + integer :: int_result + real(dp) :: real_result + real(dp), parameter :: tolerance = 1.0e-5_dp + + int_result = 0 + real_result = 0.0_dp + + !$omp target map(tofrom:int_result, real_result) + ! Scalar parameters should be inlined (no mapping needed) + int_result = scalar_const + real_result = real_const + !$omp end target + + if (int_result /= 42 .or. abs(real_result - real_const) > tolerance) then + print *, "Test 4 FAILED: expected 42, 3.14159 got", int_result, real_result + test_pass = .false. + endif +end subroutine test_scalar_param + +end program test_parameter_mapping + +! CHECK: PASS diff --git a/offload/test/offloading/strided_multiple_update.c b/offload/test/offloading/strided_multiple_update_from.c index 4f2df81..4f2df81 100644 --- a/offload/test/offloading/strided_multiple_update.c +++ b/offload/test/offloading/strided_multiple_update_from.c diff --git a/offload/test/offloading/strided_multiple_update_to.c b/offload/test/offloading/strided_multiple_update_to.c new file mode 100644 index 0000000..bb16d7a --- /dev/null +++ b/offload/test/offloading/strided_multiple_update_to.c @@ -0,0 +1,124 @@ +// This test checks that #pragma omp target update to(data1[0:3:4], +// data2[0:2:5]) correctly updates disjoint strided sections of multiple arrays +// from the host to the device. + +// RUN: %libomptarget-compile-run-and-check-generic +// XFAIL: intelgpu + +#include <omp.h> +#include <stdio.h> + +int main() { + int len = 12; + double data1[len], data2[len]; + + // Initialize host arrays + for (int i = 0; i < len; i++) { + data1[i] = i; + data2[i] = i * 10; + } + + printf("original host array values:\n"); + printf("data1:\n"); + for (int i = 0; i < len; i++) + printf("%.1f\n", data1[i]); + printf("data2:\n"); + for (int i = 0; i < len; i++) + printf("%.1f\n", data2[i]); + + // CHECK: original host array values: + // CHECK-NEXT: data1: + // CHECK-NEXT: 0.0 + // CHECK-NEXT: 1.0 + // CHECK-NEXT: 2.0 + // CHECK-NEXT: 3.0 + // CHECK-NEXT: 4.0 + // CHECK-NEXT: 5.0 + // CHECK-NEXT: 6.0 + // CHECK-NEXT: 7.0 + // CHECK-NEXT: 8.0 + // CHECK-NEXT: 9.0 + // CHECK-NEXT: 10.0 + // CHECK-NEXT: 11.0 + // CHECK-NEXT: data2: + // CHECK-NEXT: 0.0 + // CHECK-NEXT: 10.0 + // CHECK-NEXT: 20.0 + // CHECK-NEXT: 30.0 + // CHECK-NEXT: 40.0 + // CHECK-NEXT: 50.0 + // CHECK-NEXT: 60.0 + // CHECK-NEXT: 70.0 + // CHECK-NEXT: 80.0 + // CHECK-NEXT: 90.0 + // CHECK-NEXT: 100.0 + // CHECK-NEXT: 110.0 + +#pragma omp target data map(tofrom : data1[0 : len], data2[0 : len]) + { + // Initialize device arrays to 20 +#pragma omp target + { + for (int i = 0; i < len; i++) { + data1[i] = 20.0; + data2[i] = 20.0; + } + } + + // Modify host arrays for strided elements + data1[0] = 10.0; + data1[4] = 10.0; + data1[8] = 10.0; + data2[0] = 10.0; + data2[5] = 10.0; + + // data1[0:3:4] // indices 0,4,8 + // data2[0:2:5] // indices 0,5 +#pragma omp target update to(data1[0 : 3 : 4], data2[0 : 2 : 5]) + + // Verify on device by adding 5 +#pragma omp target + { + for (int i = 0; i < len; i++) + data1[i] += 5.0; + for (int i = 0; i < len; i++) + data2[i] += 5.0; + } + } + + printf("device array values after update to:\n"); + printf("data1:\n"); + for (int i = 0; i < len; i++) + printf("%.1f\n", data1[i]); + printf("data2:\n"); + for (int i = 0; i < len; i++) + printf("%.1f\n", data2[i]); + + // CHECK: device array values after update to: + // CHECK-NEXT: data1: + // CHECK-NEXT: 15.0 + // CHECK-NEXT: 25.0 + // CHECK-NEXT: 25.0 + // CHECK-NEXT: 25.0 + // CHECK-NEXT: 15.0 + // CHECK-NEXT: 25.0 + // CHECK-NEXT: 25.0 + // CHECK-NEXT: 25.0 + // CHECK-NEXT: 15.0 + // CHECK-NEXT: 25.0 + // CHECK-NEXT: 25.0 + // CHECK-NEXT: 25.0 + // CHECK-NEXT: data2: + // CHECK-NEXT: 15.0 + // CHECK-NEXT: 25.0 + // CHECK-NEXT: 25.0 + // CHECK-NEXT: 25.0 + // CHECK-NEXT: 25.0 + // CHECK-NEXT: 15.0 + // CHECK-NEXT: 25.0 + // CHECK-NEXT: 25.0 + // CHECK-NEXT: 25.0 + // CHECK-NEXT: 25.0 + // CHECK-NEXT: 25.0 + // CHECK-NEXT: 25.0 +} diff --git a/offload/test/offloading/strided_partial_update.c b/offload/test/offloading/strided_partial_update_from.c index 4a2977a..4a2977a 100644 --- a/offload/test/offloading/strided_partial_update.c +++ b/offload/test/offloading/strided_partial_update_from.c diff --git a/offload/test/offloading/strided_partial_update_to.c b/offload/test/offloading/strided_partial_update_to.c new file mode 100644 index 0000000..f9c960f --- /dev/null +++ b/offload/test/offloading/strided_partial_update_to.c @@ -0,0 +1,74 @@ +// This test checks that #pragma omp target update to(data[0:4:3]) correctly +// updates every third element (stride 3) from the host to the device, partially +// across the array + +// RUN: %libomptarget-compile-run-and-check-generic +// XFAIL: intelgpu + +#include <omp.h> +#include <stdio.h> + +int main() { + int len = 11; + double data[len]; + + // Initialize on host + for (int i = 0; i < len; i++) + data[i] = i; + + // Initial values + printf("original host array values:\n"); + for (int i = 0; i < len; i++) + printf("%f\n", data[i]); + printf("\n"); + + // CHECK: 0.000000 + // CHECK: 1.000000 + // CHECK: 2.000000 + // CHECK: 3.000000 + // CHECK: 4.000000 + // CHECK: 5.000000 + // CHECK: 6.000000 + // CHECK: 7.000000 + // CHECK: 8.000000 + // CHECK: 9.000000 + // CHECK: 10.000000 + +#pragma omp target data map(tofrom : data[0 : len]) + { + // Initialize device array to 20 +#pragma omp target + for (int i = 0; i < len; i++) + data[i] = 20.0; + + // Modify host data for strided elements + data[0] = 10.0; + data[3] = 10.0; + data[6] = 10.0; + data[9] = 10.0; + +#pragma omp target update to(data[0 : 4 : 3]) // indices 0,3,6,9 + + // Verify on device by adding 5 +#pragma omp target + for (int i = 0; i < len; i++) + data[i] += 5.0; + } + + printf("device array values after update to:\n"); + for (int i = 0; i < len; i++) + printf("%f\n", data[i]); + printf("\n"); + + // CHECK: 15.000000 + // CHECK: 25.000000 + // CHECK: 25.000000 + // CHECK: 15.000000 + // CHECK: 25.000000 + // CHECK: 25.000000 + // CHECK: 15.000000 + // CHECK: 25.000000 + // CHECK: 25.000000 + // CHECK: 15.000000 + // CHECK: 25.000000 +} diff --git a/offload/test/offloading/strided_update.c b/offload/test/offloading/strided_update_from.c index 9910bed..9910bed 100644 --- a/offload/test/offloading/strided_update.c +++ b/offload/test/offloading/strided_update_from.c diff --git a/offload/test/offloading/strided_update_to.c b/offload/test/offloading/strided_update_to.c new file mode 100644 index 0000000..eca20ab --- /dev/null +++ b/offload/test/offloading/strided_update_to.c @@ -0,0 +1,74 @@ +// This test checks that "update to" clause in OpenMP is supported when the +// elements are updated in a non-contiguous manner. This test checks that +// #pragma omp target update to(data[0:4:2]) correctly updates only every +// other element (stride 2) from the host to the device + +// RUN: %libomptarget-compile-run-and-check-generic +// XFAIL: intelgpu + +#include <omp.h> +#include <stdio.h> + +int main() { + int len = 8; + double data[len]; + + // Initialize on host + for (int i = 0; i < len; i++) { + data[i] = i; + } + + // Initial values + printf("original host array values:\n"); + for (int i = 0; i < len; i++) + printf("%f\n", data[i]); + printf("\n"); + +#pragma omp target data map(tofrom : len, data[0 : len]) + { + // Initialize device to 20 +#pragma omp target + for (int i = 0; i < len; i++) { + data[i] = 20.0; + } + + // Modify host for strided elements + data[0] = 10.0; + data[2] = 10.0; + data[4] = 10.0; + data[6] = 10.0; + +#pragma omp target update to(data[0 : 4 : 2]) + + // Verify on device by adding 5 +#pragma omp target + for (int i = 0; i < len; i++) { + data[i] += 5.0; + } + } + + // CHECK: 0.000000 + // CHECK: 1.000000 + // CHECK: 2.000000 + // CHECK: 3.000000 + // CHECK: 4.000000 + // CHECK: 5.000000 + // CHECK: 6.000000 + // CHECK: 7.000000 + + printf("device array values after update to:\n"); + for (int i = 0; i < len; i++) + printf("%f\n", data[i]); + printf("\n"); + + // CHECK: 15.000000 + // CHECK: 25.000000 + // CHECK: 15.000000 + // CHECK: 25.000000 + // CHECK: 15.000000 + // CHECK: 25.000000 + // CHECK: 15.000000 + // CHECK: 25.000000 + + return 0; +} diff --git a/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp b/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp index 47a299f..718e9e7 100644 --- a/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp +++ b/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp @@ -15,6 +15,7 @@ #include "llvm/Frontend/Offloading/Utility.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/JSON.h" #include "llvm/Support/MemoryBuffer.h" @@ -60,11 +61,11 @@ int main(int argc, char **argv) { MemoryBuffer::getFile(InputFilename, /*isText=*/true, /*RequiresNullTerminator=*/true); if (!KernelInfoMB) - report_fatal_error("Error reading the kernel info json file"); + reportFatalUsageError("Error reading the kernel info json file"); Expected<json::Value> JsonKernelInfo = json::parse(KernelInfoMB.get()->getBuffer()); if (auto Err = JsonKernelInfo.takeError()) - report_fatal_error("Cannot parse the kernel info json file"); + reportFatalUsageError("Cannot parse the kernel info json file"); auto NumTeamsJson = JsonKernelInfo->getAsObject()->getInteger("NumTeamsClause"); @@ -104,7 +105,7 @@ int main(int argc, char **argv) { MemoryBuffer::getFile(KernelEntryName + ".image", /*isText=*/false, /*RequiresNullTerminator=*/false); if (!ImageMB) - report_fatal_error("Error reading the kernel image."); + reportFatalUsageError("Error reading the kernel image."); __tgt_device_image DeviceImage; DeviceImage.ImageStart = const_cast<char *>(ImageMB.get()->getBufferStart()); @@ -145,7 +146,7 @@ int main(int argc, char **argv) { /*RequiresNullTerminator=*/false); if (!DeviceMemoryMB) - report_fatal_error("Error reading the kernel input device memory."); + reportFatalUsageError("Error reading the kernel input device memory."); // On AMD for currently unknown reasons we cannot copy memory mapped data to // device. This is a work-around. @@ -178,14 +179,15 @@ int main(int argc, char **argv) { /*isText=*/false, /*RequiresNullTerminator=*/false); if (!OriginalOutputMB) - report_fatal_error("Error reading the kernel original output file, make " - "sure LIBOMPTARGET_SAVE_OUTPUT is set when recording"); + reportFatalUsageError( + "Error reading the kernel original output file, make sure " + "LIBOMPTARGET_SAVE_OUTPUT is set when recording"); ErrorOr<std::unique_ptr<MemoryBuffer>> ReplayOutputMB = MemoryBuffer::getFile(KernelEntryName + ".replay.output", /*isText=*/false, /*RequiresNullTerminator=*/false); if (!ReplayOutputMB) - report_fatal_error("Error reading the kernel replay output file"); + reportFatalUsageError("Error reading the kernel replay output file"); StringRef OriginalOutput = OriginalOutputMB.get()->getBuffer(); StringRef ReplayOutput = ReplayOutputMB.get()->getBuffer(); diff --git a/offload/unittests/OffloadAPI/CMakeLists.txt b/offload/unittests/OffloadAPI/CMakeLists.txt index 546d7da..fce38c5 100644 --- a/offload/unittests/OffloadAPI/CMakeLists.txt +++ b/offload/unittests/OffloadAPI/CMakeLists.txt @@ -48,7 +48,8 @@ add_offload_unittest("queue" queue/olGetQueueInfo.cpp queue/olGetQueueInfoSize.cpp queue/olWaitEvents.cpp - queue/olLaunchHostFunction.cpp) + queue/olLaunchHostFunction.cpp + queue/olQueryQueue.cpp) add_offload_unittest("symbol" symbol/olGetSymbol.cpp diff --git a/offload/unittests/OffloadAPI/queue/olQueryQueue.cpp b/offload/unittests/OffloadAPI/queue/olQueryQueue.cpp new file mode 100644 index 0000000..4efd939 --- /dev/null +++ b/offload/unittests/OffloadAPI/queue/olQueryQueue.cpp @@ -0,0 +1,24 @@ +//===------- Offload API tests - olQueryQueue ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--------------------------------------------------------------------===// + +#include "../common/Fixtures.hpp" +#include <OffloadAPI.h> +#include <gtest/gtest.h> + +using olQueryQueueTest = OffloadQueueTest; +OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olQueryQueueTest); + +TEST_P(olQueryQueueTest, SuccessEmptyAsyncQueue) { + ASSERT_SUCCESS(olQueryQueue(Queue, nullptr)); +} + +TEST_P(olQueryQueueTest, SuccessEmptyAsyncQueueCheckResult) { + bool IsQueueWorkCompleted; + ASSERT_SUCCESS(olQueryQueue(Queue, &IsQueueWorkCompleted)); + ASSERT_TRUE(IsQueueWorkCompleted); +} |
