aboutsummaryrefslogtreecommitdiff
path: root/offload
diff options
context:
space:
mode:
Diffstat (limited to 'offload')
-rw-r--r--offload/CMakeLists.txt12
-rw-r--r--offload/cmake/OpenMPTesting.cmake4
-rw-r--r--offload/include/OpenMP/InteropAPI.h8
-rw-r--r--offload/include/OpenMP/omp.h8
-rw-r--r--offload/include/PerThreadTable.h259
-rw-r--r--offload/include/Shared/Debug.h438
-rw-r--r--offload/include/omptarget.h8
-rw-r--r--offload/liboffload/API/Device.td1
-rw-r--r--offload/liboffload/src/OffloadImpl.cpp8
-rw-r--r--offload/libomptarget/OffloadRTL.cpp9
-rw-r--r--offload/libomptarget/OpenMP/API.cpp58
-rw-r--r--offload/libomptarget/PluginManager.cpp16
-rw-r--r--offload/libomptarget/device.cpp78
-rw-r--r--offload/libomptarget/exports2
-rw-r--r--offload/plugins-nextgen/amdgpu/src/rtl.cpp13
-rw-r--r--offload/plugins-nextgen/common/include/PluginInterface.h7
-rw-r--r--offload/plugins-nextgen/common/src/RPC.cpp21
-rw-r--r--offload/plugins-nextgen/cuda/src/rtl.cpp12
-rw-r--r--offload/plugins-nextgen/host/src/rtl.cpp2
-rw-r--r--offload/test/api/omp_device_uid.c76
-rw-r--r--offload/test/api/omp_indirect_call_table_manual.c107
-rw-r--r--offload/test/lit.site.cfg.in2
-rw-r--r--offload/test/mapping/use_device_addr/target_data_use_device_addr_arrsec_fallback.c24
-rw-r--r--offload/test/mapping/use_device_addr/target_data_use_device_addr_var_fallback.c21
-rw-r--r--offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_fallback.c32
-rw-r--r--offload/test/offloading/fortran/declare-target-to-allocatable-vars-in-target-with-update.f9041
-rw-r--r--offload/test/offloading/fortran/declare-target-to-vars-target-region-and-update.f9040
-rw-r--r--offload/test/offloading/fortran/declare-target-to-zero-index-allocatable-target-map.f9030
-rw-r--r--offload/test/offloading/fortran/dtype-member-overlap-map.f9056
-rw-r--r--offload/test/offloading/fortran/implicit-derived-enter-exit.f9065
-rw-r--r--offload/test/offloading/fortran/target-custom-reduction-derivedtype.f9088
-rw-r--r--offload/test/offloading/fortran/target-is-device-ptr.f9060
-rw-r--r--offload/test/offloading/gpupgo/pgo_atomic_teams.c1
-rw-r--r--offload/test/offloading/gpupgo/pgo_atomic_threads.c1
-rw-r--r--offload/test/offloading/gpupgo/pgo_device_and_host.c1
-rw-r--r--offload/test/offloading/gpupgo/pgo_device_only.c1
-rw-r--r--offload/test/offloading/shared_lib_fp_mapping.c4
-rw-r--r--offload/test/offloading/static_linking.c2
-rw-r--r--offload/tools/deviceinfo/llvm-offload-device-info.cpp3
-rw-r--r--offload/tools/offload-tblgen/EntryPointGen.cpp16
-rw-r--r--offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp5
-rw-r--r--offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp2
42 files changed, 1472 insertions, 170 deletions
diff --git a/offload/CMakeLists.txt b/offload/CMakeLists.txt
index b277380..6e801b1 100644
--- a/offload/CMakeLists.txt
+++ b/offload/CMakeLists.txt
@@ -90,18 +90,18 @@ else()
set(OPENMP_INSTALL_LIBDIR "lib${LLVM_LIBDIR_SUFFIX}")
if (NOT MSVC)
- set(OPENMP_TEST_C_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
- set(OPENMP_TEST_CXX_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang++)
+ set(OPENMP_TEST_C_COMPILER ${LLVM_TOOLS_BINARY_DIR}/clang)
+ set(OPENMP_TEST_CXX_COMPILER ${LLVM_TOOLS_BINARY_DIR}/clang++)
else()
- set(OPENMP_TEST_C_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang.exe)
- set(OPENMP_TEST_CXX_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang++.exe)
+ set(OPENMP_TEST_C_COMPILER ${LLVM_TOOLS_BINARY_DIR}/clang.exe)
+ set(OPENMP_TEST_CXX_COMPILER ${LLVM_TOOLS_BINARY_DIR}/clang++.exe)
endif()
# Check for flang
if (NOT MSVC)
- set(OPENMP_TEST_Fortran_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/flang)
+ set(OPENMP_TEST_Fortran_COMPILER ${LLVM_TOOLS_BINARY_DIR}/flang)
else()
- set(OPENMP_TEST_Fortran_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/flang.exe)
+ set(OPENMP_TEST_Fortran_COMPILER ${LLVM_TOOLS_BINARY_DIR}/flang.exe)
endif()
# Set fortran test compiler if flang is found
diff --git a/offload/cmake/OpenMPTesting.cmake b/offload/cmake/OpenMPTesting.cmake
index ef8cf34..b564e46 100644
--- a/offload/cmake/OpenMPTesting.cmake
+++ b/offload/cmake/OpenMPTesting.cmake
@@ -68,9 +68,9 @@ else()
message(WARNING "The check targets will not be available!")
set(ENABLE_CHECK_TARGETS FALSE)
else()
- set(OPENMP_FILECHECK_EXECUTABLE ${LLVM_RUNTIME_OUTPUT_INTDIR}/FileCheck)
+ set(OPENMP_FILECHECK_EXECUTABLE ${LLVM_TOOLS_BINARY_DIR}/FileCheck)
endif()
- set(OPENMP_NOT_EXECUTABLE ${LLVM_RUNTIME_OUTPUT_INTDIR}/not)
+ set(OPENMP_NOT_EXECUTABLE ${LLVM_TOOLS_BINARY_DIR}/not)
endif()
set(OFFLOAD_DEVICE_INFO_EXECUTABLE ${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-offload-device-info)
set(OFFLOAD_TBLGEN_EXECUTABLE ${LLVM_RUNTIME_OUTPUT_INTDIR}/offload-tblgen)
diff --git a/offload/include/OpenMP/InteropAPI.h b/offload/include/OpenMP/InteropAPI.h
index 8c06ba3..02e097e 100644
--- a/offload/include/OpenMP/InteropAPI.h
+++ b/offload/include/OpenMP/InteropAPI.h
@@ -160,17 +160,11 @@ struct InteropTableEntry {
Interops.push_back(obj);
}
- template <class ClearFuncTy> void clear(ClearFuncTy f) {
- for (auto &Obj : Interops) {
- f(Obj);
- }
- }
-
/// vector interface
int size() const { return Interops.size(); }
iterator begin() { return Interops.begin(); }
iterator end() { return Interops.end(); }
- iterator erase(iterator it) { return Interops.erase(it); }
+ void clear() { Interops.clear(); }
};
struct InteropTblTy
diff --git a/offload/include/OpenMP/omp.h b/offload/include/OpenMP/omp.h
index 768ca46..8db0a05 100644
--- a/offload/include/OpenMP/omp.h
+++ b/offload/include/OpenMP/omp.h
@@ -30,6 +30,14 @@
extern "C" {
+/// Definitions
+///{
+
+// See definition in OpenMP (omp.h.var/omp_lib.(F90|h).var)
+#define omp_invalid_device -2
+
+///}
+
/// Type declarations
///{
diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index 45b1961..b547761 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -14,101 +14,256 @@
#define OFFLOAD_PERTHREADTABLE_H
#include <list>
+#include <llvm/ADT/SmallVector.h>
+#include <llvm/Support/Error.h>
#include <memory>
#include <mutex>
+#include <type_traits>
+
+template <typename ObjectType> class PerThread {
+ std::mutex Mutex;
+ llvm::SmallVector<std::shared_ptr<ObjectType>> ThreadDataList;
+
+ ObjectType &getThreadData() {
+ static thread_local std::shared_ptr<ObjectType> ThreadData = nullptr;
+ if (!ThreadData) {
+ ThreadData = std::make_shared<ObjectType>();
+ std::lock_guard<std::mutex> Lock(Mutex);
+ ThreadDataList.push_back(ThreadData);
+ }
+ return *ThreadData;
+ }
+
+public:
+ // Define default constructors, disable copy and move constructors.
+ PerThread() = default;
+ PerThread(const PerThread &) = delete;
+ PerThread(PerThread &&) = delete;
+ PerThread &operator=(const PerThread &) = delete;
+ PerThread &operator=(PerThread &&) = delete;
+ ~PerThread() {
+ assert(Mutex.try_lock() && (Mutex.unlock(), true) &&
+ "Cannot be deleted while other threads are adding entries");
+ ThreadDataList.clear();
+ }
+
+ ObjectType &get() { return getThreadData(); }
+
+ template <class ClearFuncTy> void clear(ClearFuncTy ClearFunc) {
+ assert(Mutex.try_lock() && (Mutex.unlock(), true) &&
+ "Clear cannot be called while other threads are adding entries");
+ for (std::shared_ptr<ObjectType> ThreadData : ThreadDataList) {
+ if (!ThreadData)
+ continue;
+ ClearFunc(*ThreadData);
+ }
+ ThreadDataList.clear();
+ }
+};
+
+template <typename ContainerTy> struct ContainerConcepts {
+ template <typename, template <typename> class, typename = std::void_t<>>
+ struct has : std::false_type {};
+ template <typename Ty, template <typename> class Op>
+ struct has<Ty, Op, std::void_t<Op<Ty>>> : std::true_type {};
+
+ template <typename Ty> using IteratorTypeCheck = typename Ty::iterator;
+ template <typename Ty> using MappedTypeCheck = typename Ty::mapped_type;
+ template <typename Ty> using ValueTypeCheck = typename Ty::value_type;
+ template <typename Ty> using KeyTypeCheck = typename Ty::key_type;
+ template <typename Ty> using SizeTypeCheck = typename Ty::size_type;
+
+ template <typename Ty>
+ using ClearCheck = decltype(std::declval<Ty>().clear());
+ template <typename Ty>
+ using ReserveCheck = decltype(std::declval<Ty>().reserve(1));
+ template <typename Ty>
+ using ResizeCheck = decltype(std::declval<Ty>().resize(1));
+
+ static constexpr bool hasIterator =
+ has<ContainerTy, IteratorTypeCheck>::value;
+ static constexpr bool hasClear = has<ContainerTy, ClearCheck>::value;
+ static constexpr bool isAssociative =
+ has<ContainerTy, MappedTypeCheck>::value;
+ static constexpr bool hasReserve = has<ContainerTy, ReserveCheck>::value;
+ static constexpr bool hasResize = has<ContainerTy, ResizeCheck>::value;
+
+ template <typename, template <typename> class, typename = std::void_t<>>
+ struct has_type {
+ using type = void;
+ };
+ template <typename Ty, template <typename> class Op>
+ struct has_type<Ty, Op, std::void_t<Op<Ty>>> {
+ using type = Op<Ty>;
+ };
+
+ using iterator = typename has_type<ContainerTy, IteratorTypeCheck>::type;
+ using value_type = typename std::conditional_t<
+ isAssociative, typename has_type<ContainerTy, MappedTypeCheck>::type,
+ typename has_type<ContainerTy, ValueTypeCheck>::type>;
+ using key_type = typename std::conditional_t<
+ isAssociative, typename has_type<ContainerTy, KeyTypeCheck>::type,
+ typename has_type<ContainerTy, SizeTypeCheck>::type>;
+};
// Using an STL container (such as std::vector) indexed by thread ID has
// too many race conditions issues so we store each thread entry into a
// thread_local variable.
-// T is the container type used to store the objects, e.g., std::vector,
-// std::set, etc. by each thread. O is the type of the stored objects e.g.,
-// omp_interop_val_t *, ...
-
-template <typename ContainerType, typename ObjectType> struct PerThreadTable {
- using iterator = typename ContainerType::iterator;
+// ContainerType is the container type used to store the objects, e.g.,
+// std::vector, std::set, etc. by each thread. ObjectType is the type of the
+// stored objects e.g., omp_interop_val_t *, ...
+template <typename ContainerType, typename ObjectType> class PerThreadTable {
+ using iterator = typename ContainerConcepts<ContainerType>::iterator;
struct PerThreadData {
- size_t NElements = 0;
- std::unique_ptr<ContainerType> ThEntry;
+ size_t Size = 0;
+ std::unique_ptr<ContainerType> ThreadEntry;
};
- std::mutex Mtx;
- std::list<std::shared_ptr<PerThreadData>> ThreadDataList;
-
- // define default constructors, disable copy and move constructors
- PerThreadTable() = default;
- PerThreadTable(const PerThreadTable &) = delete;
- PerThreadTable(PerThreadTable &&) = delete;
- PerThreadTable &operator=(const PerThreadTable &) = delete;
- PerThreadTable &operator=(PerThreadTable &&) = delete;
- ~PerThreadTable() {
- std::lock_guard<std::mutex> Lock(Mtx);
- ThreadDataList.clear();
- }
+ std::mutex Mutex;
+ llvm::SmallVector<std::shared_ptr<PerThreadData>> ThreadDataList;
-private:
PerThreadData &getThreadData() {
- static thread_local std::shared_ptr<PerThreadData> ThData = nullptr;
- if (!ThData) {
- ThData = std::make_shared<PerThreadData>();
- std::lock_guard<std::mutex> Lock(Mtx);
- ThreadDataList.push_back(ThData);
+ static thread_local std::shared_ptr<PerThreadData> ThreadData = nullptr;
+ if (!ThreadData) {
+ ThreadData = std::make_shared<PerThreadData>();
+ std::lock_guard<std::mutex> Lock(Mutex);
+ ThreadDataList.push_back(ThreadData);
}
- return *ThData;
+ return *ThreadData;
}
protected:
ContainerType &getThreadEntry() {
- auto &ThData = getThreadData();
- if (ThData.ThEntry)
- return *ThData.ThEntry;
- ThData.ThEntry = std::make_unique<ContainerType>();
- return *ThData.ThEntry;
+ PerThreadData &ThreadData = getThreadData();
+ if (ThreadData.ThreadEntry)
+ return *ThreadData.ThreadEntry;
+ ThreadData.ThreadEntry = std::make_unique<ContainerType>();
+ return *ThreadData.ThreadEntry;
+ }
+
+ size_t &getThreadSize() {
+ PerThreadData &ThreadData = getThreadData();
+ return ThreadData.Size;
}
- size_t &getThreadNElements() {
- auto &ThData = getThreadData();
- return ThData.NElements;
+ void setSize(size_t Size) {
+ size_t &SizeRef = getThreadSize();
+ SizeRef = Size;
}
public:
+ // define default constructors, disable copy and move constructors.
+ PerThreadTable() = default;
+ PerThreadTable(const PerThreadTable &) = delete;
+ PerThreadTable(PerThreadTable &&) = delete;
+ PerThreadTable &operator=(const PerThreadTable &) = delete;
+ PerThreadTable &operator=(PerThreadTable &&) = delete;
+ ~PerThreadTable() {
+ assert(Mutex.try_lock() && (Mutex.unlock(), true) &&
+ "Cannot be deleted while other threads are adding entries");
+ ThreadDataList.clear();
+ }
+
void add(ObjectType obj) {
- auto &Entry = getThreadEntry();
- auto &NElements = getThreadNElements();
- NElements++;
+ ContainerType &Entry = getThreadEntry();
+ size_t &SizeRef = getThreadSize();
+ SizeRef++;
Entry.add(obj);
}
iterator erase(iterator it) {
- auto &Entry = getThreadEntry();
- auto &NElements = getThreadNElements();
- NElements--;
+ ContainerType &Entry = getThreadEntry();
+ size_t &SizeRef = getThreadSize();
+ SizeRef--;
return Entry.erase(it);
}
- size_t size() { return getThreadNElements(); }
+ size_t size() { return getThreadSize(); }
// Iterators to traverse objects owned by
- // the current thread
+ // the current thread.
iterator begin() {
- auto &Entry = getThreadEntry();
+ ContainerType &Entry = getThreadEntry();
return Entry.begin();
}
iterator end() {
- auto &Entry = getThreadEntry();
+ ContainerType &Entry = getThreadEntry();
return Entry.end();
}
- template <class F> void clear(F f) {
- std::lock_guard<std::mutex> Lock(Mtx);
- for (auto ThData : ThreadDataList) {
- if (!ThData->ThEntry || ThData->NElements == 0)
+ template <class ClearFuncTy> void clear(ClearFuncTy ClearFunc) {
+ assert(Mutex.try_lock() && (Mutex.unlock(), true) &&
+ "Clear cannot be called while other threads are adding entries");
+ for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
+ if (!ThreadData->ThreadEntry || ThreadData->Size == 0)
continue;
- ThData->ThEntry->clear(f);
- ThData->NElements = 0;
+ if constexpr (ContainerConcepts<ContainerType>::hasIterator &&
+ ContainerConcepts<ContainerType>::hasClear) {
+ for (auto &Obj : *ThreadData->ThreadEntry) {
+ if constexpr (ContainerConcepts<ContainerType>::isAssociative) {
+ ClearFunc(Obj.second);
+ } else {
+ ClearFunc(Obj);
+ }
+ }
+ ThreadData->ThreadEntry->clear();
+ } else {
+ static_assert(true, "Container type not supported");
+ }
+ ThreadData->Size = 0;
}
ThreadDataList.clear();
}
+
+ template <class DeinitFuncTy> llvm::Error deinit(DeinitFuncTy DeinitFunc) {
+ assert(Mutex.try_lock() && (Mutex.unlock(), true) &&
+ "Deinit cannot be called while other threads are adding entries");
+ for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
+ if (!ThreadData->ThreadEntry || ThreadData->Size == 0)
+ continue;
+ for (auto &Obj : *ThreadData->ThreadEntry) {
+ if constexpr (ContainerConcepts<ContainerType>::isAssociative) {
+ if (auto Err = DeinitFunc(Obj.second))
+ return Err;
+ } else {
+ if (auto Err = DeinitFunc(Obj))
+ return Err;
+ }
+ }
+ }
+ return llvm::Error::success();
+ }
+};
+
+template <typename ContainerType, size_t ReserveSize = 0>
+class PerThreadContainer
+ : public PerThreadTable<ContainerType, typename ContainerConcepts<
+ ContainerType>::value_type> {
+
+ using IndexType = typename ContainerConcepts<ContainerType>::key_type;
+ using ObjectType = typename ContainerConcepts<ContainerType>::value_type;
+
+public:
+ // Get the object for the given index in the current thread.
+ ObjectType &get(IndexType Index) {
+ ContainerType &Entry = this->getThreadEntry();
+
+ // Specialized code for vector-like containers.
+ if constexpr (ContainerConcepts<ContainerType>::hasResize) {
+ if (Index >= Entry.size()) {
+ if constexpr (ContainerConcepts<ContainerType>::hasReserve &&
+ ReserveSize > 0)
+ Entry.reserve(ReserveSize);
+
+ // If the index is out of bounds, try resize the container.
+ Entry.resize(Index + 1);
+ }
+ }
+ ObjectType &Ret = Entry[Index];
+ this->setSize(Entry.size());
+ return Ret;
+ }
};
#endif
diff --git a/offload/include/Shared/Debug.h b/offload/include/Shared/Debug.h
index 7c3db8d..9e657e6 100644
--- a/offload/include/Shared/Debug.h
+++ b/offload/include/Shared/Debug.h
@@ -39,9 +39,13 @@
#define OMPTARGET_SHARED_DEBUG_H
#include <atomic>
+#include <cstdarg>
#include <mutex>
#include <string>
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/circular_raw_ostream.h"
+
/// 32-Bit field data attributes controlling information presented to the user.
enum OpenMPInfoType : uint32_t {
// Print data arguments and attributes upon entering an OpenMP device kernel.
@@ -75,17 +79,6 @@ inline std::atomic<uint32_t> &getInfoLevelInternal() {
inline uint32_t getInfoLevel() { return getInfoLevelInternal().load(); }
-inline uint32_t getDebugLevel() {
- static uint32_t DebugLevel = 0;
- static std::once_flag Flag{};
- std::call_once(Flag, []() {
- if (char *EnvStr = getenv("LIBOMPTARGET_DEBUG"))
- DebugLevel = std::stoi(EnvStr);
- });
-
- return DebugLevel;
-}
-
#undef USED
#undef GCC_VERSION
@@ -144,46 +137,11 @@ inline uint32_t getDebugLevel() {
fprintf(_stdDst, __VA_ARGS__); \
} while (0)
-// Debugging messages
-#ifdef OMPTARGET_DEBUG
-#include <stdio.h>
-
-#define DEBUGP(prefix, ...) \
- { \
- fprintf(stderr, "%s --> ", prefix); \
- fprintf(stderr, __VA_ARGS__); \
- }
-
-/// Emit a message for debugging
-#define DP(...) \
- do { \
- if (getDebugLevel() > 0) { \
- DEBUGP(DEBUG_PREFIX, __VA_ARGS__); \
- } \
- } while (false)
-
-/// Emit a message for debugging or failure if debugging is disabled
-#define REPORT(...) \
- do { \
- if (getDebugLevel() > 0) { \
- DP(__VA_ARGS__); \
- } else { \
- FAILURE_MESSAGE(__VA_ARGS__); \
- } \
- } while (false)
-#else
-#define DEBUGP(prefix, ...) \
- {}
-#define DP(...) \
- {}
-#define REPORT(...) FAILURE_MESSAGE(__VA_ARGS__);
-#endif // OMPTARGET_DEBUG
-
/// Emit a message giving the user extra information about the runtime if
#define INFO(_flags, _id, ...) \
do { \
- if (getDebugLevel() > 0) { \
- DEBUGP(DEBUG_PREFIX, __VA_ARGS__); \
+ if (::llvm::offload::debug::isDebugEnabled()) { \
+ DP(__VA_ARGS__); \
} else if (getInfoLevel() & _flags) { \
INFO_MESSAGE(_id, __VA_ARGS__); \
} \
@@ -198,4 +156,388 @@ inline uint32_t getDebugLevel() {
} \
} while (false)
+namespace llvm::offload::debug {
+
+/// A raw_ostream that tracks `\n` and print the prefix after each
+/// newline. Based on raw_ldbg_ostream from Support/DebugLog.h
+class LLVM_ABI odbg_ostream final : public raw_ostream {
+public:
+ enum IfLevel : uint32_t;
+ enum OnlyLevel : uint32_t;
+
+private:
+ std::string Prefix;
+ raw_ostream &Os;
+ uint32_t BaseLevel;
+ bool ShouldPrefixNextString;
+ bool ShouldEmitNewLineOnDestruction;
+ bool NeedEndNewLine = false;
+
+ /// If the stream is muted, writes to it are ignored
+ bool Muted = false;
+
+ /// Split the line on newlines and insert the prefix before each
+ /// newline. Forward everything to the underlying stream.
+ void write_impl(const char *Ptr, size_t Size) final {
+ if (Muted)
+ return;
+
+ NeedEndNewLine = false;
+ auto Str = StringRef(Ptr, Size);
+ auto Eol = Str.find('\n');
+ // Handle `\n` occurring in the string, ensure to print the prefix at the
+ // beginning of each line.
+ while (Eol != StringRef::npos) {
+ // Take the line up to the newline (including the newline).
+ StringRef Line = Str.take_front(Eol + 1);
+ if (!Line.empty())
+ writeWithPrefix(Line);
+ // We printed a newline, record here to print a prefix.
+ ShouldPrefixNextString = true;
+ Str = Str.drop_front(Eol + 1);
+ Eol = Str.find('\n');
+ }
+ if (!Str.empty()) {
+ writeWithPrefix(Str);
+ NeedEndNewLine = true;
+ }
+ }
+ void emitPrefix() { Os.write(Prefix.c_str(), Prefix.size()); }
+ void writeWithPrefix(StringRef Str) {
+ if (ShouldPrefixNextString) {
+ emitPrefix();
+ ShouldPrefixNextString = false;
+ }
+ Os.write(Str.data(), Str.size());
+ }
+
+public:
+ explicit odbg_ostream(std::string Prefix, raw_ostream &Os, uint32_t BaseLevel,
+ bool ShouldPrefixNextString = true,
+ bool ShouldEmitNewLineOnDestruction = true)
+ : Prefix(std::move(Prefix)), Os(Os), BaseLevel(BaseLevel),
+ ShouldPrefixNextString(ShouldPrefixNextString),
+ ShouldEmitNewLineOnDestruction(ShouldEmitNewLineOnDestruction) {
+ SetUnbuffered();
+ }
+ ~odbg_ostream() final {
+ if (ShouldEmitNewLineOnDestruction && NeedEndNewLine)
+ Os << '\n';
+ }
+ odbg_ostream(const odbg_ostream &) = delete;
+ odbg_ostream &operator=(const odbg_ostream &) = delete;
+ odbg_ostream(odbg_ostream &&other) : Os(other.Os) {
+ Prefix = std::move(other.Prefix);
+ BaseLevel = other.BaseLevel;
+ ShouldPrefixNextString = other.ShouldPrefixNextString;
+ ShouldEmitNewLineOnDestruction = other.ShouldEmitNewLineOnDestruction;
+ NeedEndNewLine = other.NeedEndNewLine;
+ Muted = other.Muted;
+ }
+
+ /// Forward the current_pos method to the underlying stream.
+ uint64_t current_pos() const final { return Os.tell(); }
+
+ /// Some of the `<<` operators expect an lvalue, so we trick the type
+ /// system.
+ odbg_ostream &asLvalue() { return *this; }
+
+ void shouldMute(const IfLevel Filter) { Muted = Filter > BaseLevel; }
+ void shouldMute(const OnlyLevel Filter) { Muted = BaseLevel != Filter; }
+};
+
+/// dbgs - Return a circular-buffered debug stream.
+[[maybe_unused]] static llvm::raw_ostream &dbgs() {
+ // Do one-time initialization in a thread-safe way.
+ static struct dbgstream {
+ llvm::circular_raw_ostream strm;
+
+ dbgstream() : strm(llvm::errs(), "*** Debug Log Output ***\n", 0) {}
+ } thestrm;
+
+ return thestrm.strm;
+}
+
+#ifdef OMPTARGET_DEBUG
+
+struct DebugFilter {
+ StringRef Type;
+ uint32_t Level;
+};
+
+struct DebugSettings {
+ bool Enabled = false;
+ uint32_t DefaultLevel = 1;
+ llvm::SmallVector<DebugFilter> Filters;
+};
+
+[[maybe_unused]] static DebugFilter parseDebugFilter(StringRef Filter) {
+ size_t Pos = Filter.find(':');
+ if (Pos == StringRef::npos)
+ return {Filter, 1};
+
+ StringRef Type = Filter.slice(0, Pos);
+ uint32_t Level = 1;
+ if (Filter.slice(Pos + 1, Filter.size()).getAsInteger(10, Level))
+ Level = 1;
+
+ return {Type, Level};
+}
+
+[[maybe_unused]] static DebugSettings &getDebugSettings() {
+ static DebugSettings Settings;
+ static std::once_flag Flag{};
+ std::call_once(Flag, []() {
+ // Eventually, we probably should allow the upper layers to set
+ // debug settings directly according to their own env var or
+ // other methods.
+ // For now, mantain compatibility with existing libomptarget env var
+ // and add a liboffload independent one.
+ char *Env = getenv("LIBOMPTARGET_DEBUG");
+ if (!Env) {
+ Env = getenv("LIBOFFLOAD_DEBUG");
+ if (!Env)
+ return;
+ }
+
+ StringRef EnvRef(Env);
+ if (EnvRef == "0")
+ return;
+
+ Settings.Enabled = true;
+ if (EnvRef.equals_insensitive("all"))
+ return;
+
+ if (!EnvRef.getAsInteger(10, Settings.DefaultLevel))
+ return;
+
+ Settings.DefaultLevel = 1;
+
+ for (auto &FilterSpec : llvm::split(EnvRef, ',')) {
+ if (FilterSpec.empty())
+ continue;
+ Settings.Filters.push_back(parseDebugFilter(FilterSpec));
+ }
+ });
+
+ return Settings;
+}
+
+inline bool isDebugEnabled() { return getDebugSettings().Enabled; }
+
+[[maybe_unused]] static bool
+shouldPrintDebug(const char *Component, const char *Type, uint32_t &Level) {
+ const auto &Settings = getDebugSettings();
+ if (!Settings.Enabled)
+ return false;
+
+ if (Settings.Filters.empty()) {
+ if (Level <= Settings.DefaultLevel) {
+ Level = Settings.DefaultLevel;
+ return true;
+ }
+ return false;
+ }
+
+ for (const auto &DT : Settings.Filters) {
+ if (DT.Level < Level)
+ continue;
+ if (DT.Type.equals_insensitive(Type) ||
+ DT.Type.equals_insensitive(Component)) {
+ Level = DT.Level;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// Compute the prefix for the debug log in the form of:
+/// "Component --> "
+[[maybe_unused]] static std::string computePrefix(StringRef Component,
+ StringRef DebugType) {
+ std::string Prefix;
+ raw_string_ostream OsPrefix(Prefix);
+ OsPrefix << Component << " --> ";
+ return OsPrefix.str();
+}
+
+static inline raw_ostream &operator<<(raw_ostream &Os,
+ const odbg_ostream::IfLevel Filter) {
+ odbg_ostream &Dbg = static_cast<odbg_ostream &>(Os);
+ Dbg.shouldMute(Filter);
+ return Dbg;
+}
+
+static inline raw_ostream &operator<<(raw_ostream &Os,
+ const odbg_ostream::OnlyLevel Filter) {
+ odbg_ostream &Dbg = static_cast<odbg_ostream &>(Os);
+ Dbg.shouldMute(Filter);
+ return Dbg;
+}
+
+#define ODBG_BASE(Stream, Component, Prefix, Type, Level) \
+ for (uint32_t RealLevel = (Level), \
+ _c = llvm::offload::debug::isDebugEnabled() && \
+ llvm::offload::debug::shouldPrintDebug( \
+ (Component), (Type), RealLevel); \
+ _c; _c = 0) \
+ ::llvm::offload::debug::odbg_ostream{ \
+ ::llvm::offload::debug::computePrefix((Prefix), (Type)), (Stream), \
+ RealLevel, /*ShouldPrefixNextString=*/true, \
+ /*ShouldEmitNewLineOnDestruction=*/true} \
+ .asLvalue()
+
+#define ODBG_STREAM(Stream, Type, Level) \
+ ODBG_BASE(Stream, GETNAME(TARGET_NAME), DEBUG_PREFIX, Type, Level)
+
+#define ODBG_0() ODBG_2("default", 1)
+#define ODBG_1(Type) ODBG_2(Type, 1)
+#define ODBG_2(Type, Level) \
+ ODBG_STREAM(llvm::offload::debug::dbgs(), Type, Level)
+#define ODBG_SELECT(Type, Level, NArgs, ...) ODBG_##NArgs
+
+// Print a debug message of a certain type and verbosity level. If no type
+// or level is provided, "default" and "1" are assumed respectively.
+// Usage examples:
+// ODBG("type1", 2) << "This is a level 2 message of type1";
+// ODBG("Init") << "This is a default level of the init type";
+// ODBG() << "This is a level 1 message of the default type";
+// ODBG("Init", 3) << NumDevices << " were initialized";
+// ODBG("Kernel") << "Launching " << KernelName << " on device " << DeviceId;
+#define ODBG(...) ODBG_SELECT(__VA_ARGS__ __VA_OPT__(, ) 2, 1, 0)(__VA_ARGS__)
+
+// Filter the next elements in the debug stream if the current debug level is
+// lower than specified level. Example:
+// ODBG("Mapping", 2) << "level 2 info "
+// << ODBG_IF_LEVEL(3) << " level 3 info" << Arg
+// << ODBG_IF_LEVEL(4) << " level 4 info" << &Arg
+// << ODBG_RESET_LEVEL() << " more level 2 info";
+#define ODBG_IF_LEVEL(Level) \
+ static_cast<llvm::offload::debug::odbg_ostream::IfLevel>(Level)
+
+// Filter the next elements in the debug stream if the current debug level is
+// not exactly the specified level. Example:
+// ODBG() << "Starting computation "
+// << ODBG_ONLY_LEVEL(1) << "on a device"
+// << ODBG_ONLY_LEVEL(2) << "and mapping data on device" << DeviceId;
+// << ODBG_ONLY_LEVEL(3) << dumpDetailedMappingInfo(DeviceId);
+#define ODBG_ONLY_LEVEL(Level) \
+ static_cast<llvm::offload::debug::odbg_ostream::OnlyLevel>(Level)
+
+// Reset the level back to the original level after ODBG_IF_LEVEL or
+// ODBG_ONLY_LEVEL have been used
+#define ODBG_RESET_LEVEL() \
+ static_cast<llvm::offload::debug::odbg_ostream::IfLevel>(0)
+
+#else
+
+inline bool isDebugEnabled() { return false; }
+
+#define ODBG_NULL \
+ for (bool _c = false; _c; _c = false) \
+ ::llvm::nulls()
+
+// Don't print anything if debugging is disabled
+#define ODBG_BASE(Stream, Component, Prefix, Type, Level) ODBG_NULL
+#define ODBG_STREAM(Stream, Type, Level) ODBG_NULL
+#define ODBG_IF_LEVEL(Level) 0
+#define ODBG_ONLY_LEVEL(Level) 0
+#define ODBG_RESET_LEVEL() 0
+#define ODBG(...) ODBG_NULL
+
+#endif
+
+} // namespace llvm::offload::debug
+
+namespace llvm::omp::target::debug {
+using namespace llvm::offload::debug;
+
+enum OmpDebugLevel : uint32_t {
+ ODL_Default = 1,
+ ODL_Error = ODL_Default,
+ ODL_Detailed = 2,
+ ODL_Verbose = 3,
+ ODL_VeryVerbose = 4,
+ ODL_Dumping = 5
+};
+
+/* Debug types to use in libomptarget */
+constexpr const char *ODT_Init = "Init";
+constexpr const char *ODT_Mapping = "Mapping";
+constexpr const char *ODT_Kernel = "Kernel";
+constexpr const char *ODT_DataTransfer = "DataTransfer";
+constexpr const char *ODT_Sync = "Sync";
+constexpr const char *ODT_Deinit = "Deinit";
+constexpr const char *ODT_Error = "Error";
+constexpr const char *ODT_KernelArgs = "KernelArgs";
+constexpr const char *ODT_MappingExists = "MappingExists";
+constexpr const char *ODT_DumpTable = "DumpTable";
+constexpr const char *ODT_MappingChanged = "MappingChanged";
+constexpr const char *ODT_PluginKernel = "PluginKernel";
+constexpr const char *ODT_EmptyMapping = "EmptyMapping";
+
+static inline odbg_ostream reportErrorStream() {
+#ifdef OMPTARGET_DEBUG
+ if (::llvm::offload::debug::isDebugEnabled()) {
+ uint32_t RealLevel = ODL_Error;
+ if (::llvm::offload::debug::shouldPrintDebug(GETNAME(TARGET_NAME),
+ (ODT_Error), RealLevel))
+ return odbg_ostream{
+ ::llvm::offload::debug::computePrefix(DEBUG_PREFIX, ODT_Error),
+ ::llvm::offload::debug::dbgs(), RealLevel};
+ else
+ return odbg_ostream{"", ::llvm::nulls(), 1};
+ }
+#endif
+ return odbg_ostream{GETNAME(TARGET_NAME) " error: ",
+ ::llvm::offload::debug::dbgs(), ODL_Error};
+}
+
+#ifdef OMPTARGET_DEBUG
+// Deprecated debug print macros
+[[maybe_unused]] static std::string formatToStr(const char *format, ...) {
+ va_list args;
+ va_start(args, format);
+ size_t len = std::vsnprintf(NULL, 0, format, args);
+ va_end(args);
+ llvm::SmallVector<char, 128> vec(len + 1);
+ va_start(args, format);
+ std::vsnprintf(&vec[0], len + 1, format, args);
+ va_end(args);
+ return &vec[0];
+}
+
+// helper macro to support old DP and REPORT macros with printf syntax
+#define FORMAT_TO_STR(Format, ...) \
+ ::llvm::omp::target::debug::formatToStr(Format __VA_OPT__(, ) __VA_ARGS__)
+
+#define DP(...) ODBG() << FORMAT_TO_STR(__VA_ARGS__);
+
+#define REPORT_INT_OLD(...) \
+ do { \
+ if (::llvm::offload::debug::isDebugEnabled()) { \
+ ODBG(::llvm::omp::target::debug::ODT_Error, \
+ ::llvm::omp::target::debug::ODL_Error) \
+ << FORMAT_TO_STR(__VA_ARGS__); \
+ } else { \
+ FAILURE_MESSAGE(__VA_ARGS__); \
+ } \
+ } while (false)
+
+#else
+#define DP(...) \
+ { \
+ }
+#define REPORT_INT_OLD(...) FAILURE_MESSAGE(__VA_ARGS__);
+#endif // OMPTARGET_DEBUG
+
+// This is used for the new style REPORT macro
+#define REPORT_INT() ::llvm::omp::target::debug::reportErrorStream()
+
+// Make REPORT compatible with old and new syntax
+#define REPORT(...) REPORT_INT##__VA_OPT__(_OLD)(__VA_ARGS__)
+
+} // namespace llvm::omp::target::debug
+
#endif // OMPTARGET_SHARED_DEBUG_H
diff --git a/offload/include/omptarget.h b/offload/include/omptarget.h
index 89aa468..0305899 100644
--- a/offload/include/omptarget.h
+++ b/offload/include/omptarget.h
@@ -94,6 +94,8 @@ enum OpenMPOffloadingDeclareTargetFlags {
OMP_DECLARE_TARGET_INDIRECT = 0x08,
/// This is an entry corresponding to a requirement to be registered.
OMP_REGISTER_REQUIRES = 0x10,
+ /// Mark the entry global as being an indirect vtable.
+ OMP_DECLARE_TARGET_INDIRECT_VTABLE = 0x20,
};
enum TargetAllocTy : int32_t {
@@ -103,10 +105,6 @@ enum TargetAllocTy : int32_t {
TARGET_ALLOC_DEFAULT,
};
-inline KernelArgsTy CTorDTorKernelArgs = {
- 1, 0, nullptr, nullptr, nullptr, nullptr, nullptr,
- nullptr, 0, {0, 0, 0}, {1, 0, 0}, {1, 0, 0}, 0};
-
struct DeviceTy;
/// The libomptarget wrapper around a __tgt_async_info object directly
@@ -274,6 +272,8 @@ extern "C" {
void ompx_dump_mapping_tables(void);
int omp_get_num_devices(void);
int omp_get_device_num(void);
+int omp_get_device_from_uid(const char *DeviceUid);
+const char *omp_get_uid_from_device(int DeviceNum);
int omp_get_initial_device(void);
void *omp_target_alloc(size_t Size, int DeviceNum);
void omp_target_free(void *DevicePtr, int DeviceNum);
diff --git a/offload/liboffload/API/Device.td b/offload/liboffload/API/Device.td
index e9c1548..6ada191 100644
--- a/offload/liboffload/API/Device.td
+++ b/offload/liboffload/API/Device.td
@@ -43,6 +43,7 @@ def ol_device_info_t : Enum {
TaggedEtor<"ADDRESS_BITS", "uint32_t", "Number of bits used to represent an address in device memory">,
TaggedEtor<"MAX_MEM_ALLOC_SIZE", "uint64_t", "The maximum size of memory object allocation in bytes">,
TaggedEtor<"GLOBAL_MEM_SIZE", "uint64_t", "The size of global device memory in bytes">,
+ TaggedEtor<"WORK_GROUP_LOCAL_MEM_SIZE", "uint64_t", "The maximum size of local shared memory per work group in bytes">,
];
list<TaggedEtor> fp_configs = !foreach(type, ["Single", "Double", "Half"], TaggedEtor<type # "_FP_CONFIG", "ol_device_fp_capability_flags_t", type # " precision floating point capability">);
list<TaggedEtor> native_vec_widths = !foreach(type, ["char","short","int","long","float","double","half"], TaggedEtor<"NATIVE_VECTOR_WIDTH_" # type, "uint32_t", "Native vector width for " # type>);
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 84bc414..eab9627 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -495,6 +495,13 @@ Error olGetDeviceInfoImplDetail(ol_device_handle_t Device,
return Info.write(static_cast<uint32_t>(Value));
}
+ case OL_DEVICE_INFO_WORK_GROUP_LOCAL_MEM_SIZE: {
+ if (!std::holds_alternative<uint64_t>(Entry->Value))
+ return makeError(ErrorCode::BACKEND_FAILURE,
+ "plugin returned incorrect type");
+ return Info.write(std::get<uint64_t>(Entry->Value));
+ }
+
case OL_DEVICE_INFO_MAX_WORK_SIZE_PER_DIMENSION:
case OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE_PER_DIMENSION: {
// {x, y, z} triples
@@ -590,6 +597,7 @@ Error olGetDeviceInfoImplDetailHost(ol_device_handle_t Device,
return Info.write<uint32_t>(std::numeric_limits<uintptr_t>::digits);
case OL_DEVICE_INFO_MAX_MEM_ALLOC_SIZE:
case OL_DEVICE_INFO_GLOBAL_MEM_SIZE:
+ case OL_DEVICE_INFO_WORK_GROUP_LOCAL_MEM_SIZE:
return Info.write<uint64_t>(0);
default:
return createOffloadError(ErrorCode::INVALID_ENUMERATION,
diff --git a/offload/libomptarget/OffloadRTL.cpp b/offload/libomptarget/OffloadRTL.cpp
index 04bd21e..3a18d76 100644
--- a/offload/libomptarget/OffloadRTL.cpp
+++ b/offload/libomptarget/OffloadRTL.cpp
@@ -19,6 +19,7 @@
#ifdef OMPT_SUPPORT
extern void llvm::omp::target::ompt::connectLibrary();
#endif
+using namespace llvm::omp::target::debug;
static std::mutex PluginMtx;
static uint32_t RefCount = 0;
@@ -35,7 +36,7 @@ void initRuntime() {
RefCount++;
if (RefCount == 1) {
- DP("Init offload library!\n");
+ ODBG(ODT_Init) << "Init offload library!";
#ifdef OMPT_SUPPORT
// Initialize OMPT first
llvm::omp::target::ompt::connectLibrary();
@@ -54,12 +55,12 @@ void deinitRuntime() {
assert(PM && "Runtime not initialized");
if (RefCount == 1) {
- DP("Deinit offload library!\n");
+ ODBG(ODT_Deinit) << "Deinit offload library!";
// RTL deinitialization has started
RTLAlive = false;
while (RTLOngoingSyncs > 0) {
- DP("Waiting for ongoing syncs to finish, count: %d\n",
- RTLOngoingSyncs.load());
+ ODBG(ODT_Sync) << "Waiting for ongoing syncs to finish, count:"
+ << RTLOngoingSyncs.load();
std::this_thread::sleep_for(std::chrono::milliseconds(100));
}
PM->deinit();
diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp
index dd83a3c..6e85e57 100644
--- a/offload/libomptarget/OpenMP/API.cpp
+++ b/offload/libomptarget/OpenMP/API.cpp
@@ -40,6 +40,8 @@ EXTERN void ompx_dump_mapping_tables() {
using namespace llvm::omp::target::ompt;
#endif
+using GenericDeviceTy = llvm::omp::target::plugin::GenericDeviceTy;
+
void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind,
const char *Name);
void targetFreeExplicit(void *DevicePtr, int DeviceNum, int Kind,
@@ -68,6 +70,62 @@ EXTERN int omp_get_device_num(void) {
return HostDevice;
}
+static inline bool is_initial_device_uid(const char *DeviceUid) {
+ return strcmp(DeviceUid, GenericPluginTy::getHostDeviceUid()) == 0;
+}
+
+EXTERN int omp_get_device_from_uid(const char *DeviceUid) {
+ TIMESCOPE();
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
+
+ if (!DeviceUid) {
+ DP("Call to omp_get_device_from_uid returning omp_invalid_device\n");
+ return omp_invalid_device;
+ }
+ if (is_initial_device_uid(DeviceUid)) {
+ DP("Call to omp_get_device_from_uid returning initial device number %d\n",
+ omp_get_initial_device());
+ return omp_get_initial_device();
+ }
+
+ int DeviceNum = omp_invalid_device;
+
+ auto ExclusiveDevicesAccessor = PM->getExclusiveDevicesAccessor();
+ for (const DeviceTy &Device : PM->devices(ExclusiveDevicesAccessor)) {
+ const char *Uid = Device.RTL->getDevice(Device.RTLDeviceID).getDeviceUid();
+ if (Uid && strcmp(DeviceUid, Uid) == 0) {
+ DeviceNum = Device.DeviceID;
+ break;
+ }
+ }
+
+ DP("Call to omp_get_device_from_uid returning %d\n", DeviceNum);
+ return DeviceNum;
+}
+
+EXTERN const char *omp_get_uid_from_device(int DeviceNum) {
+ TIMESCOPE();
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
+
+ if (DeviceNum == omp_invalid_device) {
+ DP("Call to omp_get_uid_from_device returning nullptr\n");
+ return nullptr;
+ }
+ if (DeviceNum == omp_get_initial_device()) {
+ DP("Call to omp_get_uid_from_device returning initial device UID\n");
+ return GenericPluginTy::getHostDeviceUid();
+ }
+
+ auto DeviceOrErr = PM->getDevice(DeviceNum);
+ if (!DeviceOrErr)
+ FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str());
+
+ const char *Uid =
+ DeviceOrErr->RTL->getDevice(DeviceOrErr->RTLDeviceID).getDeviceUid();
+ DP("Call to omp_get_uid_from_device returning %s\n", Uid);
+ return Uid;
+}
+
EXTERN int omp_get_initial_device(void) {
TIMESCOPE();
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
diff --git a/offload/libomptarget/PluginManager.cpp b/offload/libomptarget/PluginManager.cpp
index c8d6b42..cd6d037 100644
--- a/offload/libomptarget/PluginManager.cpp
+++ b/offload/libomptarget/PluginManager.cpp
@@ -36,7 +36,7 @@ void PluginManager::init() {
return;
}
- DP("Loading RTLs...\n");
+ ODBG("Init") << "Loading RTLs";
// Attempt to create an instance of each supported plugin.
#define PLUGIN_TARGET(Name) \
@@ -437,20 +437,22 @@ static int loadImagesOntoDevice(DeviceTy &Device) {
llvm::offloading::EntryTy DeviceEntry = Entry;
if (Entry.Size) {
- if (Device.RTL->get_global(Binary, Entry.Size, Entry.SymbolName,
- &DeviceEntry.Address) != OFFLOAD_SUCCESS)
- REPORT("Failed to load symbol %s\n", Entry.SymbolName);
+ if (!(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT_VTABLE))
+ if (Device.RTL->get_global(Binary, Entry.Size, Entry.SymbolName,
+ &DeviceEntry.Address) != OFFLOAD_SUCCESS)
+ REPORT("Failed to load symbol %s\n", Entry.SymbolName);
// If unified memory is active, the corresponding global is a device
// reference to the host global. We need to initialize the pointer on
// the device to point to the memory on the host.
- if ((PM->getRequirements() & OMP_REQ_UNIFIED_SHARED_MEMORY) ||
- (PM->getRequirements() & OMPX_REQ_AUTO_ZERO_COPY)) {
+ if (!(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT_VTABLE) &&
+ !(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT) &&
+ ((PM->getRequirements() & OMP_REQ_UNIFIED_SHARED_MEMORY) ||
+ (PM->getRequirements() & OMPX_REQ_AUTO_ZERO_COPY)))
if (Device.RTL->data_submit(DeviceId, DeviceEntry.Address,
Entry.Address,
Entry.Size) != OFFLOAD_SUCCESS)
REPORT("Failed to write symbol for USM %s\n", Entry.SymbolName);
- }
} else if (Entry.Address) {
if (Device.RTL->get_function(Binary, Entry.SymbolName,
&DeviceEntry.Address) != OFFLOAD_SUCCESS)
diff --git a/offload/libomptarget/device.cpp b/offload/libomptarget/device.cpp
index ee36fbe..659ef68 100644
--- a/offload/libomptarget/device.cpp
+++ b/offload/libomptarget/device.cpp
@@ -38,6 +38,7 @@ using namespace llvm::omp::target::ompt;
#endif
using namespace llvm::omp::target::plugin;
+using namespace llvm::omp::target::debug;
int HostDataToTargetTy::addEventIfNecessary(DeviceTy &Device,
AsyncInfoTy &AsyncInfo) const {
@@ -48,7 +49,7 @@ int HostDataToTargetTy::addEventIfNecessary(DeviceTy &Device,
void *Event = getEvent();
bool NeedNewEvent = Event == nullptr;
if (NeedNewEvent && Device.createEvent(&Event) != OFFLOAD_SUCCESS) {
- REPORT("Failed to create event\n");
+ REPORT() << "Failed to create event";
return OFFLOAD_FAIL;
}
@@ -56,7 +57,7 @@ int HostDataToTargetTy::addEventIfNecessary(DeviceTy &Device,
// know if the target support event. But if a target doesn't,
// recordEvent should always return success.
if (Device.recordEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) {
- REPORT("Failed to set dependence on event " DPxMOD "\n", DPxPTR(Event));
+ REPORT() << "Failed to set dependence on event " << Event;
return OFFLOAD_FAIL;
}
@@ -112,21 +113,58 @@ setupIndirectCallTable(DeviceTy &Device, __tgt_device_image *Image,
llvm::SmallVector<std::pair<void *, void *>> IndirectCallTable;
for (const auto &Entry : Entries) {
if (Entry.Kind != llvm::object::OffloadKind::OFK_OpenMP ||
- Entry.Size == 0 || !(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT))
+ Entry.Size == 0 ||
+ (!(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT) &&
+ !(Entry.Flags & OMP_DECLARE_TARGET_INDIRECT_VTABLE)))
continue;
- assert(Entry.Size == sizeof(void *) && "Global not a function pointer?");
- auto &[HstPtr, DevPtr] = IndirectCallTable.emplace_back();
-
- void *Ptr;
- if (Device.RTL->get_global(Binary, Entry.Size, Entry.SymbolName, &Ptr))
- return error::createOffloadError(error::ErrorCode::INVALID_BINARY,
- "failed to load %s", Entry.SymbolName);
-
- HstPtr = Entry.Address;
- if (Device.retrieveData(&DevPtr, Ptr, Entry.Size, AsyncInfo))
- return error::createOffloadError(error::ErrorCode::INVALID_BINARY,
- "failed to load %s", Entry.SymbolName);
+ size_t PtrSize = sizeof(void *);
+ if (Entry.Flags & OMP_DECLARE_TARGET_INDIRECT_VTABLE) {
+ // This is a VTable entry, the current entry is the first index of the
+ // VTable and Entry.Size is the total size of the VTable. Unlike the
+ // indirect function case below, the Global is not of size Entry.Size and
+ // is instead of size PtrSize (sizeof(void*)).
+ void *Vtable;
+ void *res;
+ if (Device.RTL->get_global(Binary, PtrSize, Entry.SymbolName, &Vtable))
+ return error::createOffloadError(error::ErrorCode::INVALID_BINARY,
+ "failed to load %s", Entry.SymbolName);
+
+ // HstPtr = Entry.Address;
+ if (Device.retrieveData(&res, Vtable, PtrSize, AsyncInfo))
+ return error::createOffloadError(error::ErrorCode::INVALID_BINARY,
+ "failed to load %s", Entry.SymbolName);
+ if (Device.synchronize(AsyncInfo))
+ return error::createOffloadError(
+ error::ErrorCode::INVALID_BINARY,
+ "failed to synchronize after retrieving %s", Entry.SymbolName);
+ // Calculate and emplace entire Vtable from first Vtable byte
+ for (uint64_t i = 0; i < Entry.Size / PtrSize; ++i) {
+ auto &[HstPtr, DevPtr] = IndirectCallTable.emplace_back();
+ HstPtr = reinterpret_cast<void *>(
+ reinterpret_cast<uintptr_t>(Entry.Address) + i * PtrSize);
+ DevPtr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(res) +
+ i * PtrSize);
+ }
+ } else {
+ // Indirect function case: Entry.Size should equal PtrSize since we're
+ // dealing with a single function pointer (not a VTable)
+ assert(Entry.Size == PtrSize && "Global not a function pointer?");
+ auto &[HstPtr, DevPtr] = IndirectCallTable.emplace_back();
+ void *Ptr;
+ if (Device.RTL->get_global(Binary, Entry.Size, Entry.SymbolName, &Ptr))
+ return error::createOffloadError(error::ErrorCode::INVALID_BINARY,
+ "failed to load %s", Entry.SymbolName);
+
+ HstPtr = Entry.Address;
+ if (Device.retrieveData(&DevPtr, Ptr, Entry.Size, AsyncInfo))
+ return error::createOffloadError(error::ErrorCode::INVALID_BINARY,
+ "failed to load %s", Entry.SymbolName);
+ }
+ if (Device.synchronize(AsyncInfo))
+ return error::createOffloadError(
+ error::ErrorCode::INVALID_BINARY,
+ "failed to synchronize after retrieving %s", Entry.SymbolName);
}
// If we do not have any indirect globals we exit early.
@@ -278,21 +316,21 @@ int32_t DeviceTy::dataFence(AsyncInfoTy &AsyncInfo) {
}
int32_t DeviceTy::notifyDataMapped(void *HstPtr, int64_t Size) {
- DP("Notifying about new mapping: HstPtr=" DPxMOD ", Size=%" PRId64 "\n",
- DPxPTR(HstPtr), Size);
+ ODBG(ODT_Mapping) << "Notifying about new mapping: HstPtr=" << HstPtr
+ << ", Size=" << Size;
if (RTL->data_notify_mapped(RTLDeviceID, HstPtr, Size)) {
- REPORT("Notifying about data mapping failed.\n");
+ REPORT() << "Notifying about data mapping failed.";
return OFFLOAD_FAIL;
}
return OFFLOAD_SUCCESS;
}
int32_t DeviceTy::notifyDataUnmapped(void *HstPtr) {
- DP("Notifying about an unmapping: HstPtr=" DPxMOD "\n", DPxPTR(HstPtr));
+ ODBG(ODT_Mapping) << "Notifying about an unmapping: HstPtr=" << HstPtr;
if (RTL->data_notify_unmapped(RTLDeviceID, HstPtr)) {
- REPORT("Notifying about data unmapping failed.\n");
+ REPORT() << "Notifying about data unmapping failed.";
return OFFLOAD_FAIL;
}
return OFFLOAD_SUCCESS;
diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports
index 910a5b6..2ebc23e 100644
--- a/offload/libomptarget/exports
+++ b/offload/libomptarget/exports
@@ -40,6 +40,8 @@ VERS1.0 {
omp_get_mapped_ptr;
omp_get_num_devices;
omp_get_device_num;
+ omp_get_device_from_uid;
+ omp_get_uid_from_device;
omp_get_initial_device;
omp_target_alloc;
omp_target_free;
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 04b3944..5196509 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2186,6 +2186,16 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
if (auto Err = checkIfAPU())
return Err;
+ // Retrieve the size of the group memory.
+ for (const auto *Pool : AllMemoryPools) {
+ if (Pool->isGroup()) {
+ if (auto Err = Pool->getAttr(HSA_AMD_MEMORY_POOL_INFO_SIZE,
+ MaxBlockSharedMemSize))
+ return Err;
+ break;
+ }
+ }
+
return Plugin::success();
}
@@ -2923,6 +2933,9 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
if (Status == HSA_STATUS_SUCCESS)
Info.add("Cacheline Size", TmpUInt);
+ Info.add("Max Shared Memory per Work Group", MaxBlockSharedMemSize, "bytes",
+ DeviceInfo::WORK_GROUP_LOCAL_MEM_SIZE);
+
Status = getDeviceAttrRaw(HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY, TmpUInt);
if (Status == HSA_STATUS_SUCCESS)
Info.add("Max Clock Freq", TmpUInt, "MHz",
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index 2135e06..1d52c96 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -794,6 +794,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
/// Get the unique identifier of the device.
const char *getDeviceUid() const { return DeviceUid.c_str(); }
+ /// Get the total shared memory per block (in bytes) that can be used in any
+ /// kernel.
+ size_t getMaxBlockSharedMemSize() const { return MaxBlockSharedMemSize; }
+
/// Set the context of the device if needed, before calling device-specific
/// functions. Plugins may implement this function as a no-op if not needed.
virtual Error setContext() = 0;
@@ -1251,6 +1255,9 @@ protected:
/// Internal representation for OMPT device (initialize & finalize)
std::atomic<bool> OmptInitialized;
#endif
+
+ /// The total per-block native shared memory that a kernel may use.
+ size_t MaxBlockSharedMemSize = 0;
};
/// Class implementing common functionalities of offload plugins. Each plugin
diff --git a/offload/plugins-nextgen/common/src/RPC.cpp b/offload/plugins-nextgen/common/src/RPC.cpp
index e19f2ef..8d67047 100644
--- a/offload/plugins-nextgen/common/src/RPC.cpp
+++ b/offload/plugins-nextgen/common/src/RPC.cpp
@@ -83,7 +83,8 @@ static rpc::Status handleOffloadOpcodes(plugin::GenericDeviceTy &Device,
return rpc::RPC_ERROR;
}
-static rpc::Status runServer(plugin::GenericDeviceTy &Device, void *Buffer) {
+static rpc::Status runServer(plugin::GenericDeviceTy &Device, void *Buffer,
+ bool &ClientInUse) {
uint64_t NumPorts =
std::min(Device.requestedRPCPortCount(), rpc::MAX_PORT_COUNT);
rpc::Server Server(NumPorts, Buffer);
@@ -92,6 +93,7 @@ static rpc::Status runServer(plugin::GenericDeviceTy &Device, void *Buffer) {
if (!Port)
return rpc::RPC_SUCCESS;
+ ClientInUse = true;
rpc::Status Status =
handleOffloadOpcodes(Device, *Port, Device.getWarpSize());
@@ -99,7 +101,6 @@ static rpc::Status runServer(plugin::GenericDeviceTy &Device, void *Buffer) {
if (Status == rpc::RPC_UNHANDLED_OPCODE)
Status = LIBC_NAMESPACE::shared::handle_libc_opcodes(*Port,
Device.getWarpSize());
-
Port->close();
return Status;
@@ -122,7 +123,11 @@ void RPCServerTy::ServerThread::shutDown() {
}
void RPCServerTy::ServerThread::run() {
+ static constexpr auto IdleTime = std::chrono::microseconds(25);
+ static constexpr auto IdleSleep = std::chrono::microseconds(250);
std::unique_lock<decltype(Mutex)> Lock(Mutex);
+
+ auto LastUse = std::chrono::steady_clock::now();
for (;;) {
CV.wait(Lock, [&]() {
return NumUsers.load(std::memory_order_acquire) > 0 ||
@@ -133,15 +138,25 @@ void RPCServerTy::ServerThread::run() {
return;
Lock.unlock();
+ bool ClientInUse = false;
while (NumUsers.load(std::memory_order_relaxed) > 0 &&
Running.load(std::memory_order_relaxed)) {
+
+ // Suspend this thread briefly if there is no current work.
+ auto Now = std::chrono::steady_clock::now();
+ if (!ClientInUse && Now - LastUse >= IdleTime)
+ std::this_thread::sleep_for(IdleSleep);
+ else if (ClientInUse)
+ LastUse = Now;
+
+ ClientInUse = false;
std::lock_guard<decltype(Mutex)> Lock(BufferMutex);
for (const auto &[Buffer, Device] : llvm::zip_equal(Buffers, Devices)) {
if (!Buffer || !Device)
continue;
// If running the server failed, print a message but keep running.
- if (runServer(*Device, Buffer) != rpc::RPC_SUCCESS)
+ if (runServer(*Device, Buffer, ClientInUse) != rpc::RPC_SUCCESS)
FAILURE_MESSAGE("Unhandled or invalid RPC opcode!");
}
}
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index 45e580e7..a27c6f3 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -379,6 +379,12 @@ struct CUDADeviceTy : public GenericDeviceTy {
return Err;
HardwareParallelism = NumMuliprocessors * (MaxThreadsPerSM / WarpSize);
+ uint32_t MaxSharedMem;
+ if (auto Err = getDeviceAttr(
+ CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, MaxSharedMem))
+ return Err;
+ MaxBlockSharedMemSize = MaxSharedMem;
+
return Plugin::success();
}
@@ -1089,10 +1095,8 @@ struct CUDADeviceTy : public GenericDeviceTy {
if (Res == CUDA_SUCCESS)
Info.add("Total Constant Memory", TmpInt, "bytes");
- Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK,
- TmpInt);
- if (Res == CUDA_SUCCESS)
- Info.add("Max Shared Memory per Block", TmpInt, "bytes");
+ Info.add("Max Shared Memory per Block", MaxBlockSharedMemSize, "bytes",
+ DeviceInfo::WORK_GROUP_LOCAL_MEM_SIZE);
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, TmpInt);
if (Res == CUDA_SUCCESS)
diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp
index 48de1fef..8fa1c3e 100644
--- a/offload/plugins-nextgen/host/src/rtl.cpp
+++ b/offload/plugins-nextgen/host/src/rtl.cpp
@@ -443,6 +443,8 @@ struct GenELF64PluginTy final : public GenericPluginTy {
if (auto Err = Plugin::check(ffi_init(), "failed to initialize libffi"))
return std::move(Err);
#endif
+ ODBG("Init") << "GenELF64 plugin detected " << ODBG_IF_LEVEL(2)
+ << NUM_DEVICES << " " << ODBG_RESET_LEVEL() << "devices";
return NUM_DEVICES;
}
diff --git a/offload/test/api/omp_device_uid.c b/offload/test/api/omp_device_uid.c
new file mode 100644
index 0000000..2a41d8d
--- /dev/null
+++ b/offload/test/api/omp_device_uid.c
@@ -0,0 +1,76 @@
+// RUN: %libomptarget-compile-run-and-check-generic
+
+#include <omp.h>
+#include <stdio.h>
+#include <string.h>
+
+int test_omp_device_uid(int device_num) {
+ const char *device_uid = omp_get_uid_from_device(device_num);
+ if (device_uid == NULL) {
+ printf("FAIL for device %d: omp_get_uid_from_device returned NULL\n",
+ device_num);
+ return 0;
+ }
+
+ int device_num_from_uid = omp_get_device_from_uid(device_uid);
+ if (device_num_from_uid != device_num) {
+ printf(
+ "FAIL for device %d: omp_get_device_from_uid returned %d (UID: %s)\n",
+ device_num, device_num_from_uid, device_uid);
+ return 0;
+ }
+
+ if (device_num == omp_get_initial_device())
+ return 1;
+
+ int success = 1;
+
+// Note that the following code may be executed on the host if the host is the
+// device
+#pragma omp target map(tofrom : success) device(device_num)
+ {
+ int device_num = omp_get_device_num();
+
+ // omp_get_uid_from_device() in the device runtime is a dummy function
+ // returning NULL
+ const char *device_uid = omp_get_uid_from_device(device_num);
+
+ // omp_get_device_from_uid() in the device runtime is a dummy function
+ // returning omp_invalid_device.
+ int device_num_from_uid = omp_get_device_from_uid(device_uid);
+
+ // Depending on whether we're executing on the device or the host, we either
+ // got NULL as the device UID or the correct device UID. Consequently,
+ // omp_get_device_from_uid() either returned omp_invalid_device or the
+ // correct device number (aka omp_get_initial_device()).
+ if (device_uid ? device_num_from_uid != device_num
+ : device_num_from_uid != omp_invalid_device) {
+ printf("FAIL for device %d (target): omp_get_device_from_uid returned %d "
+ "(UID: %s)\n",
+ device_num, device_num_from_uid, device_uid);
+ success = 0;
+ }
+ }
+
+ return success;
+}
+
+int main() {
+ int num_devices = omp_get_num_devices();
+ int num_failed = 0;
+ // (also test initial device aka num_devices)
+ for (int i = 0; i < num_devices + 1; i++) {
+ if (!test_omp_device_uid(i)) {
+ printf("FAIL for device %d\n", i);
+ num_failed++;
+ }
+ }
+ if (num_failed) {
+ printf("FAIL\n");
+ return 1;
+ }
+ printf("PASS\n");
+ return 0;
+}
+
+// CHECK: PASS
diff --git a/offload/test/api/omp_indirect_call_table_manual.c b/offload/test/api/omp_indirect_call_table_manual.c
new file mode 100644
index 0000000..e958d47
--- /dev/null
+++ b/offload/test/api/omp_indirect_call_table_manual.c
@@ -0,0 +1,107 @@
+// RUN: %libomptarget-compile-run-and-check-generic
+#include <assert.h>
+#include <omp.h>
+#include <stdio.h>
+
+// ---------------------------------------------------------------------------
+// Various definitions copied from OpenMP RTL
+
+typedef struct {
+ uint64_t Reserved;
+ uint16_t Version;
+ uint16_t Kind; // OpenMP==1
+ uint32_t Flags;
+ void *Address;
+ char *SymbolName;
+ uint64_t Size;
+ uint64_t Data;
+ void *AuxAddr;
+} __tgt_offload_entry;
+
+enum OpenMPOffloadingDeclareTargetFlags {
+ /// Mark the entry global as having a 'link' attribute.
+ OMP_DECLARE_TARGET_LINK = 0x01,
+ /// Mark the entry global as being an indirectly callable function.
+ OMP_DECLARE_TARGET_INDIRECT = 0x08,
+ /// This is an entry corresponding to a requirement to be registered.
+ OMP_REGISTER_REQUIRES = 0x10,
+ /// Mark the entry global as being an indirect vtable.
+ OMP_DECLARE_TARGET_INDIRECT_VTABLE = 0x20,
+};
+
+#pragma omp begin declare variant match(device = {kind(gpu)})
+// Provided by the runtime.
+void *__llvm_omp_indirect_call_lookup(void *host_ptr);
+#pragma omp declare target to(__llvm_omp_indirect_call_lookup) \
+ device_type(nohost)
+#pragma omp end declare variant
+
+#pragma omp begin declare variant match(device = {kind(cpu)})
+// We assume unified addressing on the CPU target.
+void *__llvm_omp_indirect_call_lookup(void *host_ptr) { return host_ptr; }
+#pragma omp end declare variant
+
+#pragma omp begin declare target
+void foo(int *i) { *i += 1; }
+void bar(int *i) { *i += 10; }
+void baz(int *i) { *i += 100; }
+#pragma omp end declare target
+
+typedef void (*fptr_t)(int *i);
+
+// Dispatch Table - declare separately on host and device to avoid
+// registering with the library; this also allows us to use separate
+// names, which is convenient for debugging. This dispatchTable is
+// intended to mimic what Clang emits for C++ vtables.
+fptr_t dispatchTable[] = {foo, bar, baz};
+#pragma omp begin declare target device_type(nohost)
+fptr_t GPUdispatchTable[] = {foo, bar, baz};
+fptr_t *GPUdispatchTablePtr = GPUdispatchTable;
+#pragma omp end declare target
+
+// Define "manual" OpenMP offload entries, where we emit Clang
+// offloading entry structure definitions in the appropriate ELF
+// section. This allows us to emulate the offloading entries that Clang would
+// normally emit for us
+
+__attribute__((weak, section("llvm_offload_entries"), aligned(8)))
+const __tgt_offload_entry __offloading_entry[] = {{
+ 0ULL, // Reserved
+ 1, // Version
+ 1, // Kind
+ OMP_DECLARE_TARGET_INDIRECT_VTABLE, // Flags
+ &dispatchTable, // Address
+ "GPUdispatchTablePtr", // SymbolName
+ (size_t)(sizeof(dispatchTable)), // Size
+ 0ULL, // Data
+ NULL // AuxAddr
+}};
+
+// Mimic how Clang emits vtable pointers for C++ classes
+typedef struct {
+ fptr_t *dispatchPtr;
+} myClass;
+
+// ---------------------------------------------------------------------------
+int main() {
+ myClass obj_foo = {dispatchTable + 0};
+ myClass obj_bar = {dispatchTable + 1};
+ myClass obj_baz = {dispatchTable + 2};
+ int aaa = 0;
+
+#pragma omp target map(aaa) map(to : obj_foo, obj_bar, obj_baz)
+ {
+ // Lookup
+ fptr_t *foo_ptr = __llvm_omp_indirect_call_lookup(obj_foo.dispatchPtr);
+ fptr_t *bar_ptr = __llvm_omp_indirect_call_lookup(obj_bar.dispatchPtr);
+ fptr_t *baz_ptr = __llvm_omp_indirect_call_lookup(obj_baz.dispatchPtr);
+ foo_ptr[0](&aaa);
+ bar_ptr[0](&aaa);
+ baz_ptr[0](&aaa);
+ }
+
+ assert(aaa == 111);
+ // CHECK: PASS
+ printf("PASS\n");
+ return 0;
+}
diff --git a/offload/test/lit.site.cfg.in b/offload/test/lit.site.cfg.in
index 00f4e2b..c8ba45c 100644
--- a/offload/test/lit.site.cfg.in
+++ b/offload/test/lit.site.cfg.in
@@ -1,6 +1,6 @@
@AUTO_GEN_COMMENT@
-config.bin_llvm_tools_dir = "@LLVM_RUNTIME_OUTPUT_INTDIR@"
+config.bin_llvm_tools_dir = "@LLVM_TOOLS_BINARY_DIR@"
config.test_c_compiler = "@OPENMP_TEST_C_COMPILER@"
config.test_cxx_compiler = "@OPENMP_TEST_CXX_COMPILER@"
config.test_fortran_compiler="@OPENMP_TEST_Fortran_COMPILER@"
diff --git a/offload/test/mapping/use_device_addr/target_data_use_device_addr_arrsec_fallback.c b/offload/test/mapping/use_device_addr/target_data_use_device_addr_arrsec_fallback.c
new file mode 100644
index 0000000..4b67a3b
--- /dev/null
+++ b/offload/test/mapping/use_device_addr/target_data_use_device_addr_arrsec_fallback.c
@@ -0,0 +1,24 @@
+// RUN: %libomptarget-compilexx-run-and-check-generic
+
+// Test that when a use_device_addr lookup fails, the
+// list-item retains its original address by default.
+//
+// This is necessary because we must assume that the
+// list-item is device-accessible, even if it was not
+// previously mapped.
+
+// XFAIL: *
+
+#include <stdio.h>
+int h[10];
+int *ph = &h[0];
+
+void f1() {
+ printf("%p\n", &h[2]); // CHECK: 0x[[#%x,ADDR:]]
+#pragma omp target data use_device_addr(h[2])
+ printf("%p\n", &h[2]); // CHECK-NEXT: 0x{{0*}}[[#ADDR]]
+#pragma omp target data use_device_addr(ph[2])
+ printf("%p\n", &ph[2]); // CHECK-NEXT: 0x{{0*}}[[#ADDR]]
+}
+
+int main() { f1(); }
diff --git a/offload/test/mapping/use_device_addr/target_data_use_device_addr_var_fallback.c b/offload/test/mapping/use_device_addr/target_data_use_device_addr_var_fallback.c
new file mode 100644
index 0000000..4495a46b
--- /dev/null
+++ b/offload/test/mapping/use_device_addr/target_data_use_device_addr_var_fallback.c
@@ -0,0 +1,21 @@
+// RUN: %libomptarget-compilexx-run-and-check-generic
+
+// Test that when a use_device_addr lookup fails, the
+// list-item retains its original address by default.
+//
+// This is necessary because we must assume that the
+// list-item is device-accessible, even if it was not
+// previously mapped.
+
+// XFAIL: *
+
+#include <stdio.h>
+int x;
+
+void f1() {
+ printf("%p\n", &x); // CHECK: 0x[[#%x,ADDR:]]
+#pragma omp target data use_device_addr(x)
+ printf("%p\n", &x); // CHECK-NEXT: 0x{{0*}}[[#ADDR]]
+}
+
+int main() { f1(); }
diff --git a/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_fallback.c b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_fallback.c
new file mode 100644
index 0000000..e8fa3b6
--- /dev/null
+++ b/offload/test/mapping/use_device_ptr/target_data_use_device_ptr_var_fallback.c
@@ -0,0 +1,32 @@
+// RUN: %libomptarget-compilexx-run-and-check-generic
+
+// Test that when a use_device_ptr lookup fails, the
+// privatized pointer retains its original value by
+// default.
+//
+// This is necessary because we must assume that the
+// pointee is device-accessible, even if it was not
+// previously mapped.
+//
+// OpenMP 5.1, sec 2.14.2, target data construct, p 188, l26-31:
+// If a list item that appears in a use_device_ptr clause ... does not point to
+// a mapped object, it must contain a valid device address for the target
+// device, and the list item references are instead converted to references to a
+// local device pointer that refers to this device address.
+//
+// Note: OpenMP 6.1 will have a way to change the
+// fallback behavior: preserve or nullify.
+
+// XFAIL: *
+
+#include <stdio.h>
+int x;
+int *xp = &x;
+
+void f1() {
+ printf("%p\n", xp); // CHECK: 0x[[#%x,ADDR:]]
+#pragma omp target data use_device_ptr(xp)
+ printf("%p\n", xp); // CHECK-NEXT: 0x{{0*}}[[#ADDR]]
+}
+
+int main() { f1(); }
diff --git a/offload/test/offloading/fortran/declare-target-to-allocatable-vars-in-target-with-update.f90 b/offload/test/offloading/fortran/declare-target-to-allocatable-vars-in-target-with-update.f90
new file mode 100644
index 0000000..727a08b
--- /dev/null
+++ b/offload/test/offloading/fortran/declare-target-to-allocatable-vars-in-target-with-update.f90
@@ -0,0 +1,41 @@
+! Test that checks an allocatable array can be marked implicit
+! `declare target to` and functions without issue.
+! REQUIRES: flang, amdgpu
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+module test
+ implicit none
+ integer, allocatable, dimension(:) :: alloca_arr
+ !$omp declare target(alloca_arr)
+end module test
+
+program main
+ use test
+ implicit none
+ integer :: cycle, i
+
+ allocate(alloca_arr(10))
+
+ do i = 1, 10
+ alloca_arr(i) = 0
+ end do
+
+ !$omp target data map(to:alloca_arr)
+ do cycle = 1, 2
+ !$omp target
+ do i = 1, 10
+ alloca_arr(i) = alloca_arr(i) + i
+ end do
+ !$omp end target
+
+ ! NOTE: Technically doesn't affect the results, but there is a
+ ! regression case that'll cause a runtime crash if this is
+ ! invoked more than once, so this checks for that.
+ !$omp target update from(alloca_arr)
+ end do
+ !$omp end target data
+
+ print *, alloca_arr
+end program
+
+! CHECK: 2 4 6 8 10 12 14 16 18 20
diff --git a/offload/test/offloading/fortran/declare-target-to-vars-target-region-and-update.f90 b/offload/test/offloading/fortran/declare-target-to-vars-target-region-and-update.f90
new file mode 100644
index 0000000..16433af
--- /dev/null
+++ b/offload/test/offloading/fortran/declare-target-to-vars-target-region-and-update.f90
@@ -0,0 +1,40 @@
+! Test the implicit `declare target to` interaction with `target update from`
+! REQUIRES: flang, amdgpu
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+module test
+ implicit none
+ integer :: array(10)
+ !$omp declare target(array)
+end module test
+
+PROGRAM main
+ use test
+ implicit none
+ integer :: i
+
+ do i = 1, 10
+ array(i) = 0
+ end do
+
+ !$omp target
+ do i = 1, 10
+ array(i) = i
+ end do
+ !$omp end target
+
+ !$omp target
+ do i = 1, 10
+ array(i) = array(i) + i
+ end do
+ !$omp end target
+
+ print *, array
+
+ !$omp target update from(array)
+
+ print *, array
+END PROGRAM
+
+! CHECK: 0 0 0 0 0 0 0 0 0 0
+! CHECK: 2 4 6 8 10 12 14 16 18 20
diff --git a/offload/test/offloading/fortran/declare-target-to-zero-index-allocatable-target-map.f90 b/offload/test/offloading/fortran/declare-target-to-zero-index-allocatable-target-map.f90
new file mode 100644
index 0000000..0d650f6
--- /dev/null
+++ b/offload/test/offloading/fortran/declare-target-to-zero-index-allocatable-target-map.f90
@@ -0,0 +1,30 @@
+! Test `declare target to` interaction with an allocatable with a non-default
+! range
+! REQUIRES: flang, amdgpu
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+module test_0
+ real(4), allocatable :: zero_off(:)
+ !$omp declare target(zero_off)
+end module test_0
+
+program main
+ use test_0
+ implicit none
+
+ allocate(zero_off(0:10))
+
+ zero_off(0) = 30.0
+ zero_off(1) = 40.0
+ zero_off(10) = 25.0
+
+ !$omp target map(tofrom: zero_off)
+ zero_off(0) = zero_off(1)
+ !$omp end target
+
+ print *, zero_off(0)
+ print *, zero_off(1)
+end program
+
+! CHECK: 40.
+! CHECK: 40.
diff --git a/offload/test/offloading/fortran/dtype-member-overlap-map.f90 b/offload/test/offloading/fortran/dtype-member-overlap-map.f90
new file mode 100644
index 0000000..e457014
--- /dev/null
+++ b/offload/test/offloading/fortran/dtype-member-overlap-map.f90
@@ -0,0 +1,56 @@
+! Basic offloading test checking the interaction of an overlapping
+! member map.
+! REQUIRES: flang, amdgpu
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+program main
+ implicit none
+ integer :: i
+
+ type dtype2
+ integer :: int
+ real :: float
+ end type dtype2
+
+ type dtype1
+ character (LEN=30) :: characters
+ type(dtype2) :: internal_dtype2
+ end type dtype1
+
+ type dtype
+ integer :: elements(10)
+ type(dtype1) :: internal_dtype
+ integer :: value
+ end type dtype
+
+ type (dtype) :: single_dtype
+
+ do i = 1, 10
+ single_dtype%elements(i) = 0
+ end do
+
+ !$omp target map(tofrom: single_dtype%internal_dtype, single_dtype%internal_dtype%internal_dtype2%int)
+ single_dtype%internal_dtype%internal_dtype2%int = 123
+ single_dtype%internal_dtype%characters(1:1) = "Z"
+ !$omp end target
+
+ !$omp target map(to: single_dtype) map(tofrom: single_dtype%internal_dtype%internal_dtype2, single_dtype%value)
+ single_dtype%value = 20
+ do i = 1, 10
+ single_dtype%elements(i) = i
+ end do
+ single_dtype%internal_dtype%internal_dtype2%float = 32.0
+ !$omp end target
+
+ print *, single_dtype%value
+ print *, single_dtype%internal_dtype%internal_dtype2%float
+ print *, single_dtype%elements
+ print *, single_dtype%internal_dtype%internal_dtype2%int
+ print *, single_dtype%internal_dtype%characters(1:1)
+end program main
+
+! CHECK: 20
+! CHECK: 32.
+! CHECK: 0 0 0 0 0 0 0 0 0 0
+! CHECK: 123
+! CHECK: Z
diff --git a/offload/test/offloading/fortran/implicit-derived-enter-exit.f90 b/offload/test/offloading/fortran/implicit-derived-enter-exit.f90
new file mode 100644
index 0000000..0c896e64
--- /dev/null
+++ b/offload/test/offloading/fortran/implicit-derived-enter-exit.f90
@@ -0,0 +1,65 @@
+! REQUIRES: flang, amdgpu
+
+! RUN: %libomptarget-compile-fortran-generic
+! RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic
+
+module enter_exit_mapper_mod
+ implicit none
+
+ type :: field_type
+ real, allocatable :: values(:)
+ end type field_type
+
+ type :: tile_type
+ type(field_type) :: field
+ integer, allocatable :: neighbors(:)
+ end type tile_type
+
+contains
+ subroutine init_tile(tile)
+ type(tile_type), intent(inout) :: tile
+ integer :: j
+
+ allocate(tile%field%values(4))
+ allocate(tile%neighbors(4))
+ do j = 1, 4
+ tile%field%values(j) = 10.0 * j
+ tile%neighbors(j) = j
+ end do
+ end subroutine init_tile
+
+end module enter_exit_mapper_mod
+
+program implicit_enter_exit
+ use enter_exit_mapper_mod
+ implicit none
+ integer :: j
+ type(tile_type) :: tile
+
+ call init_tile(tile)
+
+ !$omp target enter data map(alloc: tile%field%values)
+
+ !$omp target
+ do j = 1, size(tile%field%values)
+ tile%field%values(j) = 5.0 * j
+ end do
+ !$omp end target
+
+ !$omp target exit data map(from: tile%field%values)
+
+ do j = 1, size(tile%field%values)
+ if (tile%field%values(j) /= 5.0 * j) then
+ print *, "======= Test Failed! ======="
+ stop 1
+ end if
+ if (tile%neighbors(j) /= j) then
+ print *, "======= Test Failed! ======="
+ stop 1
+ end if
+ end do
+
+ print *, "======= Test Passed! ======="
+end program implicit_enter_exit
+
+! CHECK: ======= Test Passed! =======
diff --git a/offload/test/offloading/fortran/target-custom-reduction-derivedtype.f90 b/offload/test/offloading/fortran/target-custom-reduction-derivedtype.f90
new file mode 100644
index 0000000..cc390cf0
--- /dev/null
+++ b/offload/test/offloading/fortran/target-custom-reduction-derivedtype.f90
@@ -0,0 +1,88 @@
+! Basic offloading test with custom OpenMP reduction on derived type
+! REQUIRES: flang, amdgpu
+!
+! RUN: %libomptarget-compile-fortran-generic
+! RUN: env LIBOMPTARGET_INFO=16 %libomptarget-run-generic 2>&1 | %fcheck-generic
+module maxtype_mod
+ implicit none
+
+ type maxtype
+ integer::sumval
+ integer::maxval
+ end type maxtype
+
+contains
+
+ subroutine initme(x,n)
+ type(maxtype) :: x,n
+ x%sumval=0
+ x%maxval=0
+ end subroutine initme
+
+ function mycombine(lhs, rhs)
+ type(maxtype) :: lhs, rhs
+ type(maxtype) :: mycombine
+ mycombine%sumval = lhs%sumval + rhs%sumval
+ mycombine%maxval = max(lhs%maxval, rhs%maxval)
+ end function mycombine
+
+end module maxtype_mod
+
+program main
+ use maxtype_mod
+ implicit none
+
+ integer :: n = 100
+ integer :: i
+ integer :: error = 0
+ type(maxtype) :: x(100)
+ type(maxtype) :: res
+ integer :: expected_sum, expected_max
+
+!$omp declare reduction(red_add_max:maxtype:omp_out=mycombine(omp_out,omp_in)) initializer(initme(omp_priv,omp_orig))
+
+ ! Initialize array with test data
+ do i = 1, n
+ x(i)%sumval = i
+ x(i)%maxval = i
+ end do
+
+ ! Initialize reduction variable
+ res%sumval = 0
+ res%maxval = 0
+
+ ! Perform reduction in target region
+ !$omp target parallel do map(to:x) reduction(red_add_max:res)
+ do i = 1, n
+ res = mycombine(res, x(i))
+ end do
+ !$omp end target parallel do
+
+ ! Compute expected values
+ expected_sum = 0
+ expected_max = 0
+ do i = 1, n
+ expected_sum = expected_sum + i
+ expected_max = max(expected_max, i)
+ end do
+
+ ! Check results
+ if (res%sumval /= expected_sum) then
+ error = 1
+ endif
+
+ if (res%maxval /= expected_max) then
+ error = 1
+ endif
+
+ if (error == 0) then
+ print *,"PASSED"
+ else
+ print *,"FAILED"
+ endif
+
+end program main
+
+! CHECK: "PluginInterface" device {{[0-9]+}} info: Launching kernel {{.*}}
+! CHECK: PASSED
+
diff --git a/offload/test/offloading/fortran/target-is-device-ptr.f90 b/offload/test/offloading/fortran/target-is-device-ptr.f90
new file mode 100644
index 0000000..d6d8c02
--- /dev/null
+++ b/offload/test/offloading/fortran/target-is-device-ptr.f90
@@ -0,0 +1,60 @@
+! Validate that a device pointer obtained via omp_get_mapped_ptr can be used
+! inside a TARGET region with the is_device_ptr clause.
+! REQUIRES: flang, amdgcn-amd-amdhsa
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+
+module mod
+ implicit none
+ integer, parameter :: n = 4
+contains
+ subroutine kernel(dptr)
+ use iso_c_binding, only : c_ptr, c_f_pointer
+ implicit none
+
+ type(c_ptr) :: dptr
+ integer, dimension(:), pointer :: b
+ integer :: i
+
+ b => null()
+
+ !$omp target is_device_ptr(dptr)
+ call c_f_pointer(dptr, b, [n])
+ do i = 1, n
+ b(i) = b(i) + 1
+ end do
+ !$omp end target
+ end subroutine kernel
+end module mod
+
+program is_device_ptr_target
+ use iso_c_binding, only : c_ptr, c_loc, c_f_pointer
+ use omp_lib, only: omp_get_default_device, omp_get_mapped_ptr
+ use mod, only: kernel, n
+ implicit none
+
+ integer, dimension(n), target :: a
+ integer :: dev
+ type(c_ptr) :: dptr
+
+ a = [2, 4, 6, 8]
+ print '("BEFORE:", I3)', a
+
+ dev = omp_get_default_device()
+
+ !$omp target data map(tofrom: a)
+ dptr = omp_get_mapped_ptr(c_loc(a), dev)
+ call kernel(dptr)
+ !$omp end target data
+
+ print '("AFTER: ", I3)', a
+
+ if (all(a == [3, 5, 7, 9])) then
+ print '("PASS")'
+ else
+ print '("FAIL ", I3)', a
+ end if
+
+end program is_device_ptr_target
+
+!CHECK: PASS
diff --git a/offload/test/offloading/gpupgo/pgo_atomic_teams.c b/offload/test/offloading/gpupgo/pgo_atomic_teams.c
index 42d8ae4..b3b72db 100644
--- a/offload/test/offloading/gpupgo/pgo_atomic_teams.c
+++ b/offload/test/offloading/gpupgo/pgo_atomic_teams.c
@@ -18,7 +18,6 @@
// REQUIRES: amdgpu
// REQUIRES: pgo
-// XFAIL: amdgpu
int test1(int a) { return a / 2; }
int test2(int a) { return a * 2; }
diff --git a/offload/test/offloading/gpupgo/pgo_atomic_threads.c b/offload/test/offloading/gpupgo/pgo_atomic_threads.c
index 09a4dc1..440a6b5 100644
--- a/offload/test/offloading/gpupgo/pgo_atomic_threads.c
+++ b/offload/test/offloading/gpupgo/pgo_atomic_threads.c
@@ -18,7 +18,6 @@
// REQUIRES: amdgpu
// REQUIRES: pgo
-// XFAIL: amdgpu
int test1(int a) { return a / 2; }
diff --git a/offload/test/offloading/gpupgo/pgo_device_and_host.c b/offload/test/offloading/gpupgo/pgo_device_and_host.c
index c53e69a..3e95791 100644
--- a/offload/test/offloading/gpupgo/pgo_device_and_host.c
+++ b/offload/test/offloading/gpupgo/pgo_device_and_host.c
@@ -50,7 +50,6 @@
// REQUIRES: amdgpu
// REQUIRES: pgo
-// XFAIL: amdgpu
int main() {
int host_var = 0;
diff --git a/offload/test/offloading/gpupgo/pgo_device_only.c b/offload/test/offloading/gpupgo/pgo_device_only.c
index 644df6e..2939af61 100644
--- a/offload/test/offloading/gpupgo/pgo_device_only.c
+++ b/offload/test/offloading/gpupgo/pgo_device_only.c
@@ -16,7 +16,6 @@
// REQUIRES: amdgpu
// REQUIRES: pgo
-// XFAIL: amdgpu
int test1(int a) { return a / 2; }
int test2(int a) { return a * 2; }
diff --git a/offload/test/offloading/shared_lib_fp_mapping.c b/offload/test/offloading/shared_lib_fp_mapping.c
index c620344..e0af9b7 100644
--- a/offload/test/offloading/shared_lib_fp_mapping.c
+++ b/offload/test/offloading/shared_lib_fp_mapping.c
@@ -7,8 +7,8 @@
#include <stdio.h>
-extern int func(); // Provided in liba.so, returns 42
-typedef int (*fp_t)();
+extern int func(void); // Provided in liba.so, returns 42
+typedef int (*fp_t)(void);
int main() {
int x = 0;
diff --git a/offload/test/offloading/static_linking.c b/offload/test/offloading/static_linking.c
index 7be95a1..273109e 100644
--- a/offload/test/offloading/static_linking.c
+++ b/offload/test/offloading/static_linking.c
@@ -14,7 +14,7 @@ int foo() {
}
#else
#include <stdio.h>
-int foo();
+int foo(void);
int main() {
int x = foo();
diff --git a/offload/tools/deviceinfo/llvm-offload-device-info.cpp b/offload/tools/deviceinfo/llvm-offload-device-info.cpp
index 42ffb97..74af3bf 100644
--- a/offload/tools/deviceinfo/llvm-offload-device-info.cpp
+++ b/offload/tools/deviceinfo/llvm-offload-device-info.cpp
@@ -206,6 +206,9 @@ ol_result_t printDevice(std::ostream &S, ol_device_handle_t D) {
OFFLOAD_ERR(printDeviceValue<uint64_t>(S, D, OL_DEVICE_INFO_GLOBAL_MEM_SIZE,
"Global Mem Size", "B"));
OFFLOAD_ERR(
+ printDeviceValue<uint64_t>(S, D, OL_DEVICE_INFO_WORK_GROUP_LOCAL_MEM_SIZE,
+ "Work Group Shared Mem Size", "B"));
+ OFFLOAD_ERR(
(printDeviceValue<ol_device_fp_capability_flags_t, PrintKind::FP_FLAGS>(
S, D, OL_DEVICE_INFO_SINGLE_FP_CONFIG,
"Single Precision Floating Point Capability")));
diff --git a/offload/tools/offload-tblgen/EntryPointGen.cpp b/offload/tools/offload-tblgen/EntryPointGen.cpp
index 4e42e49..4f76100 100644
--- a/offload/tools/offload-tblgen/EntryPointGen.cpp
+++ b/offload/tools/offload-tblgen/EntryPointGen.cpp
@@ -83,13 +83,15 @@ static void EmitEntryPointFunc(const FunctionRec &F, raw_ostream &OS) {
OS << ") {\n";
// Check offload is initialized
- if (F.getName() != "olInit")
+ if (F.getName() != "olInit") {
OS << "if (!llvm::offload::isOffloadInitialized()) return &UninitError;";
- // Emit pre-call prints
- OS << TAB_1 "if (llvm::offload::isTracingEnabled()) {\n";
- OS << formatv(TAB_2 "llvm::errs() << \"---> {0}\";\n", F.getName());
- OS << TAB_1 "}\n\n";
+ // Emit pre-call prints
+ // Postpone pre-calls for olInit as tracing requires liboffload to be initialized
+ OS << TAB_1 "if (llvm::offload::isTracingEnabled()) {\n";
+ OS << formatv(TAB_2 "llvm::errs() << \"---> {0}\";\n", F.getName());
+ OS << TAB_1 "}\n\n";
+ }
// Perform actual function call to the validation wrapper
ParamNameList = ParamNameList.substr(0, ParamNameList.size() - 2);
@@ -99,6 +101,10 @@ static void EmitEntryPointFunc(const FunctionRec &F, raw_ostream &OS) {
// Emit post-call prints
OS << TAB_1 "if (llvm::offload::isTracingEnabled()) {\n";
+ // postponed pre-call print for olInit
+ if (F.getName() == "olInit")
+ OS << formatv(TAB_2 "llvm::errs() << \"---> {0}\";\n", F.getName());
+
if (F.getParams().size() > 0) {
OS << formatv(TAB_2 "{0} Params = {{", F.getParamStructName());
for (const auto &Param : F.getParams()) {
diff --git a/offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp b/offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp
index 30eafee..ba29fb1 100644
--- a/offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp
+++ b/offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp
@@ -217,6 +217,11 @@ OL_DEVICE_INFO_TEST_DEVICE_VALUE_GT(GlobalMemSize, uint64_t,
OL_DEVICE_INFO_GLOBAL_MEM_SIZE, 0);
OL_DEVICE_INFO_TEST_HOST_SUCCESS(GlobalMemSize, uint64_t,
OL_DEVICE_INFO_GLOBAL_MEM_SIZE);
+OL_DEVICE_INFO_TEST_DEVICE_VALUE_GT(SharedMemSize, uint64_t,
+ OL_DEVICE_INFO_WORK_GROUP_LOCAL_MEM_SIZE,
+ 0);
+OL_DEVICE_INFO_TEST_HOST_SUCCESS(SharedMemSize, uint64_t,
+ OL_DEVICE_INFO_WORK_GROUP_LOCAL_MEM_SIZE);
TEST_P(olGetDeviceInfoTest, InvalidNullHandleDevice) {
ol_device_type_t DeviceType;
diff --git a/offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp b/offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp
index 79a18c1..2c375eb 100644
--- a/offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp
+++ b/offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp
@@ -71,6 +71,8 @@ OL_DEVICE_INFO_SIZE_TEST_EQ(MaxMemAllocSize, uint64_t,
OL_DEVICE_INFO_MAX_MEM_ALLOC_SIZE);
OL_DEVICE_INFO_SIZE_TEST_EQ(GlobalMemSize, uint64_t,
OL_DEVICE_INFO_GLOBAL_MEM_SIZE);
+OL_DEVICE_INFO_SIZE_TEST_EQ(SharedMemSize, uint64_t,
+ OL_DEVICE_INFO_WORK_GROUP_LOCAL_MEM_SIZE);
TEST_P(olGetDeviceInfoSizeTest, SuccessMaxWorkGroupSizePerDimension) {
size_t Size = 0;