aboutsummaryrefslogtreecommitdiff
path: root/offload
diff options
context:
space:
mode:
Diffstat (limited to 'offload')
-rw-r--r-- offload/liboffload/API/Common.td | 3
-rw-r--r-- offload/liboffload/src/OffloadImpl.cpp | 97
-rw-r--r-- offload/test/offloading/CUDA/basic_launch_multi_arg.cu | 2
-rw-r--r-- offload/test/offloading/barrier_fence.c | 3
-rw-r--r-- offload/test/offloading/gpupgo/pgo_atomic_teams.c | 1
-rw-r--r-- offload/test/offloading/gpupgo/pgo_atomic_threads.c | 1
-rw-r--r-- offload/test/offloading/gpupgo/pgo_device_and_host.c | 1
-rw-r--r-- offload/test/offloading/gpupgo/pgo_device_only.c | 1
-rw-r--r-- offload/test/offloading/interop-print.c | 83
9 files changed, 151 insertions, 41 deletions
diff --git a/offload/liboffload/API/Common.td b/offload/liboffload/API/Common.td
index ac27d85..b472236 100644
--- a/offload/liboffload/API/Common.td
+++ b/offload/liboffload/API/Common.td
@@ -140,9 +140,10 @@ def ol_dimensions_t : Struct {
}
def olInit : Function {
- let desc = "Perform initialization of the Offload library and plugins";
+ let desc = "Perform initialization of the Offload library";
let details = [
"This must be the first API call made by a user of the Offload library",
+ "The underlying platforms are lazily initialized on their first use",
"Each call will increment an internal reference count that is decremented by `olShutDown`"
];
let params = [];
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index c549ae0..6d22fae 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -42,9 +42,7 @@ using namespace error;
struct ol_platform_impl_t {
ol_platform_impl_t(std::unique_ptr<GenericPluginTy> Plugin,
ol_platform_backend_t BackendType)
- : Plugin(std::move(Plugin)), BackendType(BackendType) {}
- std::unique_ptr<GenericPluginTy> Plugin;
- llvm::SmallVector<std::unique_ptr<ol_device_impl_t>> Devices;
+ : BackendType(BackendType), Plugin(std::move(Plugin)) {}
ol_platform_backend_t BackendType;
/// Complete all pending work for this platform and perform any needed
@@ -53,6 +51,14 @@ struct ol_platform_impl_t {
/// After calling this function, no liboffload functions should be called with
/// this platform handle.
llvm::Error destroy();
+
+ /// Initialize the associated plugin and devices.
+ llvm::Error init();
+
+ /// Direct access to the plugin; it may not be initialized yet when read here.
+ std::unique_ptr<GenericPluginTy> Plugin;
+
+ llvm::SmallVector<std::unique_ptr<ol_device_impl_t>> Devices;
};
// Handle type definitions. Ideally these would be 1:1 with the plugins, but
@@ -130,6 +136,28 @@ llvm::Error ol_platform_impl_t::destroy() {
return Result;
}
+llvm::Error ol_platform_impl_t::init() {
+ if (!Plugin)
+ return llvm::Error::success();
+
+ if (llvm::Error Err = Plugin->init())
+ return Err;
+
+ for (auto Id = 0, End = Plugin->getNumDevices(); Id != End; Id++) {
+ if (llvm::Error Err = Plugin->initDevice(Id))
+ return Err;
+
+ auto Device = &Plugin->getDevice(Id);
+ auto Info = Device->obtainInfoImpl();
+ if (llvm::Error Err = Info.takeError())
+ return Err;
+ Devices.emplace_back(std::make_unique<ol_device_impl_t>(Id, Device, *this,
+ std::move(*Info)));
+ }
+
+ return llvm::Error::success();
+}
+
struct ol_queue_impl_t {
ol_queue_impl_t(__tgt_async_info *AsyncInfo, ol_device_handle_t Device)
: AsyncInfo(AsyncInfo), Device(Device), Id(IdCounter++) {}
@@ -207,15 +235,11 @@ struct OffloadContext {
std::mutex AllocInfoMapMutex{};
// Partitioned list of memory base addresses. Each element in this list is a
// key in AllocInfoMap
- llvm::SmallVector<void *> AllocBases{};
+ SmallVector<void *> AllocBases{};
SmallVector<std::unique_ptr<ol_platform_impl_t>, 4> Platforms{};
+ ol_device_handle_t HostDevice;
size_t RefCount;
- ol_device_handle_t HostDevice() {
- // The host platform is always inserted last
- return Platforms.back()->Devices[0].get();
- }
-
static OffloadContext &get() {
assert(OffloadContextVal);
return *OffloadContextVal;
@@ -259,28 +283,21 @@ Error initPlugins(OffloadContext &Context) {
} while (false);
#include "Shared/Targets.def"
- // Preemptively initialize all devices in the plugin
+ // Eagerly initialize all of the plugins and devices. We need to make sure
+ // that the platform is initialized at a consistent point to maintain the
+ // expected teardown order in the vendor libraries.
for (auto &Platform : Context.Platforms) {
- auto Err = Platform->Plugin->init();
- [[maybe_unused]] std::string InfoMsg = toString(std::move(Err));
- for (auto DevNum = 0; DevNum < Platform->Plugin->number_of_devices();
- DevNum++) {
- if (Platform->Plugin->init_device(DevNum) == OFFLOAD_SUCCESS) {
- auto Device = &Platform->Plugin->getDevice(DevNum);
- auto Info = Device->obtainInfoImpl();
- if (auto Err = Info.takeError())
- return Err;
- Platform->Devices.emplace_back(std::make_unique<ol_device_impl_t>(
- DevNum, Device, *Platform, std::move(*Info)));
- }
- }
+ if (Error Err = Platform->init())
+ return Err;
}
- // Add the special host device
+ // Add the special host device.
auto &HostPlatform = Context.Platforms.emplace_back(
std::make_unique<ol_platform_impl_t>(nullptr, OL_PLATFORM_BACKEND_HOST));
- HostPlatform->Devices.emplace_back(std::make_unique<ol_device_impl_t>(
- -1, nullptr, *HostPlatform, InfoTreeNode{}));
+ Context.HostDevice = HostPlatform->Devices
+ .emplace_back(std::make_unique<ol_device_impl_t>(
+ -1, nullptr, *HostPlatform, InfoTreeNode{}))
+ .get();
Context.TracingEnabled = std::getenv("OFFLOAD_TRACE");
Context.ValidationEnabled = !std::getenv("OFFLOAD_DISABLE_VALIDATION");
@@ -312,16 +329,16 @@ Error olShutDown_impl() {
if (--OffloadContext::get().RefCount != 0)
return Error::success();
- llvm::Error Result = Error::success();
+ Error Result = Error::success();
auto *OldContext = OffloadContextVal.exchange(nullptr);
- for (auto &P : OldContext->Platforms) {
+ for (auto &Platform : OldContext->Platforms) {
// Host plugin is nullptr and has no deinit
- if (!P->Plugin || !P->Plugin->is_initialized())
+ if (!Platform->Plugin || !Platform->Plugin->is_initialized())
continue;
- if (auto Res = P->destroy())
- Result = llvm::joinErrors(std::move(Result), std::move(Res));
+ if (auto Res = Platform->destroy())
+ Result = joinErrors(std::move(Result), std::move(Res));
}
delete OldContext;
@@ -334,6 +351,8 @@ Error olGetPlatformInfoImplDetail(ol_platform_handle_t Platform,
InfoWriter Info(PropSize, PropValue, PropSizeRet);
bool IsHost = Platform->BackendType == OL_PLATFORM_BACKEND_HOST;
+ // Note that the plugin is potentially uninitialized here. It will need to be
+ // initialized once info is added that requires it to be initialized.
switch (PropName) {
case OL_PLATFORM_INFO_NAME:
return Info.writeString(IsHost ? "Host" : Platform->Plugin->getName());
@@ -373,12 +392,12 @@ Error olGetPlatformInfoSize_impl(ol_platform_handle_t Platform,
Error olGetDeviceInfoImplDetail(ol_device_handle_t Device,
ol_device_info_t PropName, size_t PropSize,
void *PropValue, size_t *PropSizeRet) {
- assert(Device != OffloadContext::get().HostDevice());
+ assert(Device != OffloadContext::get().HostDevice);
InfoWriter Info(PropSize, PropValue, PropSizeRet);
auto makeError = [&](ErrorCode Code, StringRef Err) {
std::string ErrBuffer;
- llvm::raw_string_ostream(ErrBuffer) << PropName << ": " << Err;
+ raw_string_ostream(ErrBuffer) << PropName << ": " << Err;
return Plugin::error(ErrorCode::UNIMPLEMENTED, ErrBuffer.c_str());
};
@@ -511,7 +530,7 @@ Error olGetDeviceInfoImplDetail(ol_device_handle_t Device,
Error olGetDeviceInfoImplDetailHost(ol_device_handle_t Device,
ol_device_info_t PropName, size_t PropSize,
void *PropValue, size_t *PropSizeRet) {
- assert(Device == OffloadContext::get().HostDevice());
+ assert(Device == OffloadContext::get().HostDevice);
InfoWriter Info(PropSize, PropValue, PropSizeRet);
constexpr auto uint32_max = std::numeric_limits<uint32_t>::max();
@@ -579,7 +598,7 @@ Error olGetDeviceInfoImplDetailHost(ol_device_handle_t Device,
Error olGetDeviceInfo_impl(ol_device_handle_t Device, ol_device_info_t PropName,
size_t PropSize, void *PropValue) {
- if (Device == OffloadContext::get().HostDevice())
+ if (Device == OffloadContext::get().HostDevice)
return olGetDeviceInfoImplDetailHost(Device, PropName, PropSize, PropValue,
nullptr);
return olGetDeviceInfoImplDetail(Device, PropName, PropSize, PropValue,
@@ -588,7 +607,7 @@ Error olGetDeviceInfo_impl(ol_device_handle_t Device, ol_device_info_t PropName,
Error olGetDeviceInfoSize_impl(ol_device_handle_t Device,
ol_device_info_t PropName, size_t *PropSizeRet) {
- if (Device == OffloadContext::get().HostDevice())
+ if (Device == OffloadContext::get().HostDevice)
return olGetDeviceInfoImplDetailHost(Device, PropName, 0, nullptr,
PropSizeRet);
return olGetDeviceInfoImplDetail(Device, PropName, 0, nullptr, PropSizeRet);
@@ -598,7 +617,7 @@ Error olIterateDevices_impl(ol_device_iterate_cb_t Callback, void *UserData) {
for (auto &Platform : OffloadContext::get().Platforms) {
for (auto &Device : Platform->Devices) {
if (!Callback(Device.get(), UserData)) {
- break;
+ return Error::success();
}
}
}
@@ -949,7 +968,7 @@ Error olCreateEvent_impl(ol_queue_handle_t Queue, ol_event_handle_t *EventOut) {
Error olMemcpy_impl(ol_queue_handle_t Queue, void *DstPtr,
ol_device_handle_t DstDevice, const void *SrcPtr,
ol_device_handle_t SrcDevice, size_t Size) {
- auto Host = OffloadContext::get().HostDevice();
+ auto Host = OffloadContext::get().HostDevice;
if (DstDevice == Host && SrcDevice == Host) {
if (!Queue) {
std::memcpy(DstPtr, SrcPtr, Size);
@@ -1138,7 +1157,7 @@ Error olGetSymbolInfoImplDetail(ol_symbol_handle_t Symbol,
auto CheckKind = [&](ol_symbol_kind_t Required) {
if (Symbol->Kind != Required) {
std::string ErrBuffer;
- llvm::raw_string_ostream(ErrBuffer)
+ raw_string_ostream(ErrBuffer)
<< PropName << ": Expected a symbol of Kind " << Required
<< " but given a symbol of Kind " << Symbol->Kind;
return Plugin::error(ErrorCode::SYMBOL_KIND, ErrBuffer.c_str());
diff --git a/offload/test/offloading/CUDA/basic_launch_multi_arg.cu b/offload/test/offloading/CUDA/basic_launch_multi_arg.cu
index b2e1edf..7a32983f 100644
--- a/offload/test/offloading/CUDA/basic_launch_multi_arg.cu
+++ b/offload/test/offloading/CUDA/basic_launch_multi_arg.cu
@@ -8,7 +8,7 @@
// REQUIRES: gpu
//
// FIXME: https://github.com/llvm/llvm-project/issues/161265
-// XFAIL: gpu
+// UNSUPPORTED: gpu
#include <stdio.h>
diff --git a/offload/test/offloading/barrier_fence.c b/offload/test/offloading/barrier_fence.c
index 73d259d..e43db0a5 100644
--- a/offload/test/offloading/barrier_fence.c
+++ b/offload/test/offloading/barrier_fence.c
@@ -4,6 +4,9 @@
// RUN: %libomptarget-run-generic
// REQUIRES: gpu
+//
+// FIXME: https://github.com/llvm/llvm-project/issues/161265
+// UNSUPPORTED: gpu
#include <omp.h>
#include <stdio.h>
diff --git a/offload/test/offloading/gpupgo/pgo_atomic_teams.c b/offload/test/offloading/gpupgo/pgo_atomic_teams.c
index b3b72db..42d8ae4 100644
--- a/offload/test/offloading/gpupgo/pgo_atomic_teams.c
+++ b/offload/test/offloading/gpupgo/pgo_atomic_teams.c
@@ -18,6 +18,7 @@
// REQUIRES: amdgpu
// REQUIRES: pgo
+// XFAIL: amdgpu
int test1(int a) { return a / 2; }
int test2(int a) { return a * 2; }
diff --git a/offload/test/offloading/gpupgo/pgo_atomic_threads.c b/offload/test/offloading/gpupgo/pgo_atomic_threads.c
index 440a6b5..09a4dc1 100644
--- a/offload/test/offloading/gpupgo/pgo_atomic_threads.c
+++ b/offload/test/offloading/gpupgo/pgo_atomic_threads.c
@@ -18,6 +18,7 @@
// REQUIRES: amdgpu
// REQUIRES: pgo
+// XFAIL: amdgpu
int test1(int a) { return a / 2; }
diff --git a/offload/test/offloading/gpupgo/pgo_device_and_host.c b/offload/test/offloading/gpupgo/pgo_device_and_host.c
index 3e95791..c53e69a 100644
--- a/offload/test/offloading/gpupgo/pgo_device_and_host.c
+++ b/offload/test/offloading/gpupgo/pgo_device_and_host.c
@@ -50,6 +50,7 @@
// REQUIRES: amdgpu
// REQUIRES: pgo
+// XFAIL: amdgpu
int main() {
int host_var = 0;
diff --git a/offload/test/offloading/gpupgo/pgo_device_only.c b/offload/test/offloading/gpupgo/pgo_device_only.c
index 2939af61..644df6e 100644
--- a/offload/test/offloading/gpupgo/pgo_device_only.c
+++ b/offload/test/offloading/gpupgo/pgo_device_only.c
@@ -16,6 +16,7 @@
// REQUIRES: amdgpu
// REQUIRES: pgo
+// XFAIL: amdgpu
int test1(int a) { return a / 2; }
int test2(int a) { return a * 2; }
diff --git a/offload/test/offloading/interop-print.c b/offload/test/offloading/interop-print.c
new file mode 100644
index 0000000..a386420
--- /dev/null
+++ b/offload/test/offloading/interop-print.c
@@ -0,0 +1,83 @@
+// RUN: %libomptarget-compile-amdgcn-amd-amdhsa
+// RUN: %libomptarget-run-generic 2>&1 | \
+// RUN: %fcheck-amdgcn-amd-amdhsa -check-prefixes=AMD
+
+// RUN: %libomptarget-compile-nvptx64-nvidia-cuda
+// RUN: %libomptarget-run-generic 2>&1 | \
+// RUN: %fcheck-nvptx64-nvidia-cuda -check-prefixes=NVIDIA
+
+// REQUIRES: gpu
+// XFAIL: nvptx64-nvidia-cuda
+
+#include <omp.h>
+#include <stdio.h>
+
+const char *interop_int_to_string(const int interop_int) {
+ switch (interop_int) {
+ case 1:
+ return "cuda";
+ case 2:
+ return "cuda_driver";
+ case 3:
+ return "opencl";
+ case 4:
+ return "sycl";
+ case 5:
+ return "hip";
+ case 6:
+ return "level_zero";
+ case 7:
+ return "hsa";
+ default:
+ return "unknown";
+ }
+}
+
+int main(int argc, char **argv) {
+
+ // Loop over all available devices
+ for (int id = 0; id < omp_get_num_devices(); ++id) {
+ omp_interop_t iobj = omp_interop_none;
+
+ // TODO: Change targetsync to target when AMD toolchain supports it.
+#pragma omp interop init(target : iobj) device(id)
+
+ int err;
+ int interop_int = omp_get_interop_int(iobj, omp_ipr_fr_id, &err);
+
+ if (err) {
+ fprintf(stderr, "omp_get_interop_int failed: %d\n", err);
+ return -1;
+ }
+
+ // AMD: {{.*}} hsa
+ // NVIDIA: {{.*}} cuda
+ printf("omp_get_interop_int returned %s\n",
+ interop_int_to_string(interop_int));
+
+ const char *interop_vendor =
+ omp_get_interop_str(iobj, omp_ipr_vendor_name, &err);
+ if (err) {
+ fprintf(stderr, "omp_get_interop_str failed: %d\n", err);
+ return -1;
+ }
+
+ // AMD: {{.*}} amd
+ // NVIDIA: {{.*}} nvidia
+ printf("omp_get_interop_str returned %s\n", interop_vendor);
+
+ const char *interop_fr_name =
+ omp_get_interop_str(iobj, omp_ipr_fr_name, &err);
+ if (err) {
+ fprintf(stderr, "omp_get_interop_str failed: %d\n", err);
+ return -1;
+ }
+
+ // AMD: {{.*}} hsa
+ // NVIDIA: {{.*}} cuda
+ printf("omp_get_interop_str returned %s\n", interop_fr_name);
+
+#pragma omp interop destroy(iobj)
+ }
+ return 0;
+}