9 files changed, 151 insertions, 41 deletions
diff --git a/offload/liboffload/API/Common.td b/offload/liboffload/API/Common.td
index ac27d85..b472236 100644
--- a/offload/liboffload/API/Common.td
+++ b/offload/liboffload/API/Common.td
@@ -140,9 +140,10 @@ def ol_dimensions_t : Struct {
 }
 
 def olInit : Function {
-  let desc = "Perform initialization of the Offload library and plugins";
+  let desc = "Perform initialization of the Offload library";
   let details = [
     "This must be the first API call made by a user of the Offload library",
+    "The underlying platforms are lazily initialized on their first use"
     "Each call will increment an internal reference count that is decremented by `olShutDown`"
   ];
   let params = [];
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index c549ae0..6d22fae 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -42,9 +42,7 @@ using namespace error;
 struct ol_platform_impl_t {
   ol_platform_impl_t(std::unique_ptr<GenericPluginTy> Plugin,
                      ol_platform_backend_t BackendType)
-      : Plugin(std::move(Plugin)), BackendType(BackendType) {}
-  std::unique_ptr<GenericPluginTy> Plugin;
-  llvm::SmallVector<std::unique_ptr<ol_device_impl_t>> Devices;
+      : BackendType(BackendType), Plugin(std::move(Plugin)) {}
   ol_platform_backend_t BackendType;
 
   /// Complete all pending work for this platform and perform any needed
@@ -53,6 +51,14 @@ struct ol_platform_impl_t {
   /// After calling this function, no liboffload functions should be called with
   /// this platform handle.
   llvm::Error destroy();
+
+  /// Initialize the associated plugin and devices.
+  llvm::Error init();
+
+  /// Direct access to the plugin, may be uninitialized if accessed here.
+  std::unique_ptr<GenericPluginTy> Plugin;
+
+  llvm::SmallVector<std::unique_ptr<ol_device_impl_t>> Devices;
 };
 
 // Handle type definitions. Ideally these would be 1:1 with the plugins, but
@@ -130,6 +136,28 @@ llvm::Error ol_platform_impl_t::destroy() {
   return Result;
 }
 
+llvm::Error ol_platform_impl_t::init() {
+  if (!Plugin)
+    return llvm::Error::success();
+
+  if (llvm::Error Err = Plugin->init())
+    return Err;
+
+  for (auto Id = 0, End = Plugin->getNumDevices(); Id != End; Id++) {
+    if (llvm::Error Err = Plugin->initDevice(Id))
+      return Err;
+
+    auto Device = &Plugin->getDevice(Id);
+    auto Info = Device->obtainInfoImpl();
+    if (llvm::Error Err = Info.takeError())
+      return Err;
+    Devices.emplace_back(std::make_unique<ol_device_impl_t>(Id, Device, *this,
+                                                            std::move(*Info)));
+  }
+
+  return llvm::Error::success();
+}
+
 struct ol_queue_impl_t {
   ol_queue_impl_t(__tgt_async_info *AsyncInfo, ol_device_handle_t Device)
       : AsyncInfo(AsyncInfo), Device(Device), Id(IdCounter++) {}
@@ -207,15 +235,11 @@ struct OffloadContext {
   std::mutex AllocInfoMapMutex{};
   // Partitioned list of memory base addresses. Each element in this list is a
   // key in AllocInfoMap
-  llvm::SmallVector<void *> AllocBases{};
+  SmallVector<void *> AllocBases{};
   SmallVector<std::unique_ptr<ol_platform_impl_t>, 4> Platforms{};
+  ol_device_handle_t HostDevice;
   size_t RefCount;
 
-  ol_device_handle_t HostDevice() {
-    // The host platform is always inserted last
-    return Platforms.back()->Devices[0].get();
-  }
-
   static OffloadContext &get() {
     assert(OffloadContextVal);
     return *OffloadContextVal;
@@ -259,28 +283,21 @@ Error initPlugins(OffloadContext &Context) {
   } while (false);
 #include "Shared/Targets.def"
 
-  // Preemptively initialize all devices in the plugin
+  // Eagerly initialize all of the plugins and devices. We need to make sure
+  // that the platform is initialized at a consistent point to maintain the
+  // expected teardown order in the vendor libraries.
   for (auto &Platform : Context.Platforms) {
-    auto Err = Platform->Plugin->init();
-    [[maybe_unused]] std::string InfoMsg = toString(std::move(Err));
-    for (auto DevNum = 0; DevNum < Platform->Plugin->number_of_devices();
-         DevNum++) {
-      if (Platform->Plugin->init_device(DevNum) == OFFLOAD_SUCCESS) {
-        auto Device = &Platform->Plugin->getDevice(DevNum);
-        auto Info = Device->obtainInfoImpl();
-        if (auto Err = Info.takeError())
-          return Err;
-        Platform->Devices.emplace_back(std::make_unique<ol_device_impl_t>(
-            DevNum, Device, *Platform, std::move(*Info)));
-      }
-    }
+    if (Error Err = Platform->init())
+      return Err;
   }
 
-  // Add the special host device
+  // Add the special host device.
   auto &HostPlatform = Context.Platforms.emplace_back(
       std::make_unique<ol_platform_impl_t>(nullptr, OL_PLATFORM_BACKEND_HOST));
-  HostPlatform->Devices.emplace_back(std::make_unique<ol_device_impl_t>(
-      -1, nullptr, *HostPlatform, InfoTreeNode{}));
+  Context.HostDevice = HostPlatform->Devices
+                           .emplace_back(std::make_unique<ol_device_impl_t>(
+                               -1, nullptr, *HostPlatform, InfoTreeNode{}))
+                           .get();
 
   Context.TracingEnabled = std::getenv("OFFLOAD_TRACE");
   Context.ValidationEnabled = !std::getenv("OFFLOAD_DISABLE_VALIDATION");
@@ -312,16 +329,16 @@ Error olShutDown_impl() {
   if (--OffloadContext::get().RefCount != 0)
     return Error::success();
 
-  llvm::Error Result = Error::success();
+  Error Result = Error::success();
   auto *OldContext = OffloadContextVal.exchange(nullptr);
 
-  for (auto &P : OldContext->Platforms) {
+  for (auto &Platform : OldContext->Platforms) {
     // Host plugin is nullptr and has no deinit
-    if (!P->Plugin || !P->Plugin->is_initialized())
+    if (!Platform->Plugin || !Platform->Plugin->is_initialized())
       continue;
 
-    if (auto Res = P->destroy())
-      Result = llvm::joinErrors(std::move(Result), std::move(Res));
+    if (auto Res = Platform->destroy())
+      Result = joinErrors(std::move(Result), std::move(Res));
   }
 
   delete OldContext;
@@ -334,6 +351,8 @@ Error olGetPlatformInfoImplDetail(ol_platform_handle_t Platform,
   InfoWriter Info(PropSize, PropValue, PropSizeRet);
   bool IsHost = Platform->BackendType == OL_PLATFORM_BACKEND_HOST;
 
+  // Note that the plugin is potentially uninitialized here. It will need to be
+  // initialized once info is added that requires it to be initialized.
   switch (PropName) {
   case OL_PLATFORM_INFO_NAME:
     return Info.writeString(IsHost ? "Host" : Platform->Plugin->getName());
@@ -373,12 +392,12 @@ Error olGetPlatformInfoSize_impl(ol_platform_handle_t Platform,
 Error olGetDeviceInfoImplDetail(ol_device_handle_t Device,
                                 ol_device_info_t PropName, size_t PropSize,
                                 void *PropValue, size_t *PropSizeRet) {
-  assert(Device != OffloadContext::get().HostDevice());
+  assert(Device != OffloadContext::get().HostDevice);
   InfoWriter Info(PropSize, PropValue, PropSizeRet);
 
   auto makeError = [&](ErrorCode Code, StringRef Err) {
     std::string ErrBuffer;
-    llvm::raw_string_ostream(ErrBuffer) << PropName << ": " << Err;
+    raw_string_ostream(ErrBuffer) << PropName << ": " << Err;
     return Plugin::error(ErrorCode::UNIMPLEMENTED, ErrBuffer.c_str());
   };
 
@@ -511,7 +530,7 @@ Error olGetDeviceInfoImplDetail(ol_device_handle_t Device,
 Error olGetDeviceInfoImplDetailHost(ol_device_handle_t Device,
                                     ol_device_info_t PropName, size_t PropSize,
                                     void *PropValue, size_t *PropSizeRet) {
-  assert(Device == OffloadContext::get().HostDevice());
+  assert(Device == OffloadContext::get().HostDevice);
   InfoWriter Info(PropSize, PropValue, PropSizeRet);
 
   constexpr auto uint32_max = std::numeric_limits<uint32_t>::max();
@@ -579,7 +598,7 @@ Error olGetDeviceInfoImplDetailHost(ol_device_handle_t Device,
 
 Error olGetDeviceInfo_impl(ol_device_handle_t Device, ol_device_info_t PropName,
                            size_t PropSize, void *PropValue) {
-  if (Device == OffloadContext::get().HostDevice())
+  if (Device == OffloadContext::get().HostDevice)
     return olGetDeviceInfoImplDetailHost(Device, PropName, PropSize, PropValue,
                                          nullptr);
   return olGetDeviceInfoImplDetail(Device, PropName, PropSize, PropValue,
@@ -588,7 +607,7 @@ Error olGetDeviceInfo_impl(ol_device_handle_t Device, ol_device_info_t PropName,
 
 Error olGetDeviceInfoSize_impl(ol_device_handle_t Device,
                                ol_device_info_t PropName, size_t *PropSizeRet) {
-  if (Device == OffloadContext::get().HostDevice())
+  if (Device == OffloadContext::get().HostDevice)
     return olGetDeviceInfoImplDetailHost(Device, PropName, 0, nullptr,
                                          PropSizeRet);
   return olGetDeviceInfoImplDetail(Device, PropName, 0, nullptr, PropSizeRet);
@@ -598,7 +617,7 @@ Error olIterateDevices_impl(ol_device_iterate_cb_t Callback, void *UserData) {
   for (auto &Platform : OffloadContext::get().Platforms) {
     for (auto &Device : Platform->Devices) {
       if (!Callback(Device.get(), UserData)) {
-        break;
+        return Error::success();
       }
     }
   }
@@ -949,7 +968,7 @@ Error olCreateEvent_impl(ol_queue_handle_t Queue, ol_event_handle_t *EventOut) {
 Error olMemcpy_impl(ol_queue_handle_t Queue, void *DstPtr,
                     ol_device_handle_t DstDevice, const void *SrcPtr,
                     ol_device_handle_t SrcDevice, size_t Size) {
-  auto Host = OffloadContext::get().HostDevice();
+  auto Host = OffloadContext::get().HostDevice;
   if (DstDevice == Host && SrcDevice == Host) {
     if (!Queue) {
       std::memcpy(DstPtr, SrcPtr, Size);
@@ -1138,7 +1157,7 @@ Error olGetSymbolInfoImplDetail(ol_symbol_handle_t Symbol,
   auto CheckKind = [&](ol_symbol_kind_t Required) {
     if (Symbol->Kind != Required) {
       std::string ErrBuffer;
-      llvm::raw_string_ostream(ErrBuffer)
+      raw_string_ostream(ErrBuffer)
           << PropName << ": Expected a symbol of Kind " << Required
           << " but given a symbol of Kind " << Symbol->Kind;
       return Plugin::error(ErrorCode::SYMBOL_KIND, ErrBuffer.c_str());
diff --git a/offload/test/offloading/CUDA/basic_launch_multi_arg.cu b/offload/test/offloading/CUDA/basic_launch_multi_arg.cu
index b2e1edf..7a32983f 100644
--- a/offload/test/offloading/CUDA/basic_launch_multi_arg.cu
+++ b/offload/test/offloading/CUDA/basic_launch_multi_arg.cu
@@ -8,7 +8,7 @@
 // REQUIRES: gpu
 //
 // FIXME: https://github.com/llvm/llvm-project/issues/161265
-// XFAIL: gpu
+// UNSUPPORTED: gpu
 
 #include <stdio.h>
 
diff --git a/offload/test/offloading/barrier_fence.c b/offload/test/offloading/barrier_fence.c
index 73d259d..e43db0a5 100644
--- a/offload/test/offloading/barrier_fence.c
+++ b/offload/test/offloading/barrier_fence.c
@@ -4,6 +4,9 @@
 // RUN: %libomptarget-run-generic
 
 // REQUIRES: gpu
+//
+// FIXME: https://github.com/llvm/llvm-project/issues/161265
+// UNSUPPORTED: gpu
 
 #include <omp.h>
 #include <stdio.h>
diff --git a/offload/test/offloading/gpupgo/pgo_atomic_teams.c b/offload/test/offloading/gpupgo/pgo_atomic_teams.c
index b3b72db..42d8ae4 100644
--- a/offload/test/offloading/gpupgo/pgo_atomic_teams.c
+++ b/offload/test/offloading/gpupgo/pgo_atomic_teams.c
@@ -18,6 +18,7 @@
 
 // REQUIRES: amdgpu
 // REQUIRES: pgo
+// XFAIL: amdgpu
 
 int test1(int a) { return a / 2; }
 int test2(int a) { return a * 2; }
diff --git a/offload/test/offloading/gpupgo/pgo_atomic_threads.c b/offload/test/offloading/gpupgo/pgo_atomic_threads.c
index 440a6b5..09a4dc1 100644
--- a/offload/test/offloading/gpupgo/pgo_atomic_threads.c
+++ b/offload/test/offloading/gpupgo/pgo_atomic_threads.c
@@ -18,6 +18,7 @@
 
 // REQUIRES: amdgpu
 // REQUIRES: pgo
+// XFAIL: amdgpu
 
 int test1(int a) { return a / 2; }
 
diff --git a/offload/test/offloading/gpupgo/pgo_device_and_host.c b/offload/test/offloading/gpupgo/pgo_device_and_host.c
index 3e95791..c53e69a 100644
--- a/offload/test/offloading/gpupgo/pgo_device_and_host.c
+++ b/offload/test/offloading/gpupgo/pgo_device_and_host.c
@@ -50,6 +50,7 @@
 
 // REQUIRES: amdgpu
 // REQUIRES: pgo
+// XFAIL: amdgpu
 
 int main() {
   int host_var = 0;
diff --git a/offload/test/offloading/gpupgo/pgo_device_only.c b/offload/test/offloading/gpupgo/pgo_device_only.c
index 2939af61..644df6e 100644
--- a/offload/test/offloading/gpupgo/pgo_device_only.c
+++ b/offload/test/offloading/gpupgo/pgo_device_only.c
@@ -16,6 +16,7 @@
 
 // REQUIRES: amdgpu
 // REQUIRES: pgo
+// XFAIL: amdgpu
 
 int test1(int a) { return a / 2; }
 int test2(int a) { return a * 2; }
diff --git a/offload/test/offloading/interop-print.c b/offload/test/offloading/interop-print.c
new file mode 100644
index 0000000..a386420
--- /dev/null
+++ b/offload/test/offloading/interop-print.c
@@ -0,0 +1,83 @@
+// RUN: %libomptarget-compile-amdgcn-amd-amdhsa
+// RUN:   %libomptarget-run-generic 2>&1 | \
+// RUN:   %fcheck-amdgcn-amd-amdhsa -check-prefixes=AMD
+
+// RUN: %libomptarget-compile-nvptx64-nvidia-cuda
+// RUN:   %libomptarget-run-generic 2>&1 | \
+// RUN:   %fcheck-nvptx64-nvidia-cuda -check-prefixes=NVIDIA
+
+// REQUIRES: gpu
+// XFAIL: nvptx64-nvidia-cuda
+
+#include <omp.h>
+#include <stdio.h>
+
+const char *interop_int_to_string(const int interop_int) {
+  switch (interop_int) {
+  case 1:
+    return "cuda";
+  case 2:
+    return "cuda_driver";
+  case 3:
+    return "opencl";
+  case 4:
+    return "sycl";
+  case 5:
+    return "hip";
+  case 6:
+    return "level_zero";
+  case 7:
+    return "hsa";
+  default:
+    return "unknown";
+  }
+}
+
+int main(int argc, char **argv) {
+
+  // Loop over all available devices
+  for (int id = 0; id < omp_get_num_devices(); ++id) {
+    omp_interop_t iobj = omp_interop_none;
+
+    // TODO: Change targetsync to target when AMD toolchain supports it.
+#pragma omp interop init(target : iobj) device(id)
+
+    int err;
+    int interop_int = omp_get_interop_int(iobj, omp_ipr_fr_id, &err);
+
+    if (err) {
+      fprintf(stderr, "omp_get_interop_int failed: %d\n", err);
+      return -1;
+    }
+
+    // AMD: {{.*}} hsa
+    // NVIDIA: {{.*}} cuda
+    printf("omp_get_interop_int returned %s\n",
+           interop_int_to_string(interop_int));
+
+    const char *interop_vendor =
+        omp_get_interop_str(iobj, omp_ipr_vendor_name, &err);
+    if (err) {
+      fprintf(stderr, "omp_get_interop_str failed: %d\n", err);
+      return -1;
+    }
+
+    // AMD: {{.*}} amd
+    // NVIDIA: {{.*}} nvidia
+    printf("omp_get_interop_str returned %s\n", interop_vendor);
+
+    const char *interop_fr_name =
+        omp_get_interop_str(iobj, omp_ipr_fr_name, &err);
+    if (err) {
+      fprintf(stderr, "omp_get_interop_str failed: %d\n", err);
+      return -1;
+    }
+
+    // AMD: {{.*}} hsa
+    // NVIDIA: {{.*}} cuda
+    printf("omp_get_interop_str returned %s\n", interop_fr_name);
+
+#pragma omp interop destroy(iobj)
+  }
+  return 0;
+}