1 files changed, 240 insertions, 0 deletions
diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas.h b/libgomp/testsuite/libgomp.c/interop-hipblas.h
new file mode 100644
index 0000000..d7cb174
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hipblas.h
@@ -0,0 +1,240 @@
+/* Check whether hipBlas' daxpy works with an interop object.
+     daxpy(N, DA, DX, INCX, DY, INCY)
+   calculates (for INCX = INCY = 1):
+     DY(1:N) =  DY(1:N) + DA * DX(1:N)
+   and otherwise N array elements, taking every INCX-th or INCY-th one, respectively.
+
+Based on the interop example in OpenMP's example document  */
+
+/* Minimal check whether HIP works - by checking whether the API routines
+   seem to work.  This includes a fallback if the header is not
+   available.  */
+
+#if !defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__)
+  #error "Either __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__ must be defined"
+#endif
+
+#if defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__)
+  #error "Either __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__ must be defined"
+#endif
+
+
+#include <assert.h>
+#include <omp.h>
+#include "../libgomp.c-c++-common/on_device_arch.h"
+
+/* Get the hipBLAS API from the real header or from minimal fallbacks.  */
+#if __has_include(<hipblas/hipblas.h>) && (__has_include(<library_types.h>) || !defined(__HIP_PLATFORM_NVIDIA__)) && !defined(USE_HIP_FALLBACK_HEADER)
+  #ifdef __HIP_PLATFORM_NVIDIA__
+    /* There seems to be an issue with hip/library_types.h including
+       CUDA's "library_types.h". Include CUDA's one explicitly here.
+       Could possibly be worked around by using -isystem vs. -I.  */
+    #include <library_types.h>
+
+    /* For some reason, the following symbols do not seem to get
+       mapped from HIP to CUDA, causing link errors.  */
+    #define hipblasSetStream cublasSetStream_v2
+    #define hipblasDaxpy cublasDaxpy_v2
+    #define hipblasCreate cublasCreate_v2
+  #endif
+  #include <hipblas/hipblas.h>
+
+#elif defined(__HIP_PLATFORM_AMD__)
+  /* Add a poor man's fallback declaration.  */
+  #if !defined(USE_HIP_FALLBACK_HEADER)
+    #warning "Using fallback declaration for <hipblas/hipblas.h> for __HIP_PLATFORM_AMD__"
+  #endif
+
+  typedef enum
+  {
+    HIPBLAS_STATUS_SUCCESS = 0
+
+  } hipblasStatus_t;
+
+  typedef struct ihipStream_t* hipStream_t;
+  typedef void* hipblasHandle_t;
+
+  hipblasStatus_t hipblasCreate (hipblasHandle_t*);
+  hipblasStatus_t hipblasSetStream (hipblasHandle_t, hipStream_t);
+  hipblasStatus_t hipblasDaxpy (hipblasHandle_t, int, const double*, const double*, int, double*, int);
+
+#else
+  /* Add a poor man's fallback declaration.  */
+  #if !defined(USE_HIP_FALLBACK_HEADER)
+    #warning "Using fallback declaration for <hipblas/hipblas.h> for __HIP_PLATFORM_NVIDIA__"
+  #endif
+
+  #if __has_include(<cuda.h>) && __has_include(<cudaTypedefs.h>) && __has_include(<cuda_runtime.h>) && __has_include(<cublas_v2.h>) && !defined(USE_CUDA_FALLBACK_HEADER)
+    #include <cuda.h>
+    #include <cudaTypedefs.h>
+    #include <cuda_runtime.h>
+    #include <cublas_v2.h>
+
+  #else
+    /* Add a poor man's fallback declaration.  */
+    #if defined(USE_CUDA_FALLBACK_HEADER)
+      // no warning
+    #elif !__has_include(<cuda.h>)
+      #warning "Using GCC's cuda.h as fallback for cuda.h"
+    #elif !__has_include(<cudaTypedefs.h>)
+      #warning "Using GCC's cuda.h as fallback for cudaTypedefs.h"
+    #elif !__has_include(<cuda_runtime.h>)
+      #warning "Using GCC's cuda.h as fallback for cuda_runtime.h"
+    #else
+      #warning "Using GCC's cuda.h as fallback for cublas_v2.h"
+    #endif
+    #include "../../../include/cuda/cuda.h"
+
+    typedef enum {
+      CUBLAS_STATUS_SUCCESS = 0,
+    } cublasStatus_t;
+
+    typedef CUstream cudaStream_t;
+    typedef struct cublasContext* cublasHandle_t;
+
+    #define cublasCreate cublasCreate_v2
+    cublasStatus_t cublasCreate_v2 (cublasHandle_t *);
+
+    #define cublasSetStream cublasSetStream_v2
+    cublasStatus_t cublasSetStream_v2 (cublasHandle_t, cudaStream_t);
+
+    #define cublasDaxpy cublasDaxpy_v2
+    cublasStatus_t cublasDaxpy_v2(cublasHandle_t, int, const double*, const double*, int, double*, int);
+  #endif
+
+  #define HIPBLAS_STATUS_SUCCESS CUBLAS_STATUS_SUCCESS
+  #define hipblasStatus_t cublasStatus_t
+  #define hipStream_t cudaStream_t
+  #define hipblasHandle_t cublasHandle_t
+  #define hipblasCreate cublasCreate
+  #define hipblasSetStream cublasSetStream
+  #define hipblasDaxpy cublasDaxpy
+#endif
+/* Which daxpy ran: 1 = hipBLAS variant, 2 = base run_daxpy, 99 = not yet.  */
+static int used_variant = 0;
+
+/* hipBLAS variant of run_daxpy (see the 'declare variant' directive below):
+   DX and DY arrive as device pointers, OBJ is the appended interop object.  */
+void
+run_hipBlasdaxpy (int n, double da, const double *dx, int incx, double *dy, int incy, omp_interop_t obj)
+{
+  omp_interop_rc_t res;
+  hipblasStatus_t stat;
+  hipblasHandle_t handle;
+
+  used_variant = 1;
+  /* OBJ must refer to the HIP foreign runtime; its targetsync is the stream.  */
+  omp_intptr_t fr = omp_get_interop_int (obj, omp_ipr_fr_id, &res);
+  assert (res == omp_irc_success && fr == omp_ifr_hip);
+  hipStream_t stream = (hipStream_t) omp_get_interop_ptr (obj, omp_ipr_targetsync, &res);
+  assert (res == omp_irc_success);
+
+  stat = hipblasCreate (&handle);
+  assert (stat == HIPBLAS_STATUS_SUCCESS);
+  stat = hipblasSetStream (handle, stream);
+  assert (stat == HIPBLAS_STATUS_SUCCESS);
+
+  /* 'da' can be in host or device space, 'dx' and 'dy' must be in device space.  */
+  stat = hipblasDaxpy (handle, n, &da, dx, incx, dy, incy);
+  assert (stat == HIPBLAS_STATUS_SUCCESS);
+}
+
+#if defined(__HIP_PLATFORM_AMD__)  /* Variant applies on amdgcn devices.  */
+#pragma omp declare variant(run_hipBlasdaxpy) \
+                       match(construct={dispatch}, target_device={kind(nohost), arch("amdgcn")}) \
+                       adjust_args(need_device_ptr : dx, dy) \
+                       append_args(interop(targetsync, prefer_type("hip")))
+#elif defined(__HIP_PLATFORM_NVIDIA__)  /* Variant applies on nvptx devices.  */
+#pragma omp declare variant(run_hipBlasdaxpy) \
+                       match(construct={dispatch}, target_device={kind(nohost), arch("nvptx")}) \
+                       adjust_args(need_device_ptr : dx, dy) \
+                       append_args(interop(targetsync, prefer_type("hip")))
+#else  /* Not reached if the platform check at the top of this file ran.  */
+ #error "wrong platform"
+#endif
+
+/* Base daxpy, executed when no variant is substituted:
+   DY += DA * DX over N elements with strides INCX and INCY.  */
+void
+run_daxpy (int n, double da, const double *dx, int incx, double *dy, int incy)
+{
+  used_variant = 2;
+
+  if (incx == 1 && incy == 1)
+    #pragma omp simd
+    for (int i = 0; i < n; i++)
+      dy[i] += da * dx[i];
+  else
+    {
+      /* As in reference BLAS, a negative stride starts at the far end so
+	 that no element before the start of the array is accessed.  */
+      int ix = incx < 0 ? (1 - n) * incx : 0;
+      int iy = incy < 0 ? (1 - n) * incy : 0;
+      for (int i = 0; i < n; i++, ix += incx, iy += incy)
+	dy[iy] += da * dx[ix];
+    }
+}
+
+
+/* Run daxpy via 'dispatch' on device DEV; check the result and whether the
+   hipBLAS variant or the base function was invoked.  */
+void
+run_test (int dev)
+{
+  constexpr int N = 1024;
+  int offload = dev != omp_initial_device && dev != omp_get_num_devices ();
+
+  // A = {0, 1, ..., N-1}
+  // B = {0, -1, ..., -(N-1)}
+  // B' = daxpy (N, 3, A, incx=1, B, incy=1)
+  //    = B + 3*A
+  // -> B' = {0, 2, 4, 6, ... }
+  double A[N], B[N];
+  double factor = 3.0;
+  for (int k = 0; k < N; k++)
+    {
+      A[k] = k;
+      B[k] = -k;
+    }
+
+  if (offload)
+    {
+      #pragma omp target enter data device(dev) map(A, B)
+    }
+
+  used_variant = 99;
+  #pragma omp dispatch device(dev)
+    run_daxpy (N, factor, A, 1, B, 1);
+
+  int expected = 2;
+  if (offload)
+    {
+      #pragma omp target exit data device(dev) map(release: A) map(from: B)
+      /* Presumably on_device_arch_* test the default device: select DEV.  */
+      int saved = omp_get_default_device ();
+      omp_set_default_device (dev);
+#if defined(__HIP_PLATFORM_AMD__)
+      if (on_device_arch_gcn ())
+#else
+      if (on_device_arch_nvptx ())
+#endif
+	expected = 1;
+      omp_set_default_device (saved);
+    }
+  assert (used_variant == expected);
+
+  for (int k = 0; k < N; k++)
+    assert (B[k] == 2*k);
+}
+
+/* Exercise device numbers 0..ndev inclusive, then omp_initial_device.  */
+int
+main ()
+{
+  int ndev = omp_get_num_devices ();
+  int dev = 0;
+  while (dev <= ndev)
+    run_test (dev++);
+  run_test (omp_initial_device);
+  return 0;
+}