aboutsummaryrefslogtreecommitdiff
path: root/libgomp/testsuite/libgomp.c
diff options
context:
space:
mode:
Diffstat (limited to 'libgomp/testsuite/libgomp.c')
-rw-r--r--libgomp/testsuite/libgomp.c/append-args-fr-1.c232
-rw-r--r--libgomp/testsuite/libgomp.c/append-args-fr.h305
-rw-r--r--libgomp/testsuite/libgomp.c/interop-cublas-full.c176
-rw-r--r--libgomp/testsuite/libgomp.c/interop-cublas-libonly.c7
-rw-r--r--libgomp/testsuite/libgomp.c/interop-cuda-full.c159
-rw-r--r--libgomp/testsuite/libgomp.c/interop-cuda-libonly.c8
-rw-r--r--libgomp/testsuite/libgomp.c/interop-hip-amd-full.c7
-rw-r--r--libgomp/testsuite/libgomp.c/interop-hip-amd-no-hip-header.c8
-rw-r--r--libgomp/testsuite/libgomp.c/interop-hip-nvidia-full.c8
-rw-r--r--libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-headers.c10
-rw-r--r--libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c9
-rw-r--r--libgomp/testsuite/libgomp.c/interop-hip.h234
-rw-r--r--libgomp/testsuite/libgomp.c/interop-hipblas-amd-full.c7
-rw-r--r--libgomp/testsuite/libgomp.c/interop-hipblas-amd-no-hip-header.c8
-rw-r--r--libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-full.c7
-rw-r--r--libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-headers.c9
-rw-r--r--libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-hip-header.c8
-rw-r--r--libgomp/testsuite/libgomp.c/interop-hipblas.h240
18 files changed, 1442 insertions, 0 deletions
diff --git a/libgomp/testsuite/libgomp.c/append-args-fr-1.c b/libgomp/testsuite/libgomp.c/append-args-fr-1.c
new file mode 100644
index 0000000..2fd7eda
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/append-args-fr-1.c
@@ -0,0 +1,232 @@
+/* { dg-do run } */
+
+#include "append-args-fr.h"
+
+enum { host_device, nvptx_device, gcn_device } used_device_type, used_device_type2;
+static int used_device_num, used_device_num2;
+static omp_interop_fr_t expected_fr, expected_fr2;
+static _Bool is_targetsync, is_targetsync2;
+
+void
+check_interop (omp_interop_t obj)
+{
+ if (used_device_type == host_device)
+ check_host (obj);
+ else if (used_device_type == nvptx_device)
+ check_nvptx (obj, used_device_num, expected_fr, is_targetsync);
+ else if (used_device_type == gcn_device)
+ check_gcn (obj, used_device_num, expected_fr, is_targetsync);
+ else
+ __builtin_unreachable ();
+
+ #pragma omp interop use(obj)
+}
+
+void
+check_interop2 (omp_interop_t obj, omp_interop_t obj2)
+{
+ check_interop (obj);
+
+ #pragma omp interop use(obj2)
+
+ if (used_device_type2 == host_device)
+ check_host (obj2);
+ else if (used_device_type2 == nvptx_device)
+ check_nvptx (obj2, used_device_num2, expected_fr2, is_targetsync2);
+ else if (used_device_type2 == gcn_device)
+ check_gcn (obj2, used_device_num2, expected_fr2, is_targetsync2);
+ else
+ __builtin_unreachable ();
+}
+
+
+/* Check no args + one interop arg - and no prefer_type. */
+
+int f0_1_tg_ (omp_interop_t obj) { check_interop (obj); return 4242; }
+#pragma omp declare variant(f0_1_tg_) match(construct={dispatch}) append_args(interop(target))
+int f0_1_tg () { assert (false); return 42; }
+
+void f0_1_tgsy_ (omp_interop_t obj) { check_interop (obj); }
+#pragma omp declare variant(f0_1_tgsy_) match(construct={dispatch}) append_args(interop(targetsync))
+void f0_1_tgsy () { assert (false); }
+
+int f0_1_tgtgsy_ (omp_interop_t obj) { check_interop (obj); return 3333; }
+#pragma omp declare variant(f0_1_tgtgsy_) match(construct={dispatch}) append_args(interop(targetsync,target))
+int f0_1_tgtgsy () { assert (false); return 33; }
+
+
+/* And with PREFER_TYPE. */
+
+// nv: cuda, gcn: -, -, hip
+void f0_1_tgsy_c_cd_hi_hs_ (omp_interop_t obj) { check_interop (obj); }
+#pragma omp declare variant(f0_1_tgsy_c_cd_hi_hs_) match(construct={dispatch}) \
+ append_args(interop(targetsync, prefer_type("cuda","cuda_driver", "hip", "hsa")))
+void f0_1_tgsy_c_cd_hi_hs () { assert (false); }
+
+// nv: -, cuda_driver, gcn: hsa
+void f0_1_tgsy_hs_cd_c_hi_ (omp_interop_t obj) { check_interop (obj); }
+#pragma omp declare variant(f0_1_tgsy_hs_cd_c_hi_) match(construct={dispatch}) \
+ append_args(interop(targetsync, prefer_type({attr("ompx_foo")}, {fr("hsa")}, {attr("ompx_bar"), fr("cuda_driver"), attr("ompx_foobar")},{fr("cuda")}, {fr("hip")})))
+void f0_1_tgsy_hs_cd_c_hi () { assert (false); }
+
+// nv: -, hip, gcn: hsa
+void f0_1_tgsy_hs_hi_cd_c_ (omp_interop_t obj) { check_interop (obj); }
+#pragma omp declare variant(f0_1_tgsy_hs_hi_cd_c_) match(construct={dispatch}) \
+ append_args(interop(targetsync, prefer_type("hsa", "hip", "cuda_driver", "cuda")))
+void f0_1_tgsy_hs_hi_cd_c () { assert (false); }
+
+
+void
+check_f0 ()
+{
+ if (used_device_type == nvptx_device)
+ expected_fr = omp_ifr_cuda;
+ else if (used_device_type == gcn_device)
+ expected_fr = omp_ifr_hip;
+ else /* host; variable shall not be accessed */
+ expected_fr = omp_ifr_level_zero;
+
+ int i;
+ if (used_device_num == DEFAULT_DEVICE)
+ {
+ is_targetsync = 0;
+ #pragma omp dispatch
+ i = f0_1_tg ();
+ assert (i == 4242);
+
+ is_targetsync = 1;
+ #pragma omp dispatch
+ f0_1_tgsy ();
+
+ #pragma omp dispatch
+ i = f0_1_tgtgsy ();
+ assert (i == 3333);
+
+
+ if (used_device_type == nvptx_device)
+ expected_fr = omp_ifr_cuda;
+ else if (used_device_type == gcn_device)
+ expected_fr = omp_ifr_hip;
+ #pragma omp dispatch
+ f0_1_tgsy_c_cd_hi_hs ();
+
+ if (used_device_type == nvptx_device)
+ expected_fr = omp_ifr_cuda_driver;
+ else if (used_device_type == gcn_device)
+ expected_fr = omp_ifr_hsa;
+ #pragma omp dispatch
+ f0_1_tgsy_hs_cd_c_hi ();
+
+ if (used_device_type == nvptx_device)
+ expected_fr = omp_ifr_hip;
+ else if (used_device_type == gcn_device)
+ expected_fr = omp_ifr_hsa;
+ #pragma omp dispatch
+ f0_1_tgsy_hs_hi_cd_c ();
+ }
+ else
+ {
+ is_targetsync = 0;
+ #pragma omp dispatch device(used_device_num)
+ i = f0_1_tg ();
+ assert (i == 4242);
+
+ is_targetsync = 1;
+ #pragma omp dispatch device(used_device_num)
+ f0_1_tgsy ();
+
+ #pragma omp dispatch device(used_device_num)
+ i = f0_1_tgtgsy ();
+ assert (i == 3333);
+
+
+ if (used_device_type == nvptx_device)
+ expected_fr = omp_ifr_cuda;
+ else if (used_device_type == gcn_device)
+ expected_fr = omp_ifr_hip;
+ #pragma omp dispatch device(used_device_num)
+ f0_1_tgsy_c_cd_hi_hs ();
+
+ if (used_device_type == nvptx_device)
+ expected_fr = omp_ifr_cuda_driver;
+ else if (used_device_type == gcn_device)
+ expected_fr = omp_ifr_hsa;
+ #pragma omp dispatch device(used_device_num)
+ f0_1_tgsy_hs_cd_c_hi ();
+
+ if (used_device_type == nvptx_device)
+ expected_fr = omp_ifr_hip;
+ else if (used_device_type == gcn_device)
+ expected_fr = omp_ifr_hsa;
+ #pragma omp dispatch device(used_device_num)
+ f0_1_tgsy_hs_hi_cd_c ();
+ }
+}
+
+
+
+void
+do_check (int dev)
+{
+ int num_dev = omp_get_num_devices ();
+ const char *dev_type;
+ if (dev != DEFAULT_DEVICE)
+ omp_set_default_device (dev);
+ int is_nvptx = on_device_arch_nvptx ();
+ int is_gcn = on_device_arch_gcn ();
+ int is_host;
+
+ if (dev != DEFAULT_DEVICE)
+ is_host = dev == -1 || dev == num_dev;
+ else
+ {
+ int def_dev = omp_get_default_device ();
+ is_host = def_dev == -1 || def_dev == num_dev;
+ }
+
+ assert (is_nvptx + is_gcn + is_host == 1);
+
+ if (num_dev > 0 && dev != DEFAULT_DEVICE)
+ {
+ if (is_host)
+ omp_set_default_device (0);
+ else
+ omp_set_default_device (-1);
+ }
+
+ used_device_num = dev;
+ if (is_host)
+ {
+ dev_type = "host";
+ used_device_type = host_device;
+ }
+ else if (is_nvptx)
+ {
+ dev_type = "nvptx";
+ used_device_type = nvptx_device;
+ }
+ else if (is_gcn)
+ {
+ dev_type = "gcn";
+ used_device_type = gcn_device;
+ }
+
+ printf ("Running on the %s device (%d)\n", dev_type, dev);
+ check_f0 ();
+}
+
+
+
+int
+main ()
+{
+ do_check (DEFAULT_DEVICE);
+ int ndev = omp_get_num_devices ();
+ for (int dev = -1; dev < ndev; dev++)
+ do_check (dev);
+ for (int dev = -1; dev < ndev; dev++)
+ {
+ omp_set_default_device (dev);
+ do_check (DEFAULT_DEVICE);
+ }
+}
diff --git a/libgomp/testsuite/libgomp.c/append-args-fr.h b/libgomp/testsuite/libgomp.c/append-args-fr.h
new file mode 100644
index 0000000..9f6ca04
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/append-args-fr.h
@@ -0,0 +1,305 @@
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <omp.h>
+#include "../libgomp.c-c++-common/on_device_arch.h"
+
+/* Provides: */
+
+#define DEFAULT_DEVICE -99
+
+void check_host (omp_interop_t obj);
+void check_nvptx (omp_interop_t obj, int dev, omp_interop_fr_t expected_fr, _Bool is_targetsync);
+void check_gcn (omp_interop_t obj, int dev, omp_interop_fr_t expected_fr, _Bool is_targetsync);
+
+
+/* The following assumes that when a nvptx device is available,
+ cuda/cuda_driver/hip are supported.
+ And that likewise when a gcn device is available that the
+ plugin also can not only the HSA but also the HIP library
+ such that hsa/hip are supported.
+ For the host, omp_interop_none is expected.
+
+ Otherwise, it only does some basic tests without checking
+ that the returned result really makes sense. */
+
+void check_type (omp_interop_t obj)
+{
+ const char *type;
+
+ type = omp_get_interop_type_desc (obj, omp_ipr_fr_id);
+ if (obj != omp_interop_none)
+ assert (strcmp (type, "omp_interop_t") == 0);
+ else
+ assert (type == NULL);
+
+ type = omp_get_interop_type_desc (obj, omp_ipr_fr_name);
+ if (obj != omp_interop_none)
+ assert (strcmp (type, "const char *") == 0);
+ else
+ assert (type == NULL);
+
+ type = omp_get_interop_type_desc (obj, omp_ipr_vendor);
+ if (obj != omp_interop_none)
+ assert (strcmp (type, "int") == 0);
+ else
+ assert (type == NULL);
+
+ type = omp_get_interop_type_desc (obj, omp_ipr_vendor_name);
+ if (obj != omp_interop_none)
+ assert (strcmp (type, "const char *") == 0);
+ else
+ assert (type == NULL);
+
+ type = omp_get_interop_type_desc (obj, omp_ipr_device_num);
+ if (obj != omp_interop_none)
+ assert (strcmp (type, "int") == 0);
+ else
+ assert (type == NULL);
+
+ if (obj != omp_interop_none)
+ return;
+ assert (omp_get_interop_type_desc (obj, omp_ipr_platform) == NULL);
+ assert (omp_get_interop_type_desc (obj, omp_ipr_device) == NULL);
+ assert (omp_get_interop_type_desc (obj, omp_ipr_device_context) == NULL);
+ assert (omp_get_interop_type_desc (obj, omp_ipr_targetsync) == NULL);
+}
+
+
+void
+check_host (omp_interop_t obj)
+{
+ assert (obj == omp_interop_none);
+ check_type (obj);
+}
+
+
+void
+check_nvptx (omp_interop_t obj, int dev, omp_interop_fr_t expected_fr, _Bool is_targetsync)
+{
+ assert (obj != omp_interop_none && obj != (omp_interop_t) -1L);
+
+ omp_interop_rc_t ret_code = omp_irc_no_value;
+ omp_interop_fr_t fr = (omp_interop_fr_t) omp_get_interop_int (obj, omp_ipr_fr_id, &ret_code);
+
+ assert (ret_code == omp_irc_success);
+ assert (fr == expected_fr);
+
+ ret_code = omp_irc_no_value;
+ const char *fr_name = omp_get_interop_str (obj, omp_ipr_fr_name, &ret_code);
+
+ assert (ret_code == omp_irc_success);
+ if (fr == omp_ifr_cuda)
+ assert (strcmp (fr_name, "cuda") == 0);
+ else if (fr == omp_ifr_cuda_driver)
+ assert (strcmp (fr_name, "cuda_driver") == 0);
+ else if (fr == omp_ifr_hip)
+ assert (strcmp (fr_name, "hip") == 0);
+ else
+ assert (0);
+
+ ret_code = omp_irc_no_value;
+ int vendor = (int) omp_get_interop_int (obj, omp_ipr_vendor, &ret_code);
+ assert (ret_code == omp_irc_success);
+ assert (vendor == 11); /* Nvidia */
+
+ ret_code = omp_irc_no_value;
+ const char *vendor_name = omp_get_interop_str (obj, omp_ipr_vendor_name, &ret_code);
+ assert (ret_code == omp_irc_success);
+ assert (strcmp (vendor_name, "nvidia") == 0);
+
+ ret_code = omp_irc_no_value;
+ int dev_num = (int) omp_get_interop_int (obj, omp_ipr_device_num, &ret_code);
+ assert (ret_code == omp_irc_success);
+ if (dev == DEFAULT_DEVICE)
+ assert (dev_num == omp_get_default_device ());
+ else
+ assert (dev_num == dev);
+
+ /* Platform: N/A. */
+ ret_code = omp_irc_success;
+ (void) omp_get_interop_int (obj, omp_ipr_platform, &ret_code);
+ assert (ret_code == omp_irc_no_value);
+ ret_code = omp_irc_success;
+ (void) omp_get_interop_ptr (obj, omp_ipr_platform, &ret_code);
+ assert (ret_code == omp_irc_no_value);
+ ret_code = omp_irc_success;
+ (void) omp_get_interop_str (obj, omp_ipr_platform, &ret_code);
+ assert (ret_code == omp_irc_no_value);
+
+ /* Device: int / CUdevice / hipDevice_t -- all internally an 'int'. */
+ ret_code = omp_irc_no_value;
+ int fr_device = (int) omp_get_interop_int (obj, omp_ipr_device, &ret_code);
+
+ /* CUDA also starts from 0 and goes to < n with cudaGetDeviceCount(&cn). */
+ assert (ret_code == omp_irc_success);
+ assert (fr_device >= 0 && fr_device < omp_get_num_devices ());
+
+ /* Device context: N/A / CUcontext / hipCtx_t -- a pointer. */
+ ret_code = omp_irc_out_of_range;
+ void *ctx = omp_get_interop_ptr (obj, omp_ipr_device_context, &ret_code);
+
+ if (fr == omp_ifr_cuda)
+ {
+ assert (ret_code == omp_irc_no_value);
+ assert (ctx == NULL);
+ }
+ else
+ {
+ assert (ret_code == omp_irc_success);
+ assert (ctx != NULL);
+ }
+
+ /* Stream/targetsync: cudaStream_t / CUstream / hipStream_t -- a pointer. */
+ ret_code = omp_irc_out_of_range;
+ void *stream = omp_get_interop_ptr (obj, omp_ipr_targetsync, &ret_code);
+
+ if (is_targetsync) /* no targetsync */
+ {
+ assert (ret_code == omp_irc_success);
+ assert (stream != NULL);
+ }
+ else
+ {
+ assert (ret_code == omp_irc_no_value);
+ assert (stream == NULL);
+ }
+
+ check_type (obj);
+ if (fr == omp_ifr_cuda)
+ {
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0);
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "int") == 0);
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "N/A") == 0);
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "cudaStream_t") == 0);
+ }
+ else if (fr == omp_ifr_cuda_driver)
+ {
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0);
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "CUdevice") == 0);
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "CUcontext") == 0);
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "CUstream") == 0);
+ }
+ else
+ {
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0);
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "hipDevice_t") == 0);
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "hipCtx_t") == 0);
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "hipStream_t") == 0);
+ }
+}
+
+
+void
+check_gcn (omp_interop_t obj, int dev, omp_interop_fr_t expected_fr, _Bool is_targetsync)
+{
+ assert (obj != omp_interop_none && obj != (omp_interop_t) -1L);
+
+ omp_interop_rc_t ret_code = omp_irc_no_value;
+ omp_interop_fr_t fr = (omp_interop_fr_t) omp_get_interop_int (obj, omp_ipr_fr_id, &ret_code);
+
+ assert (ret_code == omp_irc_success);
+ assert (fr == expected_fr);
+
+ ret_code = omp_irc_no_value;
+ const char *fr_name = omp_get_interop_str (obj, omp_ipr_fr_name, &ret_code);
+
+ assert (ret_code == omp_irc_success);
+ if (fr == omp_ifr_hip)
+ assert (strcmp (fr_name, "hip") == 0);
+ else if (fr == omp_ifr_hsa)
+ assert (strcmp (fr_name, "hsa") == 0);
+ else
+ assert (0);
+
+ ret_code = omp_irc_no_value;
+ int vendor = (int) omp_get_interop_int (obj, omp_ipr_vendor, &ret_code);
+ assert (ret_code == omp_irc_success);
+ assert (vendor == 1); /* Amd */
+
+ ret_code = omp_irc_no_value;
+ const char *vendor_name = omp_get_interop_str (obj, omp_ipr_vendor_name, &ret_code);
+ assert (ret_code == omp_irc_success);
+ assert (strcmp (vendor_name, "amd") == 0);
+
+ ret_code = omp_irc_no_value;
+ int dev_num = (int) omp_get_interop_int (obj, omp_ipr_device_num, &ret_code);
+ assert (ret_code == omp_irc_success);
+ if (dev == DEFAULT_DEVICE)
+ assert (dev_num == omp_get_default_device ());
+ else
+ assert (dev_num == dev);
+
+ /* Platform: N/A. */
+ ret_code = omp_irc_success;
+ (void) omp_get_interop_int (obj, omp_ipr_platform, &ret_code);
+ assert (ret_code == omp_irc_no_value);
+ ret_code = omp_irc_success;
+ (void) omp_get_interop_ptr (obj, omp_ipr_platform, &ret_code);
+ assert (ret_code == omp_irc_no_value);
+ ret_code = omp_irc_success;
+ (void) omp_get_interop_str (obj, omp_ipr_platform, &ret_code);
+ assert (ret_code == omp_irc_no_value);
+
+ /* Device: hipDevice_t / hsa_agent_t* -- hip is internally an 'int'. */
+ ret_code = omp_irc_no_value;
+ if (fr == omp_ifr_hip)
+ {
+ /* HIP also starts from 0 and goes to < n as with cudaGetDeviceCount(&cn). */
+ int fr_device = (int) omp_get_interop_int (obj, omp_ipr_device, &ret_code);
+ assert (ret_code == omp_irc_success);
+ assert (fr_device >= 0 && fr_device < omp_get_num_devices ());
+ }
+ else
+ {
+ void *agent = omp_get_interop_ptr (obj, omp_ipr_device, &ret_code);
+ assert (ret_code == omp_irc_success);
+ assert (agent != NULL);
+ }
+
+ /* Device context: hipCtx_t / N/A -- a pointer. */
+ ret_code = omp_irc_out_of_range;
+ void *ctx = omp_get_interop_ptr (obj, omp_ipr_device_context, &ret_code);
+ if (fr == omp_ifr_hip)
+ {
+ assert (ret_code == omp_irc_success);
+ assert (ctx != NULL);
+ }
+ else
+ {
+ assert (ret_code == omp_irc_no_value);
+ assert (ctx == NULL);
+ }
+
+ /* Stream/targetsync: cudaStream_t / CUstream / hipStream_t -- a pointer. */
+ ret_code = omp_irc_out_of_range;
+ void *stream = omp_get_interop_ptr (obj, omp_ipr_targetsync, &ret_code);
+
+ if (is_targetsync)
+ {
+ assert (ret_code == omp_irc_success);
+ assert (stream != NULL);
+ }
+ else
+ {
+ assert (ret_code == omp_irc_no_value);
+ assert (stream == NULL);
+ }
+
+ check_type (obj);
+ if (fr == omp_ifr_hip)
+ {
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0);
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "hipDevice_t") == 0);
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "hipCtx_t") == 0);
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "hipStream_t") == 0);
+ }
+ else
+ {
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0);
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "hsa_agent_t *") == 0);
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "N/A") == 0);
+ assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "hsa_queue_t *") == 0);
+ }
+}
diff --git a/libgomp/testsuite/libgomp.c/interop-cublas-full.c b/libgomp/testsuite/libgomp.c/interop-cublas-full.c
new file mode 100644
index 0000000..2df5277
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-cublas-full.c
@@ -0,0 +1,176 @@
+/* { dg-require-effective-target openacc_cublas } */
+/* { dg-additional-options "-lcublas" } */
+
+/* NOTE: This file is also included by libgomp.c-c++-common/interop-cudablas-libonly.c
+ to test the fallback version. */
+
+/* Check whether cuBlas' daxpy works with an interop object.
+ daxpy(N, DA, DX, INCX, DY, INCY)
+ calculates (for DX = DY = 1):
+ DY(1:N) = DY(1:N) + DA * DX(1:N)
+ and otherwise N array elements, taking every INCX-th or INCY-th one, repectively.
+
+Based on the interop example in OpenMP's example document */
+
+/* Minimal check whether CUDA works - by checking whether the API routines
+ seem to work. This includes a fallback if the header is not
+ available. */
+
+#include <assert.h>
+#include <omp.h>
+#include "../libgomp.c-c++-common/on_device_arch.h"
+
+
+#if __has_include(<cuda.h>) && __has_include(<cudaTypedefs.h>) && __has_include(<cuda_runtime.h>) && __has_include(<cublas_v2.h>) && !defined(USE_CUDA_FALLBACK_HEADER)
+ #include <cuda.h>
+ #include <cudaTypedefs.h>
+ #include <cuda_runtime.h>
+ #include <cublas_v2.h>
+
+#else
+ /* Add a poor man's fallback declaration. */
+ #if USE_CUDA_FALLBACK_HEADER
+ // Don't warn.
+ #elif !__has_include(<cuda.h>)
+ #warning "Using GCC's cuda.h as fallback for cuda.h"
+ #elif !__has_include(<cudaTypedefs.h>)
+ #warning "Using GCC's cuda.h as fallback for cudaTypedefs.h"
+ #elif !__has_include(<cuda_runtime.h>)
+ #warning "Using GCC's cuda.h as fallback for cuda_runtime.h"
+ #else
+ #warning "Using GCC's cuda.h as fallback for cublas_v2.h"
+ #endif
+ #include "../../../include/cuda/cuda.h"
+
+ typedef enum {
+ CUBLAS_STATUS_SUCCESS = 0,
+ } cublasStatus_t;
+
+ typedef CUstream cudaStream_t;
+ typedef struct cublasContext* cublasHandle_t;
+
+ #define cublasCreate cublasCreate_v2
+ cublasStatus_t cublasCreate_v2 (cublasHandle_t *);
+
+ #define cublasSetStream cublasSetStream_v2
+ cublasStatus_t cublasSetStream_v2 (cublasHandle_t, cudaStream_t);
+
+ #define cublasDaxpy cublasDaxpy_v2
+ cublasStatus_t cublasDaxpy_v2(cublasHandle_t, int, const double*, const double*, int, double*, int);
+#endif
+
+static int used_variant = 0;
+
+void
+run_cuBlasdaxpy (int n, double da, const double *dx, int incx, double *dy, int incy, omp_interop_t obj)
+{
+ used_variant = 1;
+
+ omp_interop_rc_t res;
+ cublasStatus_t stat;
+
+ omp_intptr_t fr = omp_get_interop_int(obj, omp_ipr_fr_id, &res);
+ assert (res == omp_irc_success && fr == omp_ifr_cuda);
+
+ cudaStream_t stream = (cudaStream_t) omp_get_interop_ptr (obj, omp_ipr_targetsync, &res);
+ assert (res == omp_irc_success);
+
+ cublasHandle_t handle;
+ stat = cublasCreate (&handle);
+ assert (stat == CUBLAS_STATUS_SUCCESS);
+
+ stat = cublasSetStream (handle, stream);
+ assert (stat == CUBLAS_STATUS_SUCCESS);
+
+ /* 'da' can be in host or device space, 'dx' and 'dy' must be in device space. */
+ stat = cublasDaxpy (handle, n, &da, dx, 1, dy, 1) ;
+ assert (stat == CUBLAS_STATUS_SUCCESS);
+}
+
+
+#pragma omp declare variant(run_cuBlasdaxpy) \
+ match(construct={dispatch}, target_device={kind(nohost), arch("nvptx")}) \
+ adjust_args(need_device_ptr : dx, dy) \
+ append_args(interop(targetsync, prefer_type("cuda")))
+
+void
+run_daxpy (int n, double da, const double *dx, int incx, double *dy, int incy)
+{
+ used_variant = 2;
+
+ if (incx == 1 && incy == 1)
+ #pragma omp simd
+ for (int i = 0; i < n; i++)
+ dy[i] += da * dx[i];
+ else
+ {
+ int ix = 0;
+ int iy = 0;
+ for (int i = 0; i < n; i++)
+ {
+ dy[iy] += da * dx[ix];
+ ix += incx;
+ iy += incy;
+ }
+ }
+}
+
+
+void
+run_test (int dev)
+{
+ constexpr int N = 1024;
+
+ // A = {1,2,...,N}
+ // B = {-1, -2, ..., N}
+ // B' = daxpy (N, 3, A, incx=1, B, incy=1)
+ // = B + 3*A
+ // -> B' = {0, 2, 4, 6, ... }
+
+ double A[N], B[N];
+ double factor = 3.0;
+ for (int i = 0; i < N; i++)
+ {
+ A[i] = i;
+ B[i] = -i;
+ }
+
+ if (dev != omp_initial_device && dev != omp_get_num_devices ())
+ {
+ #pragma omp target enter data device(dev) map(A, B)
+ }
+
+ used_variant = 99;
+ #pragma omp dispatch device(dev)
+ run_daxpy (N, factor, A, 1, B, 1);
+
+ if (dev != omp_initial_device && dev != omp_get_num_devices ())
+ {
+ #pragma omp target exit data device(dev) map(release: A) map(from: B)
+
+ int tmp = omp_get_default_device ();
+ omp_set_default_device (dev);
+ if (on_device_arch_nvptx ())
+ assert (used_variant == 1);
+ else
+ assert (used_variant == 2);
+ omp_set_default_device (tmp);
+ }
+ else
+ assert (used_variant == 2);
+
+ for (int i = 0; i < N; i++)
+ assert (B[i] == 2*i);
+}
+
+int
+main ()
+{
+ int ndev = omp_get_num_devices ();
+
+ for (int dev = 0; dev <= ndev; dev++)
+ run_test (dev);
+ run_test (omp_initial_device);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/interop-cublas-libonly.c b/libgomp/testsuite/libgomp.c/interop-cublas-libonly.c
new file mode 100644
index 0000000..89c0652
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-cublas-libonly.c
@@ -0,0 +1,7 @@
+/* { dg-require-effective-target openacc_libcublas } */
+/* { dg-additional-options "-lcublas" } */
+
+/* Same as interop-cudablas-full.c, but also works if the header is not available. */
+
+#define USE_CUDA_FALLBACK_HEADER 1
+#include "interop-cublas-full.c"
diff --git a/libgomp/testsuite/libgomp.c/interop-cuda-full.c b/libgomp/testsuite/libgomp.c/interop-cuda-full.c
new file mode 100644
index 0000000..38aa6b1
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-cuda-full.c
@@ -0,0 +1,159 @@
+/* { dg-require-effective-target openacc_cuda } */
+/* { dg-require-effective-target openacc_cudart } */
+/* { dg-additional-options "-lcuda -lcudart" } */
+
+/* NOTE: This file is also included by libgomp.c-c++-common/interop-cuda-libonly.c
+ to test the fallback version, which defines USE_CUDA_FALLBACK_HEADER. */
+
+/* Minimal check whether CUDA works - by checking whether the API routines
+ seem to work. This includes a fallback if the header is not
+ available. */
+
+#include <assert.h>
+#include <omp.h>
+
+#if __has_include(<cuda.h>) && __has_include(<cudaTypedefs.h>) && __has_include(<cuda_runtime.h>) && !defined(USE_CUDA_FALLBACK_HEADER)
+ #include <cuda.h>
+ #include <cudaTypedefs.h>
+ #include <cuda_runtime.h>
+
+#else
+ /* Add a poor man's fallback declaration. */
+ #if USE_CUDA_FALLBACK_HEADER
+ // Don't warn.
+ #elif !__has_include(<cuda.h>)
+ #warning "Using GCC's cuda.h as fallback for cuda.h"
+ #elif !__has_include(<cudaTypedefs.h>)
+ #warning "Using GCC's cuda.h as fallback for cudaTypedefs.h"
+ #else
+ #warning "Using GCC's cuda.h as fallback for cuda_runtime.h"
+ #endif
+ #include "../../../include/cuda/cuda.h"
+
+ typedef int cudaError_t;
+ typedef CUstream cudaStream_t;
+ enum {
+ cudaSuccess = 0
+ };
+
+ enum cudaDeviceAttr {
+ cudaDevAttrClockRate = 13,
+ cudaDevAttrMaxGridDimX = 5
+ };
+
+ cudaError_t cudaDeviceGetAttribute (int *, enum cudaDeviceAttr, int);
+ cudaError_t cudaStreamQuery(cudaStream_t);
+ CUresult cuCtxGetApiVersion(CUcontext, unsigned int *);
+ CUresult cuStreamGetCtx (CUstream, CUcontext *);
+#endif
+
+int
+main ()
+{
+ int ivar;
+ unsigned uvar;
+ omp_interop_rc_t res;
+ omp_interop_t obj_cuda = omp_interop_none;
+ omp_interop_t obj_cuda_driver = omp_interop_none;
+ cudaError_t cuda_err;
+ CUresult cu_err;
+
+ #pragma omp interop init(target, targetsync, prefer_type("cuda") : obj_cuda) \
+ init(target, targetsync, prefer_type("cuda_driver") : obj_cuda_driver) \
+
+ omp_interop_fr_t fr = (omp_interop_fr_t) omp_get_interop_int (obj_cuda, omp_ipr_fr_id, &res);
+ assert (res == omp_irc_success);
+ assert (fr == omp_ifr_cuda);
+
+ fr = (omp_interop_fr_t) omp_get_interop_int (obj_cuda_driver, omp_ipr_fr_id, &res);
+ assert (res == omp_irc_success);
+ assert (fr == omp_ifr_cuda_driver);
+
+ ivar = (int) omp_get_interop_int (obj_cuda, omp_ipr_vendor, &res);
+ assert (res == omp_irc_success);
+ assert (ivar == 11);
+
+ ivar = (int) omp_get_interop_int (obj_cuda_driver, omp_ipr_vendor, &res);
+ assert (res == omp_irc_success);
+ assert (ivar == 11);
+
+
+ /* Check whether the omp_ipr_device -> cudaDevice_t yields a valid device. */
+
+ CUdevice cu_dev = (int) omp_get_interop_int (obj_cuda_driver, omp_ipr_device, &res);
+ assert (res == omp_irc_success);
+
+ /* Assume a clock size is available and > 1 GHz; value is in kHz. */
+ cu_err = cuDeviceGetAttribute (&ivar, cudaDevAttrClockRate, cu_dev);
+ assert (cu_err == CUDA_SUCCESS);
+ assert (ivar > 1000000 /* kHz */);
+
+ /* Assume that the MaxGridDimX is available and > 1024. */
+ cu_err = cuDeviceGetAttribute (&ivar, cudaDevAttrMaxGridDimX, cu_dev);
+ assert (cu_err == CUDA_SUCCESS);
+ assert (ivar > 1024);
+
+ int cuda_dev = (int) omp_get_interop_int (obj_cuda, omp_ipr_device, &res);
+ assert (res == omp_irc_success);
+ assert (cuda_dev == (CUdevice) cu_dev); // Assume they are the same ...
+
+ /* Assume a clock size is available and > 1 GHz; value is in kHz. */
+ cuda_err = cudaDeviceGetAttribute (&ivar, cudaDevAttrClockRate, cuda_dev);
+ assert (cuda_err == cudaSuccess);
+ assert (ivar > 1000000 /* kHz */);
+
+ /* Assume that the MaxGridDimX is available and > 1024. */
+ cuda_err = cudaDeviceGetAttribute (&ivar, cudaDevAttrMaxGridDimX, cuda_dev);
+ assert (cuda_err == cudaSuccess);
+ assert (ivar > 1024);
+
+
+
+
+ /* Check whether the omp_ipr_device_context -> CUcontext yields a context. */
+
+ CUcontext cu_ctx = (CUcontext) omp_get_interop_ptr (obj_cuda_driver, omp_ipr_device_context, &res);
+ assert (res == omp_irc_success);
+
+ /* Assume API Version > 0 for Nvidia, cudaErrorNotSupported for AMD. */
+ uvar = 99;
+ cu_err = cuCtxGetApiVersion (cu_ctx, &uvar);
+ assert (cu_err == CUDA_SUCCESS);
+ assert (uvar > 0);
+
+
+ /* Check whether the omp_ipr_targetsync -> cudaStream_t yields a stream. */
+
+ cudaStream_t cuda_sm = (cudaStream_t) omp_get_interop_ptr (obj_cuda, omp_ipr_targetsync, &res);
+ assert (res == omp_irc_success);
+
+ CUstream cu_sm = (cudaStream_t) omp_get_interop_ptr (obj_cuda_driver, omp_ipr_targetsync, &res);
+ assert (res == omp_irc_success);
+
+ assert ((void*) cu_sm != (void*) cuda_sm); // Type compatible but should have created two streams
+
+ int dev_stream = 99;
+#if CUDA_VERSION >= 12080
+ cuda_err = cudaStreamGetDevice (cuda_sm, &dev_stream);
+ assert (cuda_err == cudaSuccess);
+#else
+ cu_err = cuStreamGetCtx (cu_sm, &cu_ctx) != CUDA_SUCCESS;
+ if (cu_err == CUDA_SUCCESS)
+ cuda_err = cuCtxPushCurrent (cu_ctx) != CUDA_SUCCESS;
+ if (cu_err == CUDA_SUCCESS)
+ cuda_err = cuCtxGetDevice (&dev_stream) != CUDA_SUCCESS;
+ if (cu_err == CUDA_SUCCESS)
+ cu_err = cuCtxPopCurrent (&cu_ctx) != CUDA_SUCCESS;
+ assert (cu_err == CUDA_SUCCESS);
+#endif
+ assert (dev_stream == cuda_dev);
+
+ /* All jobs should have been completed (as there were none none) */
+ cuda_err = cudaStreamQuery (cuda_sm);
+ assert (cuda_err == cudaSuccess);
+
+ cu_err = cuStreamQuery (cu_sm);
+ assert (cu_err == CUDA_SUCCESS);
+
+ #pragma omp interop destroy(obj_cuda, obj_cuda_driver)
+}
diff --git a/libgomp/testsuite/libgomp.c/interop-cuda-libonly.c b/libgomp/testsuite/libgomp.c/interop-cuda-libonly.c
new file mode 100644
index 0000000..17cbb15
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-cuda-libonly.c
@@ -0,0 +1,8 @@
+/* { dg-require-effective-target openacc_libcudart } */
+/* { dg-require-effective-target openacc_libcuda } */
+/* { dg-additional-options "-lcuda -lcudart" } */
+
+/* Same as interop-cuda-full.c, but also works if the header is not available. */
+
+#define USE_CUDA_FALLBACK_HEADER 1
+#include "interop-cuda-full.c"
diff --git a/libgomp/testsuite/libgomp.c/interop-hip-amd-full.c b/libgomp/testsuite/libgomp.c/interop-hip-amd-full.c
new file mode 100644
index 0000000..d7725fc
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hip-amd-full.c
@@ -0,0 +1,7 @@
+/* { dg-require-effective-target gomp_hip_header_amd } */
+/* { dg-require-effective-target gomp_libamdhip64 } */
+/* { dg-additional-options "-lamdhip64" } */
+
+#define __HIP_PLATFORM_AMD__ 1
+
+#include "interop-hip.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hip-amd-no-hip-header.c b/libgomp/testsuite/libgomp.c/interop-hip-amd-no-hip-header.c
new file mode 100644
index 0000000..2584537
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hip-amd-no-hip-header.c
@@ -0,0 +1,8 @@
+/* { dg-require-effective-target gomp_libamdhip64 } */
+/* { dg-additional-options "-lamdhip64" } */
+
+#define __HIP_PLATFORM_AMD__ 1
+
+#define USE_HIP_FALLBACK_HEADER 1
+
+#include "interop-hip.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hip-nvidia-full.c b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-full.c
new file mode 100644
index 0000000..79af47d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-full.c
@@ -0,0 +1,8 @@
+/* { dg-require-effective-target openacc_cudart } */
+/* { dg-require-effective-target openacc_cuda } */
+/* { dg-require-effective-target gomp_hip_header_nvidia } */
+/* { dg-additional-options "-lcuda -lcudart -Wno-deprecated-declarations" } */
+
+#define __HIP_PLATFORM_NVIDIA__ 1
+
+#include "interop-hip.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-headers.c b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-headers.c
new file mode 100644
index 0000000..4586398
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-headers.c
@@ -0,0 +1,10 @@
+/* { dg-require-effective-target openacc_libcudart } */
+/* { dg-require-effective-target openacc_libcuda } */
+/* { dg-additional-options "-lcuda -lcudart" } */
+
+#define __HIP_PLATFORM_NVIDIA__ 1
+
+#define USE_HIP_FALLBACK_HEADER 1
+#define USE_CUDA_FALLBACK_HEADER 1
+
+#include "interop-hip.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c
new file mode 100644
index 0000000..4186984
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c
@@ -0,0 +1,9 @@
+/* { dg-require-effective-target openacc_cudart } */
+/* { dg-require-effective-target openacc_cuda } */
+/* { dg-additional-options "-lcuda -lcudart" } */
+
+#define __HIP_PLATFORM_NVIDIA__ 1
+
+#define USE_HIP_FALLBACK_HEADER 1
+
+#include "interop-hip.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hip.h b/libgomp/testsuite/libgomp.c/interop-hip.h
new file mode 100644
index 0000000..20a1ccb
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hip.h
@@ -0,0 +1,234 @@
+/* Minimal check whether HIP works - by checking whether the API routines
+ seem to work. This includes various fallbacks if the header is not
+ available. */
+
+#include <assert.h>
+#include <omp.h>
+
+#if !defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__)
+ #error "Either __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__ must be defined"
+#endif
+
+#if defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__)
+ #error "Either __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__ must be defined"
+#endif
+
+#if __has_include(<hip/hip_runtime_api.h>) && !defined(USE_HIP_FALLBACK_HEADER)
+ #include <hip/hip_runtime_api.h>
+
+#elif defined(__HIP_PLATFORM_AMD__)
+ /* Add a poor man's fallback declaration. */
+ #if !defined(USE_HIP_FALLBACK_HEADER)
+ #warning "Using fallback declaration for <hip/hip_runtime_api.h> for __HIP_PLATFORM_AMD__"
+ #endif
+
+ typedef struct ihipStream_t* hipStream_t;
+ typedef struct ihipCtx_t* hipCtx_t;
+ typedef int hipError_t;
+ typedef int hipDevice_t;
+ enum {
+ hipSuccess = 0,
+ hipErrorNotSupported = 801
+ };
+
+ typedef enum hipDeviceAttribute_t {
+ hipDeviceAttributeClockRate = 5,
+ hipDeviceAttributeMaxGridDimX = 29
+ } hipDeviceAttribute_t;
+
+ hipError_t hipDeviceGetAttribute (int *, hipDeviceAttribute_t, hipDevice_t);
+ hipError_t hipCtxGetApiVersion (hipCtx_t, int *);
+ hipError_t hipStreamGetDevice (hipStream_t, hipDevice_t *);
+ hipError_t hipStreamQuery (hipStream_t);
+
+#elif defined(__HIP_PLATFORM_NVIDIA__)
+ /* Add a poor man's fallback declaration. */
+ #if !defined(USE_HIP_FALLBACK_HEADER)
+ #warning "Using fallback declaration for <hip/hip_runtime_api.h> for __HIP_PLATFORM_NVIDIA__"
+ #endif
+
+ #if __has_include(<cuda.h>) && __has_include(<cudaTypedefs.h>) && __has_include(<cuda_runtime.h>) && !defined(USE_CUDA_FALLBACK_HEADER)
+ #include <cuda.h>
+ #include <cudaTypedefs.h>
+ #include <cuda_runtime.h>
+ #else
+ #if defined(USE_CUDA_FALLBACK_HEADER)
+ // no warning
+ #elif !__has_include(<cuda.h>)
+ #warning "Using GCC's cuda.h as fallback for cuda.h"
+ #elif !__has_include(<cudaTypedefs.h>)
+ #warning "Using GCC's cuda.h as fallback for cudaTypedefs.h"
+ #else
+ #warning "Using GCC's cuda.h as fallback for cuda_runtime.h"
+ #endif
+
+ #include "../../../include/cuda/cuda.h"
+
+ typedef int cudaError_t;
+ enum {
+ cudaSuccess = 0
+ };
+
+ enum cudaDeviceAttr {
+ cudaDevAttrClockRate = 13,
+ cudaDevAttrMaxGridDimX = 5
+ };
+
+ cudaError_t cudaDeviceGetAttribute (int *, enum cudaDeviceAttr, int);
+ CUresult cuCtxGetApiVersion(CUcontext, unsigned int *);
+ CUresult cuStreamGetCtx (CUstream, CUcontext *);
+ #endif
+
+ typedef CUstream hipStream_t;
+ typedef CUcontext hipCtx_t;
+ typedef CUdevice hipDevice_t;
+
+ typedef int hipError_t;
+ typedef int hipDevice_t;
+ enum {
+ hipSuccess = 0,
+ hipErrorNotSupported = 801
+ };
+
+
+ typedef enum hipDeviceAttribute_t {
+ hipDeviceAttributeClockRate = 5,
+ hipDeviceAttributeMaxGridDimX = 29
+ } hipDeviceAttribute_t;
+
+ inline static hipError_t
+ hipDeviceGetAttribute (int *ival, hipDeviceAttribute_t attr, hipDevice_t dev)
+ {
+ enum cudaDeviceAttr cuattr;
+ switch (attr)
+ {
+ case hipDeviceAttributeClockRate:
+ cuattr = cudaDevAttrClockRate;
+ break;
+ case hipDeviceAttributeMaxGridDimX:
+ cuattr = cudaDevAttrMaxGridDimX;
+ break;
+ default:
+ assert (0);
+ }
+ return cudaDeviceGetAttribute (ival, cuattr, dev) != cudaSuccess;
+ }
+
+ inline static hipError_t
+ hipCtxGetApiVersion (hipCtx_t ctx, int *ver)
+ {
+ unsigned uver;
+ hipError_t err;
+ err = cuCtxGetApiVersion (ctx, &uver) != CUDA_SUCCESS;
+ *ver = (int) uver;
+ return err;
+ }
+
+ inline static hipError_t
+ hipStreamGetDevice (hipStream_t stream, hipDevice_t *dev)
+ {
+#if CUDA_VERSION >= 12080
+ return cudaStreamGetDevice (stream, dev);
+#else
+ hipError_t err;
+ CUcontext ctx;
+ err = cuStreamGetCtx (stream, &ctx) != CUDA_SUCCESS;
+ if (err == hipSuccess)
+ err = cuCtxPushCurrent (ctx) != CUDA_SUCCESS;
+ if (err == hipSuccess)
+ err = cuCtxGetDevice (dev) != CUDA_SUCCESS;
+ if (err == hipSuccess)
+ err = cuCtxPopCurrent (&ctx) != CUDA_SUCCESS;
+ return err;
+#endif
+ }
+
+ inline static hipError_t
+ hipStreamQuery (hipStream_t stream)
+ {
+ return cuStreamQuery (stream) != CUDA_SUCCESS;
+ }
+
+#else
+ #error "should be unreachable"
+#endif
+
+int
+main ()
+{
+ int ivar;
+ omp_interop_rc_t res;
+ omp_interop_t obj = omp_interop_none;
+ hipError_t hip_err;
+
+ #pragma omp interop init(target, targetsync, prefer_type("hip") : obj)
+
+ omp_interop_fr_t fr = (omp_interop_fr_t) omp_get_interop_int (obj, omp_ipr_fr_id, &res);
+ assert (res == omp_irc_success);
+ assert (fr == omp_ifr_hip);
+
+ ivar = (int) omp_get_interop_int (obj, omp_ipr_vendor, &res);
+ assert (res == omp_irc_success);
+ int vendor_is_amd = ivar == 1;
+ #if defined(__HIP_PLATFORM_AMD__)
+ assert (ivar == 1);
+ #elif defined(__HIP_PLATFORM_NVIDIA__)
+ assert (ivar == 11);
+ #else
+ assert (0);
+ #endif
+
+
+ /* Check whether the omp_ipr_device -> hipDevice_t yields a valid device. */
+
+ hipDevice_t hip_dev = (int) omp_get_interop_int (obj, omp_ipr_device, &res);
+ assert (res == omp_irc_success);
+
+ /* Assume a clock size is available and > 1 GHz; value is in kHz. */
+ hip_err = hipDeviceGetAttribute (&ivar, hipDeviceAttributeClockRate, hip_dev);
+ assert (hip_err == hipSuccess);
+ assert (ivar > 1000000 /* kHz */);
+
+ /* Assume that the MaxGridDimX is available and > 1024. */
+ hip_err = hipDeviceGetAttribute (&ivar, hipDeviceAttributeMaxGridDimX, hip_dev);
+ assert (hip_err == hipSuccess);
+ assert (ivar > 1024);
+
+
+ /* Check whether the omp_ipr_device_context -> hipCtx_t yields a context. */
+
+ hipCtx_t hip_ctx = (hipCtx_t) omp_get_interop_ptr (obj, omp_ipr_device_context, &res);
+ assert (res == omp_irc_success);
+
+ /* Assume API Version > 0 for Nvidia, hipErrorNotSupported for AMD. */
+ ivar = -99;
+ #pragma GCC diagnostic push
+ #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+ hip_err = hipCtxGetApiVersion (hip_ctx, &ivar);
+ #pragma GCC diagnostic pop
+
+ if (vendor_is_amd)
+ assert (hip_err == hipErrorNotSupported && ivar == -99);
+ else
+ {
+ assert (hip_err == hipSuccess);
+ assert (ivar > 0);
+ }
+
+
+ /* Check whether the omp_ipr_targetsync -> hipStream_t yields a stream. */
+
+ hipStream_t hip_sm = (hipStream_t) omp_get_interop_ptr (obj, omp_ipr_targetsync, &res);
+ assert (res == omp_irc_success);
+
+ hipDevice_t dev_stream = 99;
+ hip_err = hipStreamGetDevice (hip_sm, &dev_stream);
+ assert (hip_err == hipSuccess);
+ assert (dev_stream == hip_dev);
+
+ /* All jobs should have been completed (as there were none none) */
+ hip_err = hipStreamQuery (hip_sm);
+ assert (hip_err == hipSuccess);
+
+ #pragma omp interop destroy(obj)
+}
diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas-amd-full.c b/libgomp/testsuite/libgomp.c/interop-hipblas-amd-full.c
new file mode 100644
index 0000000..53c05bd
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hipblas-amd-full.c
@@ -0,0 +1,7 @@
+/* { dg-require-effective-target gomp_hip_header_amd } */
+/* { dg-require-effective-target gomp_libhipblas } */
+/* { dg-additional-options "-lhipblas" } */
+
+#define __HIP_PLATFORM_AMD__ 1
+
+#include "interop-hipblas.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas-amd-no-hip-header.c b/libgomp/testsuite/libgomp.c/interop-hipblas-amd-no-hip-header.c
new file mode 100644
index 0000000..0ea3133
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hipblas-amd-no-hip-header.c
@@ -0,0 +1,8 @@
+/* { dg-require-effective-target gomp_libhipblas } */
+/* { dg-additional-options "-lhipblas" } */
+
+#define __HIP_PLATFORM_AMD__ 1
+
+#define USE_HIP_FALLBACK_HEADER 1
+
+#include "interop-hipblas.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-full.c b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-full.c
new file mode 100644
index 0000000..ed428c6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-full.c
@@ -0,0 +1,7 @@
+/* { dg-require-effective-target openacc_cublas } */
+/* { dg-require-effective-target gomp_hip_header_nvidia } */
+/* { dg-additional-options "-lcublas -Wno-deprecated-declarations" } */
+
+#define __HIP_PLATFORM_NVIDIA__ 1
+
+#include "interop-hipblas.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-headers.c b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-headers.c
new file mode 100644
index 0000000..1a31b30
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-headers.c
@@ -0,0 +1,9 @@
+/* { dg-require-effective-target openacc_libcublas } */
+/* { dg-additional-options "-lcublas" } */
+
+#define __HIP_PLATFORM_NVIDIA__ 1
+
+#define USE_HIP_FALLBACK_HEADER 1
+#define USE_CUDA_FALLBACK_HEADER 1
+
+#include "interop-hipblas.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-hip-header.c b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-hip-header.c
new file mode 100644
index 0000000..f85c13b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-hip-header.c
@@ -0,0 +1,8 @@
+/* { dg-require-effective-target openacc_cublas } */
+/* { dg-additional-options "-lcublas" } */
+
+#define __HIP_PLATFORM_NVIDIA__ 1
+
+#define USE_HIP_FALLBACK_HEADER 1
+
+#include "interop-hipblas.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas.h b/libgomp/testsuite/libgomp.c/interop-hipblas.h
new file mode 100644
index 0000000..d7cb174
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hipblas.h
@@ -0,0 +1,240 @@
+/* Check whether hipBlas' daxpy works with an interop object.
+ daxpy(N, DA, DX, INCX, DY, INCY)
+ calculates (for DX = DY = 1):
+ DY(1:N) = DY(1:N) + DA * DX(1:N)
+ and otherwise N array elements, taking every INCX-th or INCY-th one, repectively.
+
+Based on the interop example in OpenMP's example document */
+
+/* Minimal check whether HIP works - by checking whether the API routines
+ seem to work. This includes a fallback if the header is not
+ available. */
+
+#if !defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__)
+ #error "Either __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__ must be defined"
+#endif
+
+#if defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__)
+ #error "Either __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__ must be defined"
+#endif
+
+
+#include <assert.h>
+#include <omp.h>
+#include "../libgomp.c-c++-common/on_device_arch.h"
+
+
+#if __has_include(<hipblas/hipblas.h>) && (__has_include(<library_types.h>) || !defined(__HIP_PLATFORM_NVIDIA__)) && !defined(USE_HIP_FALLBACK_HEADER)
+ #ifdef __HIP_PLATFORM_NVIDIA__
+ /* There seems to be an issue with hip/library_types.h including
+ CUDA's "library_types.h". Include CUDA's one explicitly here.
+ Could possibly worked around by using -isystem vs. -I. */
+ #include <library_types.h>
+
+ /* For some reasons, the following symbols do not seem to get
+ mapped from HIP to CUDA, causing link errors. */
+ #define hipblasSetStream cublasSetStream_v2
+ #define hipblasDaxpy cublasDaxpy_v2
+ #define hipblasCreate cublasCreate_v2
+ #endif
+ #include <hipblas/hipblas.h>
+
+#elif defined(__HIP_PLATFORM_AMD__)
+ /* Add a poor man's fallback declaration. */
+ #if !defined(USE_HIP_FALLBACK_HEADER)
+ #warning "Using fallback declaration for <hipblas/hipblas.h> for __HIP_PLATFORM_AMD__"
+ #endif
+
+ typedef enum
+ {
+ HIPBLAS_STATUS_SUCCESS = 0
+
+ } hipblasStatus_t;
+
+ typedef struct ihipStream_t* hipStream_t;
+ typedef void* hipblasHandle_t;
+
+ hipblasStatus_t hipblasCreate (hipblasHandle_t*);
+ hipblasStatus_t hipblasSetStream (hipblasHandle_t, hipStream_t);
+ hipblasStatus_t hipblasDaxpy (hipblasHandle_t, int, const double*, const double*, int, double*, int);
+
+#else
+ /* Add a poor man's fallback declaration. */
+ #if !defined(USE_HIP_FALLBACK_HEADER)
+ #warning "Using fallback declaration for <hipblas/hipblas.h> for __HIP_PLATFORM_NVIDA__"
+ #endif
+
+ #if __has_include(<cuda.h>) && __has_include(<cudaTypedefs.h>) && __has_include(<cuda_runtime.h>) && __has_include(<cublas_v2.h>) && !defined(USE_CUDA_FALLBACK_HEADER)
+ #include <cuda.h>
+ #include <cudaTypedefs.h>
+ #include <cuda_runtime.h>
+ #include <cublas_v2.h>
+
+ #else
+ /* Add a poor man's fallback declaration. */
+ #if defined(USE_CUDA_FALLBACK_HEADER)
+ // no warning
+ #elif !__has_include(<cuda.h>)
+ #warning "Using GCC's cuda.h as fallback for cuda.h"
+ #elif !__has_include(<cudaTypedefs.h>)
+ #warning "Using GCC's cuda.h as fallback for cudaTypedefs.h"
+ #elif !__has_include(<cuda_runtime.h>)
+ #warning "Using GCC's cuda.h as fallback for cuda_runtime.h"
+ #else
+ #warning "Using GCC's cuda.h as fallback for cublas_v2.h"
+ #endif
+ #include "../../../include/cuda/cuda.h"
+
+ typedef enum {
+ CUBLAS_STATUS_SUCCESS = 0,
+ } cublasStatus_t;
+
+ typedef CUstream cudaStream_t;
+ typedef struct cublasContext* cublasHandle_t;
+
+ #define cublasCreate cublasCreate_v2
+ cublasStatus_t cublasCreate_v2 (cublasHandle_t *);
+
+ #define cublasSetStream cublasSetStream_v2
+ cublasStatus_t cublasSetStream_v2 (cublasHandle_t, cudaStream_t);
+
+ #define cublasDaxpy cublasDaxpy_v2
+ cublasStatus_t cublasDaxpy_v2(cublasHandle_t, int, const double*, const double*, int, double*, int);
+ #endif
+
+ #define HIPBLAS_STATUS_SUCCESS CUBLAS_STATUS_SUCCESS
+ #define hipblasStatus_t cublasStatus_t
+ #define hipStream_t cudaStream_t
+ #define hipblasHandle_t cublasHandle_t
+ #define hipblasCreate cublasCreate
+ #define hipblasSetStream cublasSetStream
+ #define hipblasDaxpy cublasDaxpy
+#endif
+
+static int used_variant = 0;
+
+void
+run_hipBlasdaxpy (int n, double da, const double *dx, int incx, double *dy, int incy, omp_interop_t obj)
+{
+ used_variant = 1;
+
+ omp_interop_rc_t res;
+ hipblasStatus_t stat;
+
+ omp_intptr_t fr = omp_get_interop_int(obj, omp_ipr_fr_id, &res);
+ assert (res == omp_irc_success && fr == omp_ifr_hip);
+
+ hipStream_t stream = (hipStream_t) omp_get_interop_ptr (obj, omp_ipr_targetsync, &res);
+ assert (res == omp_irc_success);
+
+ hipblasHandle_t handle;
+ stat = hipblasCreate (&handle);
+ assert (stat == HIPBLAS_STATUS_SUCCESS);
+
+ stat = hipblasSetStream (handle, stream);
+ assert (stat == HIPBLAS_STATUS_SUCCESS);
+
+ /* 'da' can be in host or device space, 'dx' and 'dy' must be in device space. */
+ stat = hipblasDaxpy (handle, n, &da, dx, 1, dy, 1) ;
+ assert (stat == HIPBLAS_STATUS_SUCCESS);
+}
+
+#if defined(__HIP_PLATFORM_AMD__)
+#pragma omp declare variant(run_hipBlasdaxpy) \
+ match(construct={dispatch}, target_device={kind(nohost), arch("amdgcn")}) \
+ adjust_args(need_device_ptr : dx, dy) \
+ append_args(interop(targetsync, prefer_type("hip")))
+#elif defined(__HIP_PLATFORM_NVIDIA__)
+#pragma omp declare variant(run_hipBlasdaxpy) \
+ match(construct={dispatch}, target_device={kind(nohost), arch("nvptx")}) \
+ adjust_args(need_device_ptr : dx, dy) \
+ append_args(interop(targetsync, prefer_type("hip")))
+#else
+ #error "wrong platform"
+#endif
+
+void
+run_daxpy (int n, double da, const double *dx, int incx, double *dy, int incy)
+{
+ used_variant = 2;
+
+ if (incx == 1 && incy == 1)
+ #pragma omp simd
+ for (int i = 0; i < n; i++)
+ dy[i] += da * dx[i];
+ else
+ {
+ int ix = 0;
+ int iy = 0;
+ for (int i = 0; i < n; i++)
+ {
+ dy[iy] += da * dx[ix];
+ ix += incx;
+ iy += incy;
+ }
+ }
+}
+
+
+void
+run_test (int dev)
+{
+ constexpr int N = 1024;
+
+ // A = {1,2,...,N}
+ // B = {-1, -2, ..., N}
+ // B' = daxpy (N, 3, A, incx=1, B, incy=1)
+ // = B + 3*A
+ // -> B' = {0, 2, 4, 6, ... }
+
+ double A[N], B[N];
+ double factor = 3.0;
+ for (int i = 0; i < N; i++)
+ {
+ A[i] = i;
+ B[i] = -i;
+ }
+
+ if (dev != omp_initial_device && dev != omp_get_num_devices ())
+ {
+ #pragma omp target enter data device(dev) map(A, B)
+ }
+
+ used_variant = 99;
+ #pragma omp dispatch device(dev)
+ run_daxpy (N, factor, A, 1, B, 1);
+
+ if (dev != omp_initial_device && dev != omp_get_num_devices ())
+ {
+ #pragma omp target exit data device(dev) map(release: A) map(from: B)
+
+ int tmp = omp_get_default_device ();
+ omp_set_default_device (dev);
+#if defined(__HIP_PLATFORM_AMD__)
+ if (on_device_arch_gcn ())
+#else
+ if (on_device_arch_nvptx ())
+#endif
+ assert (used_variant == 1);
+ else
+ assert (used_variant == 2);
+ omp_set_default_device (tmp);
+ }
+ else
+ assert (used_variant == 2);
+
+ for (int i = 0; i < N; i++)
+ assert (B[i] == 2*i);
+}
+
+int
+main ()
+{
+ int ndev = omp_get_num_devices ();
+
+ for (int dev = 0; dev <= ndev; dev++)
+ run_test (dev);
+ run_test (omp_initial_device);
+
+ return 0;
+}