70 files changed, 3481 insertions, 16 deletions
diff --git a/libgomp/testsuite/libgomp.c/alloc-managed-1.c b/libgomp/testsuite/libgomp.c/alloc-managed-1.c
new file mode 100644
index 0000000..88ddcf3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/alloc-managed-1.c
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+/* { dg-require-effective-target omp_managedmem } */
+/* { dg-additional-options "-foffload-options=amdgcn-amdhsa=-mxnack=on" { target offload_target_amdgcn_with_xnack } } */
+
+/* Check that omp_alloc can allocate Managed Memory, and that host and target
+   can see the data, at the same address, without a mapping.  */
+
+#include <omp.h>
+#include <stdint.h>
+
+int
+main ()
+{
+  int *a = (int *) omp_alloc(sizeof(int), ompx_gnu_managed_mem_alloc);
+  if (!a)
+    __builtin_abort ();
+
+  *a = 42;
+  uintptr_t a_p = (uintptr_t)a;
+
+  #pragma omp target is_device_ptr(a)
+    {
+      if (*a != 42 || a_p != (uintptr_t)a)
+	__builtin_abort ();
+    }
+
+  omp_free(a, ompx_gnu_managed_mem_alloc);
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/alloc-managed-2.c b/libgomp/testsuite/libgomp.c/alloc-managed-2.c
new file mode 100644
index 0000000..660f6e6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/alloc-managed-2.c
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+/* { dg-require-effective-target omp_managedmem } */
+/* { dg-additional-options -foffload-options=amdgcn-amdhsa=-mxnack=on { target offload_target_amdgcn_with_xnack } } */
+
+/* Check that omp_calloc can allocate Managed Memory, and that host and target
+   can see the data, at the same address, without a mapping.  */
+
+#include <omp.h>
+#include <stdint.h>
+
+int
+main ()
+{
+  int *a = (int *) omp_calloc(5, sizeof(int), ompx_gnu_managed_mem_alloc);
+  if (!a)
+    __builtin_abort ();
+
+  /* Check that memory is zero-initialized */
+  for (int i = 0; i < 5; i++)
+    if (a[i] != 0)
+      __builtin_abort ();
+
+  a[0] = 42;
+  a[4] = 99;
+  uintptr_t a_p = (uintptr_t)a;
+
+  #pragma omp target is_device_ptr(a)
+    {
+      if (a[0] != 42 || a[4] != 99 || a_p != (uintptr_t)a)
+	__builtin_abort ();
+      /* Check zero-initialization on device side */
+      for (int i = 1; i < 4; i++)
+	if (a[i] != 0)
+	  __builtin_abort ();
+    }
+
+  omp_free(a, ompx_gnu_managed_mem_alloc);
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/alloc-managed-3.c b/libgomp/testsuite/libgomp.c/alloc-managed-3.c
new file mode 100644
index 0000000..fefdeb3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/alloc-managed-3.c
@@ -0,0 +1,45 @@
+/* { dg-do run } */
+/* { dg-require-effective-target omp_managedmem } */
+/* { dg-additional-options -foffload-options=amdgcn-amdhsa=-mxnack=on { target offload_target_amdgcn_with_xnack } } */
+
+/* Check that omp_realloc can allocate Managed Memory, and that host and target
+   can see the data, at the same address, without a mapping.  */
+
+#include <omp.h>
+#include <stdint.h>
+
+int
+main ()
+{
+  int *a = (int *) omp_alloc(2 * sizeof(int), ompx_gnu_managed_mem_alloc);
+  if (!a)
+    __builtin_abort ();
+
+  a[0] = 42;
+  a[1] = 43;
+
+  /* Reallocate to larger size */
+  int *b = (int *) omp_realloc(a, 5 * sizeof(int), ompx_gnu_managed_mem_alloc,
+			       ompx_gnu_managed_mem_alloc);
+  if (!b)
+    __builtin_abort ();
+
+  /* Check that original data is preserved */
+  if (b[0] != 42 || b[1] != 43)
+    __builtin_abort ();
+
+  b[2] = 44;
+  b[3] = 45;
+  b[4] = 46;
+  uintptr_t b_p = (uintptr_t)b;
+
+  #pragma omp target is_device_ptr(b)
+    {
+      if (b[0] != 42 || b[1] != 43 || b[2] != 44 || b[3] != 45 || b[4] != 46
+	  || b_p != (uintptr_t)b)
+	__builtin_abort ();
+    }
+
+  omp_free(b, ompx_gnu_managed_mem_alloc);
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/alloc-managed-4.c b/libgomp/testsuite/libgomp.c/alloc-managed-4.c
new file mode 100644
index 0000000..577e3e2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/alloc-managed-4.c
@@ -0,0 +1,23 @@
+/* { dg-do run } */
+/* { dg-require-effective-target omp_managedmem } */
+/* { dg-additional-options -foffload-options=amdgcn-amdhsa=-mxnack=on { target offload_target_amdgcn_with_xnack } } */
+/* { dg-shouldfail "" } */
+/* { dg-output "libgomp: attempted to free managed memory at 0x\[0-9a-f\]+, but the default device is set to the host device" } */
+
+/* Check that omp_free emits an error if the default device has been changed
+   to the host device.  */
+
+#include <omp.h>
+#include <stdint.h>
+
+int
+main ()
+{
+  int *a = (int *) omp_alloc(2 * sizeof(int), ompx_gnu_managed_mem_alloc);
+  if (!a)
+    __builtin_abort ();
+
+  omp_set_default_device (omp_initial_device);
+  omp_free(a, ompx_gnu_managed_mem_alloc);
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/alloc-pinned-1.c b/libgomp/testsuite/libgomp.c/alloc-pinned-1.c
index 672f245..693f903 100644
--- a/libgomp/testsuite/libgomp.c/alloc-pinned-1.c
+++ b/libgomp/testsuite/libgomp.c/alloc-pinned-1.c
@@ -2,6 +2,8 @@
 
 /* { dg-skip-if "Pinning not implemented on this host" { ! *-*-linux-gnu* } } */
 
+/* { dg-additional-options -DOFFLOAD_DEVICE_NVPTX { target offload_device_nvptx } } */
+
 /* Test that pinned memory works.  */
 
 #include <stdio.h>
@@ -63,10 +65,16 @@ verify0 (char *p, size_t s)
 int
 main ()
 {
+#ifdef OFFLOAD_DEVICE_NVPTX
+  /* Go big or go home.
+     The OS ulimit does not affect memory locked via CUDA for NVPTX devices. */
+  const int SIZE = 40 * 1024 * 1024;
+#else
   /* Allocate at least a page each time, allowing space for overhead,
      but stay within the ulimit.  */
   const int SIZE = PAGE_SIZE - 128;
   CHECK_SIZE (SIZE * 5);  // This is intended to help diagnose failures
+#endif
 
   const omp_alloctrait_t traits[] = {
       { omp_atk_pinned, 1 }
@@ -88,21 +96,39 @@ main ()
     abort ();
 
   int amount = get_pinned_mem ();
+#ifdef OFFLOAD_DEVICE_NVPTX
+  /* This doesn't show up as process 'VmLck'ed memory.  */
+  if (amount != 0)
+    abort ();
+#else
   if (amount == 0)
     abort ();
+#endif
 
   p = omp_realloc (p, SIZE * 2, allocator, allocator);
 
   int amount2 = get_pinned_mem ();
+#ifdef OFFLOAD_DEVICE_NVPTX
+  /* This doesn't show up as process 'VmLck'ed memory.  */
+  if (amount2 != 0)
+    abort ();
+#else
   if (amount2 <= amount)
     abort ();
+#endif
 
   /* SIZE*2 ensures that it doesn't slot into the space possibly
      vacated by realloc.  */
   p = omp_calloc (1, SIZE * 2, allocator);
 
+#ifdef OFFLOAD_DEVICE_NVPTX
+  /* This doesn't show up as process 'VmLck'ed memory.  */
+  if (get_pinned_mem () != 0)
+    abort ();
+#else
   if (get_pinned_mem () <= amount2)
     abort ();
+#endif
 
   verify0 (p, SIZE * 2);
 
diff --git a/libgomp/testsuite/libgomp.c/alloc-pinned-2.c b/libgomp/testsuite/libgomp.c/alloc-pinned-2.c
index b6d1d83..e7ac64e 100644
--- a/libgomp/testsuite/libgomp.c/alloc-pinned-2.c
+++ b/libgomp/testsuite/libgomp.c/alloc-pinned-2.c
@@ -2,6 +2,8 @@
 
 /* { dg-skip-if "Pinning not implemented on this host" { ! *-*-linux-gnu* } } */
 
+/* { dg-additional-options -DOFFLOAD_DEVICE_NVPTX { target offload_device_nvptx } } */
+
 /* Test that pinned memory works (pool_size code path).  */
 
 #include <stdio.h>
@@ -63,10 +65,16 @@ verify0 (char *p, size_t s)
 int
 main ()
 {
+#ifdef OFFLOAD_DEVICE_NVPTX
+  /* Go big or go home.
+     The OS ulimit does not affect memory locked via CUDA for NVPTX devices. */
+  const int SIZE = 40 * 1024 * 1024;
+#else
   /* Allocate at least a page each time, allowing space for overhead,
      but stay within the ulimit.  */
   const int SIZE = PAGE_SIZE - 128;
   CHECK_SIZE (SIZE * 5);  // This is intended to help diagnose failures
+#endif
 
   const omp_alloctrait_t traits[] = {
       { omp_atk_pinned, 1 },
@@ -89,16 +97,28 @@ main ()
     abort ();
 
   int amount = get_pinned_mem ();
+#ifdef OFFLOAD_DEVICE_NVPTX
+  /* This doesn't show up as process 'VmLck'ed memory.  */
+  if (amount != 0)
+    abort ();
+#else
   if (amount == 0)
     abort ();
+#endif
 
   p = omp_realloc (p, SIZE * 2, allocator, allocator);
   if (!p)
     abort ();
 
   int amount2 = get_pinned_mem ();
+#ifdef OFFLOAD_DEVICE_NVPTX
+  /* This doesn't show up as process 'VmLck'ed memory.  */
+  if (amount2 != 0)
+    abort ();
+#else
   if (amount2 <= amount)
     abort ();
+#endif
 
   /* SIZE*2 ensures that it doesn't slot into the space possibly
      vacated by realloc.  */
@@ -106,8 +126,14 @@ main ()
   if (!p)
     abort ();
 
+#ifdef OFFLOAD_DEVICE_NVPTX
+  /* This doesn't show up as process 'VmLck'ed memory.  */
+  if (get_pinned_mem () != 0)
+    abort ();
+#else
   if (get_pinned_mem () <= amount2)
     abort ();
+#endif
 
   verify0 (p, SIZE * 2);
 
diff --git a/libgomp/testsuite/libgomp.c/alloc-pinned-3.c b/libgomp/testsuite/libgomp.c/alloc-pinned-3.c
index 11dc818..250cb55 100644
--- a/libgomp/testsuite/libgomp.c/alloc-pinned-3.c
+++ b/libgomp/testsuite/libgomp.c/alloc-pinned-3.c
@@ -1,5 +1,7 @@
 /* { dg-do run } */
 
+/* { dg-additional-options -DOFFLOAD_DEVICE_NVPTX { target offload_device_nvptx } } */
+
 /* Test that pinned memory fails correctly.  */
 
 #include <stdio.h>
@@ -75,8 +77,15 @@ verify0 (char *p, size_t s)
 int
 main ()
 {
+#ifdef OFFLOAD_DEVICE_NVPTX
+  /* Go big or go home.
+     The OS ulimit does not affect memory locked via CUDA for NVPTX devices. */
+  const int SIZE = 40 * 1024 * 1024;
+#else
   /* This needs to be large enough to cover multiple pages.  */
   const int SIZE = PAGE_SIZE * 4;
+#endif
+  const int PIN_LIMIT = PAGE_SIZE * 2;
 
   /* Pinned memory, no fallback.  */
   const omp_alloctrait_t traits1[] = {
@@ -101,23 +110,34 @@ main ()
 #endif
 
   /* Ensure that the limit is smaller than the allocation.  */
-  set_pin_limit (SIZE / 2);
+  set_pin_limit (PIN_LIMIT);
 
   // Sanity check
   if (get_pinned_mem () != 0)
     abort ();
 
-  // Should fail
   void *p1 = omp_alloc (SIZE, allocator1);
+#ifdef OFFLOAD_DEVICE_NVPTX
+  // Doesn't care about 'set_pin_limit'.
+  if (!p1)
+    abort ();
+#else
+  // Should fail
   if (p1)
     abort ();
+#endif
 
-  // Should fail
   void *p2 = omp_calloc (1, SIZE, allocator1);
+#ifdef OFFLOAD_DEVICE_NVPTX
+  // Doesn't care about 'set_pin_limit'.
+  if (!p2)
+    abort ();
+#else
+  // Should fail
   if (p2)
     abort ();
+#endif
 
-  // Should fall back
   void *p3 = omp_alloc (SIZE, allocator2);
   if (!p3)
     abort ();
@@ -128,16 +148,29 @@ main ()
     abort ();
   verify0 (p4, SIZE);
 
-  // Should fail to realloc
   void *notpinned = omp_alloc (SIZE, omp_default_mem_alloc);
   void *p5 = omp_realloc (notpinned, SIZE, allocator1, omp_default_mem_alloc);
+#ifdef OFFLOAD_DEVICE_NVPTX
+  // Doesn't care about 'set_pin_limit'; does reallocate.
+  if (!notpinned || !p5 || p5 == notpinned)
+    abort ();
+#else
+  // Should fail to realloc
   if (!notpinned || p5)
     abort ();
+#endif
 
-  // Should fall back to no realloc needed
+#ifdef OFFLOAD_DEVICE_NVPTX
+  void *p6 = omp_realloc (p5, SIZE, allocator2, allocator1);
+  // Does reallocate.
+  if (p5 == p6)
+    abort ();
+#else
   void *p6 = omp_realloc (notpinned, SIZE, allocator2, omp_default_mem_alloc);
+  // Should fall back to no realloc needed
   if (p6 != notpinned)
     abort ();
+#endif
 
   // No memory should have been pinned
   int amount = get_pinned_mem ();
diff --git a/libgomp/testsuite/libgomp.c/alloc-pinned-4.c b/libgomp/testsuite/libgomp.c/alloc-pinned-4.c
index 2ecd01f..b7a9966 100644
--- a/libgomp/testsuite/libgomp.c/alloc-pinned-4.c
+++ b/libgomp/testsuite/libgomp.c/alloc-pinned-4.c
@@ -1,5 +1,7 @@
 /* { dg-do run } */
 
+/* { dg-additional-options -DOFFLOAD_DEVICE_NVPTX { target offload_device_nvptx } } */
+
 /* Test that pinned memory fails correctly, pool_size code path.  */
 
 #include <stdio.h>
@@ -75,8 +77,15 @@ verify0 (char *p, size_t s)
 int
 main ()
 {
+#ifdef OFFLOAD_DEVICE_NVPTX
+  /* Go big or go home.
+     The OS ulimit does not affect memory locked via CUDA for NVPTX devices. */
+  const int SIZE = 40 * 1024 * 1024;
+#else
   /* This needs to be large enough to cover multiple pages.  */
   const int SIZE = PAGE_SIZE * 4;
+#endif
+  const int PIN_LIMIT = PAGE_SIZE * 2;
 
   /* Pinned memory, no fallback.  */
   const omp_alloctrait_t traits1[] = {
@@ -103,21 +112,33 @@ main ()
 #endif
 
   /* Ensure that the limit is smaller than the allocation.  */
-  set_pin_limit (SIZE / 2);
+  set_pin_limit (PIN_LIMIT);
 
   // Sanity check
   if (get_pinned_mem () != 0)
     abort ();
 
-  // Should fail
   void *p = omp_alloc (SIZE, allocator1);
+#ifdef OFFLOAD_DEVICE_NVPTX
+  // Doesn't care about 'set_pin_limit'.
+  if (!p)
+    abort ();
+#else
+  // Should fail
   if (p)
     abort ();
+#endif
 
-  // Should fail
   p = omp_calloc (1, SIZE, allocator1);
+#ifdef OFFLOAD_DEVICE_NVPTX
+  // Doesn't care about 'set_pin_limit'.
+  if (!p)
+    abort ();
+#else
+  // Should fail
   if (p)
     abort ();
+#endif
 
   // Should fall back
   p = omp_alloc (SIZE, allocator2);
@@ -130,16 +151,29 @@ main ()
     abort ();
   verify0 (p, SIZE);
 
-  // Should fail to realloc
   void *notpinned = omp_alloc (SIZE, omp_default_mem_alloc);
   p = omp_realloc (notpinned, SIZE, allocator1, omp_default_mem_alloc);
+#ifdef OFFLOAD_DEVICE_NVPTX
+  // Doesn't care about 'set_pin_limit'; does reallocate.
+  if (!notpinned || !p || p == notpinned)
+    abort ();
+#else
+  // Should fail to realloc
   if (!notpinned || p)
     abort ();
+#endif
 
-  // Should fall back to no realloc needed
+#ifdef OFFLOAD_DEVICE_NVPTX
+  void *p_ = omp_realloc (p, SIZE, allocator2, allocator1);
+  // Does reallocate.
+  if (p_ == p)
+    abort ();
+#else
   p = omp_realloc (notpinned, SIZE, allocator2, omp_default_mem_alloc);
+  // Should fall back to no realloc needed
   if (p != notpinned)
     abort ();
+#endif
 
   // No memory should have been pinned
   int amount = get_pinned_mem ();
diff --git a/libgomp/testsuite/libgomp.c/alloc-pinned-5.c b/libgomp/testsuite/libgomp.c/alloc-pinned-5.c
index 0ba2feb..cc77764 100644
--- a/libgomp/testsuite/libgomp.c/alloc-pinned-5.c
+++ b/libgomp/testsuite/libgomp.c/alloc-pinned-5.c
@@ -2,6 +2,8 @@
 
 /* { dg-skip-if "Pinning not implemented on this host" { ! *-*-linux-gnu* } } */
 
+/* { dg-additional-options -DOFFLOAD_DEVICE_NVPTX { target offload_device_nvptx } } */
+
 /* Test that ompx_gnu_pinned_mem_alloc works.  */
 
 #include <stdio.h>
@@ -63,10 +65,16 @@ verify0 (char *p, size_t s)
 int
 main ()
 {
+#ifdef OFFLOAD_DEVICE_NVPTX
+  /* Go big or go home.
+     The OS ulimit does not affect memory locked via CUDA for NVPTX devices. */
+  const int SIZE = 40 * 1024 * 1024;
+#else
   /* Allocate at least a page each time, allowing space for overhead,
      but stay within the ulimit.  */
   const int SIZE = PAGE_SIZE - 128;
   CHECK_SIZE (SIZE * 5);
+#endif
 
   // Sanity check
   if (get_pinned_mem () != 0)
@@ -77,22 +85,40 @@ main ()
     abort ();
 
   int amount = get_pinned_mem ();
+#ifdef OFFLOAD_DEVICE_NVPTX
+  /* This doesn't show up as process 'VmLck'ed memory.  */
+  if (amount != 0)
+    abort ();
+#else
   if (amount == 0)
     abort ();
+#endif
 
   p = omp_realloc (p, SIZE * 2, ompx_gnu_pinned_mem_alloc,
 		   ompx_gnu_pinned_mem_alloc);
 
   int amount2 = get_pinned_mem ();
+#ifdef OFFLOAD_DEVICE_NVPTX
+  /* This doesn't show up as process 'VmLck'ed memory.  */
+  if (amount2 != 0)
+    abort ();
+#else
   if (amount2 <= amount)
     abort ();
+#endif
 
   /* SIZE*2 ensures that it doesn't slot into the space possibly
      vacated by realloc.  */
   p = omp_calloc (1, SIZE * 2, ompx_gnu_pinned_mem_alloc);
 
+#ifdef OFFLOAD_DEVICE_NVPTX
+  /* This doesn't show up as process 'VmLck'ed memory.  */
+  if (get_pinned_mem () != 0)
+    abort ();
+#else
   if (get_pinned_mem () <= amount2)
     abort ();
+#endif
 
   verify0 (p, SIZE * 2);
 
diff --git a/libgomp/testsuite/libgomp.c/alloc-pinned-6.c b/libgomp/testsuite/libgomp.c/alloc-pinned-6.c
index 99f1269..6dd5544 100644
--- a/libgomp/testsuite/libgomp.c/alloc-pinned-6.c
+++ b/libgomp/testsuite/libgomp.c/alloc-pinned-6.c
@@ -1,4 +1,5 @@
 /* { dg-do run } */
+/* { dg-additional-options -DOFFLOAD_DEVICE_NVPTX { target offload_device_nvptx } } */
 
 /* Test that ompx_gnu_pinned_mem_alloc fails correctly.  */
 
@@ -66,32 +67,57 @@ set_pin_limit (int size)
 int
 main ()
 {
+#ifdef OFFLOAD_DEVICE_NVPTX
+  /* Go big or go home.
+     The OS ulimit does not affect memory locked via CUDA for NVPTX devices. */
+  const int SIZE = 40 * 1024 * 1024;
+#else
   /* Allocate at least a page each time, but stay within the ulimit.  */
   const int SIZE = PAGE_SIZE * 4;
+#endif
+  const int PIN_LIMIT = PAGE_SIZE*2;
 
   /* Ensure that the limit is smaller than the allocation.  */
-  set_pin_limit (SIZE / 2);
+  set_pin_limit (PIN_LIMIT);
 
   // Sanity check
   if (get_pinned_mem () != 0)
     abort ();
 
-  // Should fail
   void *p = omp_alloc (SIZE, ompx_gnu_pinned_mem_alloc);
+#ifdef OFFLOAD_DEVICE_NVPTX
+  // Doesn't care about 'set_pin_limit'.
+  if (!p)
+    abort ();
+#else
+  // Should fail
   if (p)
     abort ();
+#endif
 
-  // Should fail
   p = omp_calloc (1, SIZE, ompx_gnu_pinned_mem_alloc);
+#ifdef OFFLOAD_DEVICE_NVPTX
+  // Doesn't care about 'set_pin_limit'.
+  if (!p)
+    abort ();
+#else
+  // Should fail
   if (p)
     abort ();
+#endif
 
-  // Should fail to realloc
   void *notpinned = omp_alloc (SIZE, omp_default_mem_alloc);
   p = omp_realloc (notpinned, SIZE, ompx_gnu_pinned_mem_alloc,
 		   omp_default_mem_alloc);
+#ifdef OFFLOAD_DEVICE_NVPTX
+  // Doesn't care about 'set_pin_limit'; does reallocate.
+  if (!notpinned || !p || p == notpinned)
+    abort ();
+#else
+  // Should fail to realloc
   if (!notpinned || p)
     abort ();
+#endif
 
   // No memory should have been pinned
   int amount = get_pinned_mem ();
diff --git a/libgomp/testsuite/libgomp.c/alloc-pinned-8.c b/libgomp/testsuite/libgomp.c/alloc-pinned-8.c
new file mode 100644
index 0000000..0fc737b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/alloc-pinned-8.c
@@ -0,0 +1,122 @@
+/* { dg-do run } */
+
+/* { dg-skip-if "Pinning not implemented on this host" { ! *-*-linux-gnu* } } */
+
+/* { dg-additional-options -DOFFLOAD_DEVICE_NVPTX { target offload_device_nvptx } } */
+
+/* Test that pinned memory works for small allocations.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifdef __linux__
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <sys/mman.h>
+#include <sys/resource.h>
+
+#define PAGE_SIZE sysconf(_SC_PAGESIZE)
+#define CHECK_SIZE(SIZE) { \
+  struct rlimit limit; \
+  if (getrlimit (RLIMIT_MEMLOCK, &limit) \
+      || limit.rlim_cur <= SIZE) \
+    fprintf (stderr, "insufficient lockable memory; please increase ulimit\n"); \
+  }
+
+int
+get_pinned_mem ()
+{
+  int pid = getpid ();
+  char buf[100];
+  sprintf (buf, "/proc/%d/status", pid);
+
+  FILE *proc = fopen (buf, "r");
+  if (!proc)
+    abort ();
+  while (fgets (buf, 100, proc))
+    {
+      int val;
+      if (sscanf (buf, "VmLck: %d", &val))
+	{
+	  fclose (proc);
+	  return val;
+	}
+    }
+  abort ();
+}
+#else
+#error "OS unsupported"
+#endif
+
+static void
+verify0 (char *p, size_t s)
+{
+  for (size_t i = 0; i < s; ++i)
+    if (p[i] != 0)
+      abort ();
+}
+
+#include <omp.h>
+
+int
+main ()
+{
+  /* Choose a small size where all our allocations fit on one page.  */
+  const int SIZE = 10;
+#ifndef OFFLOAD_DEVICE_NVPTX
+  CHECK_SIZE (SIZE*4);
+#endif
+
+  const omp_alloctrait_t traits[] = {
+      { omp_atk_pinned, 1 }
+  };
+  omp_allocator_handle_t allocator = omp_init_allocator (omp_default_mem_space, 1, traits);
+
+  // Sanity check
+  if (get_pinned_mem () != 0)
+    abort ();
+
+  void *p = omp_alloc (SIZE, allocator);
+  if (!p)
+    abort ();
+
+  int amount = get_pinned_mem ();
+#ifdef OFFLOAD_DEVICE_NVPTX
+  /* This doesn't show up as process 'VmLck'ed memory.  */
+  if (amount != 0)
+    abort ();
+#else
+  if (amount == 0)
+    abort ();
+#endif
+
+  p = omp_realloc (p, SIZE * 2, allocator, allocator);
+
+  int amount2 = get_pinned_mem ();
+#ifdef OFFLOAD_DEVICE_NVPTX
+  /* This doesn't show up as process 'VmLck'ed memory.  */
+  if (amount2 != 0)
+    abort ();
+#else
+  /* A small allocation should not allocate another page.  */
+  if (amount2 != amount)
+    abort ();
+#endif
+
+  p = omp_calloc (1, SIZE, allocator);
+
+#ifdef OFFLOAD_DEVICE_NVPTX
+  /* This doesn't show up as process 'VmLck'ed memory.  */
+  if (get_pinned_mem () != 0)
+    abort ();
+#else
+  /* A small allocation should not allocate another page.  */
+  if (get_pinned_mem () != amount2)
+    abort ();
+#endif
+
+  verify0 (p, SIZE);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/append-args-fr-1.c b/libgomp/testsuite/libgomp.c/append-args-fr-1.c
new file mode 100644
index 0000000..2fd7eda
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/append-args-fr-1.c
@@ -0,0 +1,232 @@
+/* { dg-do run } */
+
+#include "append-args-fr.h"
+
+enum { host_device, nvptx_device, gcn_device } used_device_type, used_device_type2;
+static int used_device_num, used_device_num2;
+static omp_interop_fr_t expected_fr, expected_fr2;
+static _Bool is_targetsync, is_targetsync2;
+
+void
+check_interop (omp_interop_t obj)
+{
+  if (used_device_type == host_device)
+    check_host (obj);
+  else if (used_device_type == nvptx_device)
+    check_nvptx (obj, used_device_num, expected_fr, is_targetsync);
+  else if (used_device_type == gcn_device)
+    check_gcn (obj, used_device_num, expected_fr, is_targetsync);
+  else
+    __builtin_unreachable ();
+
+  #pragma omp interop use(obj)
+}
+
+void
+check_interop2 (omp_interop_t obj, omp_interop_t obj2)
+{
+  check_interop (obj);
+
+  #pragma omp interop use(obj2)
+
+  if (used_device_type2 == host_device)
+    check_host (obj2);
+  else if (used_device_type2 == nvptx_device)
+    check_nvptx (obj2, used_device_num2, expected_fr2, is_targetsync2);
+  else if (used_device_type2 == gcn_device)
+    check_gcn (obj2, used_device_num2, expected_fr2, is_targetsync2);
+  else
+    __builtin_unreachable ();
+}
+
+
+/* Check no args + one interop arg - and no prefer_type.  */
+
+int f0_1_tg_ (omp_interop_t obj) { check_interop (obj); return 4242; }
+#pragma omp declare variant(f0_1_tg_)     match(construct={dispatch}) append_args(interop(target))
+int f0_1_tg () { assert (false); return 42; }
+
+void f0_1_tgsy_ (omp_interop_t obj) { check_interop (obj); }
+#pragma omp declare variant(f0_1_tgsy_)   match(construct={dispatch}) append_args(interop(targetsync))
+void f0_1_tgsy () { assert (false); }
+
+int f0_1_tgtgsy_ (omp_interop_t obj) { check_interop (obj); return 3333; }
+#pragma omp declare variant(f0_1_tgtgsy_) match(construct={dispatch}) append_args(interop(targetsync,target))
+int f0_1_tgtgsy () { assert (false); return 33; }
+
+
+/* And with PREFER_TYPE.  */
+
+// nv: cuda, gcn: -, -, hip
+void f0_1_tgsy_c_cd_hi_hs_ (omp_interop_t obj) { check_interop (obj); }
+#pragma omp declare variant(f0_1_tgsy_c_cd_hi_hs_)   match(construct={dispatch}) \
+            append_args(interop(targetsync, prefer_type("cuda","cuda_driver", "hip", "hsa")))
+void f0_1_tgsy_c_cd_hi_hs () { assert (false); }
+
+// nv: -, cuda_driver, gcn: hsa
+void f0_1_tgsy_hs_cd_c_hi_ (omp_interop_t obj) { check_interop (obj); }
+#pragma omp declare variant(f0_1_tgsy_hs_cd_c_hi_)   match(construct={dispatch}) \
+            append_args(interop(targetsync, prefer_type({attr("ompx_foo")}, {fr("hsa")}, {attr("ompx_bar"), fr("cuda_driver"), attr("ompx_foobar")},{fr("cuda")}, {fr("hip")})))
+void f0_1_tgsy_hs_cd_c_hi () { assert (false); }
+
+// nv: -, hip, gcn: hsa
+void f0_1_tgsy_hs_hi_cd_c_ (omp_interop_t obj) { check_interop (obj); }
+#pragma omp declare variant(f0_1_tgsy_hs_hi_cd_c_)   match(construct={dispatch}) \
+            append_args(interop(targetsync, prefer_type("hsa", "hip", "cuda_driver", "cuda")))
+void f0_1_tgsy_hs_hi_cd_c () { assert (false); }
+
+
+void
+check_f0 ()
+{
+  if (used_device_type == nvptx_device)
+    expected_fr = omp_ifr_cuda;
+  else if (used_device_type == gcn_device)
+    expected_fr = omp_ifr_hip;
+  else  /* host; variable shall not be accessed  */
+    expected_fr = omp_ifr_level_zero;
+
+  int i;
+  if (used_device_num == DEFAULT_DEVICE)
+    {
+      is_targetsync = 0;
+      #pragma omp dispatch
+        i = f0_1_tg ();
+      assert (i == 4242);
+
+      is_targetsync = 1;
+      #pragma omp dispatch
+        f0_1_tgsy ();
+
+      #pragma omp dispatch
+        i = f0_1_tgtgsy ();
+      assert (i == 3333);
+
+
+      if (used_device_type == nvptx_device)
+	expected_fr = omp_ifr_cuda;
+      else if (used_device_type == gcn_device)
+	expected_fr = omp_ifr_hip;
+      #pragma omp dispatch
+	 f0_1_tgsy_c_cd_hi_hs ();
+
+      if (used_device_type == nvptx_device)
+	expected_fr = omp_ifr_cuda_driver;
+      else if (used_device_type == gcn_device)
+	expected_fr = omp_ifr_hsa;
+      #pragma omp dispatch
+	f0_1_tgsy_hs_cd_c_hi ();
+  
+      if (used_device_type == nvptx_device)
+	expected_fr = omp_ifr_hip;
+      else if (used_device_type == gcn_device)
+	expected_fr = omp_ifr_hsa;
+      #pragma omp dispatch
+	f0_1_tgsy_hs_hi_cd_c ();
+    }
+  else
+    {
+      is_targetsync = 0;
+      #pragma omp dispatch device(used_device_num)
+        i = f0_1_tg ();
+      assert (i == 4242);
+
+      is_targetsync = 1;
+      #pragma omp dispatch device(used_device_num)
+        f0_1_tgsy ();
+
+      #pragma omp dispatch device(used_device_num)
+        i = f0_1_tgtgsy ();
+      assert (i == 3333);
+
+
+      if (used_device_type == nvptx_device)
+	expected_fr = omp_ifr_cuda;
+      else if (used_device_type == gcn_device)
+	expected_fr = omp_ifr_hip;
+      #pragma omp dispatch device(used_device_num)
+	 f0_1_tgsy_c_cd_hi_hs ();
+
+      if (used_device_type == nvptx_device)
+	expected_fr = omp_ifr_cuda_driver;
+      else if (used_device_type == gcn_device)
+	expected_fr = omp_ifr_hsa;
+      #pragma omp dispatch device(used_device_num)
+	f0_1_tgsy_hs_cd_c_hi ();
+  
+      if (used_device_type == nvptx_device)
+	expected_fr = omp_ifr_hip;
+      else if (used_device_type == gcn_device)
+	expected_fr = omp_ifr_hsa;
+      #pragma omp dispatch device(used_device_num)
+	f0_1_tgsy_hs_hi_cd_c ();
+    }
+}
+
+
+
+void
+do_check (int dev)
+{
+  int num_dev = omp_get_num_devices ();
+  const char *dev_type;
+  if (dev != DEFAULT_DEVICE)
+    omp_set_default_device (dev);
+  int is_nvptx = on_device_arch_nvptx ();
+  int is_gcn = on_device_arch_gcn ();
+  int is_host;
+ 
+  if (dev != DEFAULT_DEVICE)
+    is_host = dev == -1 || dev == num_dev;
+  else
+    {
+      int def_dev = omp_get_default_device ();
+      is_host = def_dev == -1 || def_dev == num_dev;
+    }
+
+  assert (is_nvptx + is_gcn + is_host == 1);
+
+  if (num_dev > 0 && dev != DEFAULT_DEVICE)
+    {
+      if (is_host)
+	omp_set_default_device (0);
+      else
+	omp_set_default_device (-1);
+    }
+
+  used_device_num = dev;
+  if (is_host)
+    {
+      dev_type = "host";
+      used_device_type = host_device;
+    }
+  else if (is_nvptx)
+    {
+      dev_type = "nvptx";
+      used_device_type = nvptx_device;
+    }
+  else if (is_gcn)
+    {
+      dev_type = "gcn";
+      used_device_type = gcn_device;
+    }
+
+  printf ("Running on the %s device (%d)\n", dev_type, dev);
+  check_f0 ();
+}
+
+
+
+int
+main ()
+{
+  do_check (DEFAULT_DEVICE);
+  int ndev = omp_get_num_devices ();
+  for (int dev = -1; dev < ndev; dev++)
+    do_check (dev);
+  for (int dev = -1; dev < ndev; dev++)
+    {
+      omp_set_default_device (dev);
+      do_check (DEFAULT_DEVICE);
+    }
+}
diff --git a/libgomp/testsuite/libgomp.c/append-args-fr.h b/libgomp/testsuite/libgomp.c/append-args-fr.h
new file mode 100644
index 0000000..9f6ca04
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/append-args-fr.h
@@ -0,0 +1,305 @@
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <omp.h>
+#include "../libgomp.c-c++-common/on_device_arch.h"
+
+/* Provides:  */
+
+#define DEFAULT_DEVICE -99
+
+void check_host (omp_interop_t obj);
+void check_nvptx (omp_interop_t obj, int dev, omp_interop_fr_t expected_fr, _Bool is_targetsync);
+void check_gcn (omp_interop_t obj, int dev, omp_interop_fr_t expected_fr, _Bool is_targetsync);
+
+
+/* The following assumes that when a nvptx device is available,
+   cuda/cuda_driver/hip are supported.
+   And that likewise when a gcn device is available that the
+   plugin also can not only the HSA but also the HIP library
+   such that hsa/hip are supported.
+   For the host, omp_interop_none is expected.
+
+   Otherwise, it only does some basic tests without checking
+   that the returned result really makes sense.  */
+
+void check_type (omp_interop_t obj)
+{
+  const char *type;
+
+  type = omp_get_interop_type_desc (obj, omp_ipr_fr_id);
+  if (obj != omp_interop_none)
+    assert (strcmp (type, "omp_interop_t") == 0);
+  else
+    assert (type == NULL);
+
+  type = omp_get_interop_type_desc (obj, omp_ipr_fr_name);
+  if (obj != omp_interop_none)
+    assert (strcmp (type, "const char *") == 0);
+  else
+    assert (type == NULL);
+
+  type = omp_get_interop_type_desc (obj, omp_ipr_vendor);
+  if (obj != omp_interop_none)
+    assert (strcmp (type, "int") == 0);
+  else
+    assert (type == NULL);
+
+  type = omp_get_interop_type_desc (obj, omp_ipr_vendor_name);
+  if (obj != omp_interop_none)
+    assert (strcmp (type, "const char *") == 0);
+  else
+    assert (type == NULL);
+
+  type = omp_get_interop_type_desc (obj, omp_ipr_device_num);
+  if (obj != omp_interop_none)
+    assert (strcmp (type, "int") == 0);
+  else
+    assert (type == NULL);
+
+  if (obj != omp_interop_none)
+    return;
+  assert (omp_get_interop_type_desc (obj, omp_ipr_platform) == NULL);
+  assert (omp_get_interop_type_desc (obj, omp_ipr_device) == NULL);
+  assert (omp_get_interop_type_desc (obj, omp_ipr_device_context) == NULL);
+  assert (omp_get_interop_type_desc (obj, omp_ipr_targetsync) == NULL);
+}
+
+
+void
+check_host (omp_interop_t obj)
+{
+  assert (obj == omp_interop_none);
+  check_type (obj);
+}
+
+
+void
+check_nvptx (omp_interop_t obj, int dev, omp_interop_fr_t expected_fr, _Bool is_targetsync)
+{
+  assert (obj != omp_interop_none && obj != (omp_interop_t) -1L);
+
+  omp_interop_rc_t ret_code = omp_irc_no_value;
+  omp_interop_fr_t fr = (omp_interop_fr_t) omp_get_interop_int (obj, omp_ipr_fr_id, &ret_code);
+
+  assert (ret_code == omp_irc_success);
+  assert (fr == expected_fr);
+
+  ret_code = omp_irc_no_value;
+  const char *fr_name = omp_get_interop_str (obj, omp_ipr_fr_name, &ret_code);
+
+  assert (ret_code == omp_irc_success);
+  if (fr == omp_ifr_cuda)
+    assert (strcmp (fr_name, "cuda") == 0);
+  else if (fr == omp_ifr_cuda_driver)
+    assert (strcmp (fr_name, "cuda_driver") == 0);
+  else if (fr == omp_ifr_hip)
+    assert (strcmp (fr_name, "hip") == 0);
+  else
+    assert (0);
+
+  ret_code = omp_irc_no_value;
+  int vendor = (int) omp_get_interop_int (obj, omp_ipr_vendor, &ret_code);
+  assert (ret_code == omp_irc_success);
+  assert (vendor == 11);  /* Nvidia */
+
+  ret_code = omp_irc_no_value;
+  const char *vendor_name = omp_get_interop_str (obj, omp_ipr_vendor_name, &ret_code);
+  assert (ret_code == omp_irc_success);
+  assert (strcmp (vendor_name, "nvidia") == 0);
+
+  ret_code = omp_irc_no_value;
+  int dev_num = (int) omp_get_interop_int (obj, omp_ipr_device_num, &ret_code);
+  assert (ret_code == omp_irc_success);
+  if (dev == DEFAULT_DEVICE)
+    assert (dev_num == omp_get_default_device ());
+  else
+    assert (dev_num == dev);
+
+  /* Platform: N/A.  */
+  ret_code = omp_irc_success;
+  (void) omp_get_interop_int (obj, omp_ipr_platform, &ret_code);
+  assert (ret_code == omp_irc_no_value);
+  ret_code = omp_irc_success;
+  (void) omp_get_interop_ptr (obj, omp_ipr_platform, &ret_code);
+  assert (ret_code == omp_irc_no_value);
+  ret_code = omp_irc_success;
+  (void) omp_get_interop_str (obj, omp_ipr_platform, &ret_code);
+  assert (ret_code == omp_irc_no_value);
+
+  /* Device: int / CUdevice / hipDevice_t -- all internally an 'int'.  */
+  ret_code = omp_irc_no_value;
+  int fr_device = (int) omp_get_interop_int (obj, omp_ipr_device, &ret_code);
+
+  /* CUDA also starts from 0 and goes to < n with cudaGetDeviceCount(&cn).  */
+  assert (ret_code == omp_irc_success);
+  assert (fr_device >= 0 && fr_device < omp_get_num_devices ());
+
+  /* Device context: N/A / CUcontext / hipCtx_t -- a pointer.  */
+  ret_code = omp_irc_out_of_range;
+  void *ctx = omp_get_interop_ptr (obj, omp_ipr_device_context, &ret_code);
+
+  if (fr == omp_ifr_cuda)
+    {
+      assert (ret_code == omp_irc_no_value);
+      assert (ctx == NULL);
+    }
+  else
+    {
+      assert (ret_code == omp_irc_success);
+      assert (ctx != NULL);
+    }
+
+  /* Stream/targetsync: cudaStream_t / CUstream / hipStream_t -- a pointer.  */
+  ret_code = omp_irc_out_of_range;
+  void *stream = omp_get_interop_ptr (obj, omp_ipr_targetsync, &ret_code);
+
+  if (is_targetsync)  /* no targetsync */
+    {
+      assert (ret_code == omp_irc_success);
+      assert (stream != NULL);
+    }
+  else
+    {
+      assert (ret_code == omp_irc_no_value);
+      assert (stream == NULL);
+    }
+
+  check_type (obj);
+  if (fr == omp_ifr_cuda)
+    {
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0);
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "int") == 0);
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "N/A") == 0);
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "cudaStream_t") == 0);
+    }
+  else if (fr == omp_ifr_cuda_driver)
+    {
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0);
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "CUdevice") == 0);
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "CUcontext") == 0);
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "CUstream") == 0);
+    }
+  else
+    {
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0);
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "hipDevice_t") == 0);
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "hipCtx_t") == 0);
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "hipStream_t") == 0);
+    }
+}
+
+
+void
+check_gcn (omp_interop_t obj, int dev, omp_interop_fr_t expected_fr, _Bool is_targetsync)
+{
+  assert (obj != omp_interop_none && obj != (omp_interop_t) -1L);
+
+  omp_interop_rc_t ret_code = omp_irc_no_value;
+  omp_interop_fr_t fr = (omp_interop_fr_t) omp_get_interop_int (obj, omp_ipr_fr_id, &ret_code);
+
+  assert (ret_code == omp_irc_success);
+  assert (fr == expected_fr);
+
+  ret_code = omp_irc_no_value;
+  const char *fr_name = omp_get_interop_str (obj, omp_ipr_fr_name, &ret_code);
+
+   assert (ret_code == omp_irc_success);
+  if (fr == omp_ifr_hip)
+    assert (strcmp (fr_name, "hip") == 0);
+  else if (fr == omp_ifr_hsa)
+    assert (strcmp (fr_name, "hsa") == 0);
+  else
+    assert (0);
+
+  ret_code = omp_irc_no_value;
+  int vendor = (int) omp_get_interop_int (obj, omp_ipr_vendor, &ret_code);
+  assert (ret_code == omp_irc_success);
+  assert (vendor == 1);  /* Amd */
+
+  ret_code = omp_irc_no_value;
+  const char *vendor_name = omp_get_interop_str (obj, omp_ipr_vendor_name, &ret_code);
+  assert (ret_code == omp_irc_success);
+  assert (strcmp (vendor_name, "amd") == 0);
+
+  ret_code = omp_irc_no_value;
+  int dev_num = (int) omp_get_interop_int (obj, omp_ipr_device_num, &ret_code);
+  assert (ret_code == omp_irc_success);
+  if (dev == DEFAULT_DEVICE)
+    assert (dev_num == omp_get_default_device ());
+  else
+    assert (dev_num == dev);
+
+  /* Platform: N/A.  */
+  ret_code = omp_irc_success;
+  (void) omp_get_interop_int (obj, omp_ipr_platform, &ret_code);
+  assert (ret_code == omp_irc_no_value);
+  ret_code = omp_irc_success;
+  (void) omp_get_interop_ptr (obj, omp_ipr_platform, &ret_code);
+  assert (ret_code == omp_irc_no_value);
+  ret_code = omp_irc_success;
+  (void) omp_get_interop_str (obj, omp_ipr_platform, &ret_code);
+  assert (ret_code == omp_irc_no_value);
+
+  /* Device: hipDevice_t / hsa_agent_t* -- hip is internally an 'int'.  */
+  ret_code = omp_irc_no_value;
+  if (fr == omp_ifr_hip)
+    {
+      /* HIP also starts from 0 and goes to < n as with cudaGetDeviceCount(&cn).  */
+      int fr_device = (int) omp_get_interop_int (obj, omp_ipr_device, &ret_code);
+      assert (ret_code == omp_irc_success);
+      assert (fr_device >= 0 && fr_device < omp_get_num_devices ());
+    }
+  else
+    {
+      void *agent = omp_get_interop_ptr (obj, omp_ipr_device, &ret_code);
+      assert (ret_code == omp_irc_success);
+      assert (agent != NULL);
+    }
+
+  /* Device context: hipCtx_t / N/A -- a pointer.  */
+  ret_code = omp_irc_out_of_range;
+  void *ctx = omp_get_interop_ptr (obj, omp_ipr_device_context, &ret_code);
+  if (fr == omp_ifr_hip)
+    {
+      assert (ret_code == omp_irc_success);
+      assert (ctx != NULL);
+    }
+  else
+    {
+      assert (ret_code == omp_irc_no_value);
+      assert (ctx == NULL);
+    }
+
+  /* Stream/targetsync: cudaStream_t / CUstream / hipStream_t -- a pointer.  */
+  ret_code = omp_irc_out_of_range;
+  void *stream = omp_get_interop_ptr (obj, omp_ipr_targetsync, &ret_code);
+
+  if (is_targetsync)
+    {
+      assert (ret_code == omp_irc_success);
+      assert (stream != NULL);
+    }
+  else
+    {
+      assert (ret_code == omp_irc_no_value);
+      assert (stream == NULL);
+    }
+
+  check_type (obj);
+  if (fr == omp_ifr_hip)
+    {
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0);
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "hipDevice_t") == 0);
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "hipCtx_t") == 0);
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "hipStream_t") == 0);
+    }
+  else
+    {
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0);
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "hsa_agent_t *") == 0);
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "N/A") == 0);
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "hsa_queue_t *") == 0);
+    }
+}
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-3-sm61.c b/libgomp/testsuite/libgomp.c/declare-variant-3-sm61.c
new file mode 100644
index 0000000..e6941d3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-3-sm61.c
@@ -0,0 +1,8 @@
+/* { dg-do link { target { offload_target_nvptx } } } */
+/* { dg-additional-options -foffload=nvptx-none } */
+/* { dg-additional-options "-foffload=-misa=sm_61 -foffload=-mptx=_" } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-3.h"
+
+/* { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump "= f61 \\(\\);" "optimized" } } */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-3.h b/libgomp/testsuite/libgomp.c/declare-variant-3.h
index c9c8f4a..f5695a2 100644
--- a/libgomp/testsuite/libgomp.c/declare-variant-3.h
+++ b/libgomp/testsuite/libgomp.c/declare-variant-3.h
@@ -37,6 +37,13 @@ f53 (void)
 
 __attribute__ ((noipa))
 int
+f61 (void)
+{
+  return 61;
+}
+
+__attribute__ ((noipa))
+int
 f70 (void)
 {
   return 70;
@@ -68,6 +75,7 @@ f89 (void)
 #pragma omp declare variant (f37) match (device={isa("sm_37")})
 #pragma omp declare variant (f52) match (device={isa("sm_52")})
 #pragma omp declare variant (f53) match (device={isa("sm_53")})
+#pragma omp declare variant (f61) match (device={isa("sm_61")})
 #pragma omp declare variant (f70) match (device={isa("sm_70")})
 #pragma omp declare variant (f75) match (device={isa("sm_75")})
 #pragma omp declare variant (f80) match (device={isa("sm_80")})
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx10-3-generic.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx10-3-generic.c
new file mode 100644
index 0000000..b7b95e6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx10-3-generic.c
@@ -0,0 +1,25 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx10-3-generic } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx10_3_generic \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been build for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available (as this is a generic config),
+   scan-offload-tree-dump will PASS - but linking fails with the
+   following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1030.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1030.c
index d98d5ef..3703e96 100644
--- a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1030.c
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1030.c
@@ -6,3 +6,28 @@
 #include "declare-variant-4.h"
 
 /* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx1030 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been build for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1031.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1031.c
new file mode 100644
index 0000000..e0d6289
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1031.c
@@ -0,0 +1,33 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx1031 } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx1031 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been build for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1032.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1032.c
new file mode 100644
index 0000000..46174cc2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1032.c
@@ -0,0 +1,33 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx1032 } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx1032 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been build for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1033.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1033.c
new file mode 100644
index 0000000..1bd6e66
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1033.c
@@ -0,0 +1,33 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx1033 } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx1033 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been build for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1034.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1034.c
new file mode 100644
index 0000000..4f67a73
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1034.c
@@ -0,0 +1,33 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx1034 } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx1034 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been build for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1035.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1035.c
new file mode 100644
index 0000000..a69d5e7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1035.c
@@ -0,0 +1,33 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx1035 } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx1035 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been build for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1036.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1036.c
index 93b8641..8c258c4 100644
--- a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1036.c
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1036.c
@@ -6,3 +6,28 @@
 #include "declare-variant-4.h"
 
 /* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx1036 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been build for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx11-generic.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx11-generic.c
new file mode 100644
index 0000000..fa9efb4
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx11-generic.c
@@ -0,0 +1,25 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx11-generic } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx11_generic \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been build for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available (as this is a generic config),
+   scan-offload-tree-dump will PASS - but linking fails with the
+   following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1100.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1100.c
index 6ade352..f0b7c6d 100644
--- a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1100.c
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1100.c
@@ -6,3 +6,28 @@
 #include "declare-variant-4.h"
 
 /* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx1100 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been build for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1101.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1101.c
new file mode 100644
index 0000000..213e904
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1101.c
@@ -0,0 +1,33 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx1101 } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx1101 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been build for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1102.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1102.c
new file mode 100644
index 0000000..3f68dc8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1102.c
@@ -0,0 +1,33 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx1102 } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx1102 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been build for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1103.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1103.c
index 6a6dc4f..c1eed44 100644
--- a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1103.c
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1103.c
@@ -6,3 +6,28 @@
 #include "declare-variant-4.h"
 
 /* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx1103 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been build for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1150.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1150.c
new file mode 100644
index 0000000..39d64ca
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1150.c
@@ -0,0 +1,33 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx1150 } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx1150 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been build for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1151.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1151.c
new file mode 100644
index 0000000..2a0c732
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1151.c
@@ -0,0 +1,33 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx1151 } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx1151 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been build for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1152.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1152.c
new file mode 100644
index 0000000..3c987dd
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1152.c
@@ -0,0 +1,33 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx1152 } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx1152 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been build for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1153.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1153.c
new file mode 100644
index 0000000..7d38b82
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx1153.c
@@ -0,0 +1,33 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx1153 } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx1153 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been build for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx9-4-generic.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx9-4-generic.c
new file mode 100644
index 0000000..07d1254
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx9-4-generic.c
@@ -0,0 +1,25 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx9-4-generic } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx9_4_generic \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been build for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available (as this is a generic config),
+   scan-offload-tree-dump will PASS - but linking fails with the
+   following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx9-generic.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx9-generic.c
new file mode 100644
index 0000000..d6ba097
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx9-generic.c
@@ -0,0 +1,25 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx9-generic } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx9_generic \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been build for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available (as this is a generic config),
+   scan-offload-tree-dump will PASS - but linking fails with the
+   following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx900.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx900.c
index f3f5244..37005fc 100644
--- a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx900.c
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx900.c
@@ -6,3 +6,28 @@
 #include "declare-variant-4.h"
 
 /* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx900 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been build for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx902.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx902.c
new file mode 100644
index 0000000..82981c5
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx902.c
@@ -0,0 +1,33 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx902 } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx902 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been build for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx904.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx904.c
new file mode 100644
index 0000000..89815fe
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx904.c
@@ -0,0 +1,33 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx904 } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx904 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been build for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx906.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx906.c
index ac43388..aeef690 100644
--- a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx906.c
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx906.c
@@ -6,3 +6,28 @@
 #include "declare-variant-4.h"
 
 /* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx906 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been build for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx908.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx908.c
index f60741f..799b546 100644
--- a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx908.c
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx908.c
@@ -6,3 +6,28 @@
 #include "declare-variant-4.h"
 
 /* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx908 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been build for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx909.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx909.c
new file mode 100644
index 0000000..e8a6f63
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx909.c
@@ -0,0 +1,33 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx909 } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx909 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been build for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx90a.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx90a.c
index 832d174..de5626e 100644
--- a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx90a.c
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx90a.c
@@ -6,3 +6,28 @@
 #include "declare-variant-4.h"
 
 /* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx90a \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been build for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx90c.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx90c.c
index 44629a8..dfad7ec 100644
--- a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx90c.c
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx90c.c
@@ -6,3 +6,28 @@
 #include "declare-variant-4.h"
 
 /* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx90c \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been build for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx942.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx942.c
new file mode 100644
index 0000000..c8c7446
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx942.c
@@ -0,0 +1,33 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx942 } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx942 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been build for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx950.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx950.c
new file mode 100644
index 0000000..af81f11
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx950.c
@@ -0,0 +1,33 @@
+/* { dg-do link { target { offload_target_amdgcn } } } */
+/* { dg-additional-options -foffload=amdgcn-amdhsa } */
+/* { dg-additional-options -foffload=-march=gfx950 } */
+/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */
+
+#include "declare-variant-4.h"
+
+/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx950 \\(\\);" "optimized" } } */
+
+
+/* This code will link nicely if the multilib for that GPU architecture
+   has been build for GCC. In that case, scan-offload-tree-dump will
+   PASS and the linking will yield an XPASS message due to following line: */
+
+/* { dg-excess-errors "ld: error: unable to find library -lgomp|gcn mkoffload: fatal error" } */
+
+/* If the multi-lib config is not available, there are two options:
+
+   * If the generic multi-lib is available, mkoffload fails early,
+     yielding UNRESOLVED for scan-offload-tree-dump and an XFAIL
+     for the message:
+       gcn mkoffload: fatal error: GCC was built without library support
+       for '-march=gfx...'; consider compiling for the associated
+       generic architecture '-march=gfx...-generic' instead
+
+   * Or compling succeeds - then scan-offload-tree-dump will PASS -
+     but linking fails with the following error (XFAIL):
+       ld: error: unable to find library -lgomp
+       collect2: error: ld returned 1 exit status
+       gcn mkoffload: fatal error: ...-gnu-accel-amdgcn-amdhsa-gcc returned 1 exit status
+       compilation terminated.
+       lto-wrapper: fatal error: .../amdgcn-amdhsa/mkoffload returned 1 exit status
+       compilation terminated. */
diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4.h b/libgomp/testsuite/libgomp.c/declare-variant-4.h
index 53788d2..dd97edb 100644
--- a/libgomp/testsuite/libgomp.c/declare-variant-4.h
+++ b/libgomp/testsuite/libgomp.c/declare-variant-4.h
@@ -9,6 +9,20 @@ gfx900 (void)
 
 __attribute__ ((noipa))
 int
+gfx902 (void)
+{
+  return 0x902;
+}
+
+__attribute__ ((noipa))
+int
+gfx904 (void)
+{
+  return 0x904;
+}
+
+__attribute__ ((noipa))
+int
 gfx906 (void)
 {
   return 0x906;
@@ -23,6 +37,13 @@ gfx908 (void)
 
 __attribute__ ((noipa))
 int
+gfx909 (void)
+{
+  return 0x909;
+}
+
+__attribute__ ((noipa))
+int
 gfx90a (void)
 {
   return 0x90a;
@@ -37,6 +58,20 @@ gfx90c (void)
 
 __attribute__ ((noipa))
 int
+gfx942 (void)
+{
+  return 0x942;
+}
+
+__attribute__ ((noipa))
+int
+gfx950 (void)
+{
+  return 0x950;
+}
+
+__attribute__ ((noipa))
+int
 gfx1030 (void)
 {
   return 0x1030;
@@ -44,6 +79,41 @@ gfx1030 (void)
 
 __attribute__ ((noipa))
 int
+gfx1031 (void)
+{
+  return 0x1031;
+}
+
+__attribute__ ((noipa))
+int
+gfx1032 (void)
+{
+  return 0x1032;
+}
+
+__attribute__ ((noipa))
+int
+gfx1033 (void)
+{
+  return 0x1033;
+}
+
+__attribute__ ((noipa))
+int
+gfx1034 (void)
+{
+  return 0x1034;
+}
+
+__attribute__ ((noipa))
+int
+gfx1035 (void)
+{
+  return 0x1035;
+}
+
+__attribute__ ((noipa))
+int
 gfx1036 (void)
 {
   return 0x1036;
@@ -58,20 +128,111 @@ gfx1100 (void)
 
 __attribute__ ((noipa))
 int
+gfx1101 (void)
+{
+  return 0x1101;
+}
+
+__attribute__ ((noipa))
+int
+gfx1102 (void)
+{
+  return 0x1102;
+}
+
+__attribute__ ((noipa))
+int
 gfx1103 (void)
 {
   return 0x1103;
 }
 
+__attribute__ ((noipa))
+int
+gfx1150 (void)
+{
+  return 0x1150;
+}
+
+__attribute__ ((noipa))
+int
+gfx1151 (void)
+{
+  return 0x1151;
+}
+
+__attribute__ ((noipa))
+int
+gfx1152 (void)
+{
+  return 0x1152;
+}
+
+__attribute__ ((noipa))
+int
+gfx1153 (void)
+{
+  return 0x1153;
+}
+
+__attribute__ ((noipa))
+int
+gfx9_generic (void)
+{
+  return 0x90ff;
+}
+
+__attribute__ ((noipa))
+int
+gfx9_4_generic (void)
+{
+  return 0x94ff;
+}
+
+__attribute__ ((noipa))
+int
+gfx10_3_generic (void)
+{
+  return 0x103ff;
+}
+
+__attribute__ ((noipa))
+int
+gfx11_generic (void)
+{
+  return 0x110ff;
+}
+
+
 #pragma omp declare variant(gfx900) match(device = {isa("gfx900")})
+#pragma omp declare variant(gfx902) match(device = {isa("gfx902")})
+#pragma omp declare variant(gfx904) match(device = {isa("gfx904")})
 #pragma omp declare variant(gfx906) match(device = {isa("gfx906")})
 #pragma omp declare variant(gfx908) match(device = {isa("gfx908")})
+#pragma omp declare variant(gfx909) match(device = {isa("gfx909")})
 #pragma omp declare variant(gfx90a) match(device = {isa("gfx90a")})
 #pragma omp declare variant(gfx90c) match(device = {isa("gfx90c")})
+#pragma omp declare variant(gfx942) match(device = {isa("gfx942")})
+#pragma omp declare variant(gfx950) match(device = {isa("gfx950")})
 #pragma omp declare variant(gfx1030) match(device = {isa("gfx1030")})
+#pragma omp declare variant(gfx1031) match(device = {isa("gfx1031")})
+#pragma omp declare variant(gfx1032) match(device = {isa("gfx1032")})
+#pragma omp declare variant(gfx1033) match(device = {isa("gfx1033")})
+#pragma omp declare variant(gfx1034) match(device = {isa("gfx1034")})
+#pragma omp declare variant(gfx1035) match(device = {isa("gfx1035")})
 #pragma omp declare variant(gfx1036) match(device = {isa("gfx1036")})
 #pragma omp declare variant(gfx1100) match(device = {isa("gfx1100")})
+#pragma omp declare variant(gfx1101) match(device = {isa("gfx1101")})
+#pragma omp declare variant(gfx1102) match(device = {isa("gfx1102")})
 #pragma omp declare variant(gfx1103) match(device = {isa("gfx1103")})
+#pragma omp declare variant(gfx1150) match(device = {isa("gfx1150")})
+#pragma omp declare variant(gfx1151) match(device = {isa("gfx1151")})
+#pragma omp declare variant(gfx1152) match(device = {isa("gfx1152")})
+#pragma omp declare variant(gfx1153) match(device = {isa("gfx1153")})
+#pragma omp declare variant(gfx9_generic) match(device = {isa("gfx9-generic")})
+#pragma omp declare variant(gfx9_4_generic) match(device = {isa("gfx9-4-generic")})
+#pragma omp declare variant(gfx10_3_generic) match(device = {isa("gfx10-3-generic")})
+#pragma omp declare variant(gfx11_generic) match(device = {isa("gfx11-generic")})
 __attribute__ ((noipa))
 int
 f (void)
diff --git a/libgomp/testsuite/libgomp.c/device_uid.c b/libgomp/testsuite/libgomp.c/device_uid.c
index 0412d06..83aba0f 100644
--- a/libgomp/testsuite/libgomp.c/device_uid.c
+++ b/libgomp/testsuite/libgomp.c/device_uid.c
@@ -5,10 +5,12 @@
 int main()
 {
   const char **strs = (const char **) malloc (sizeof (char*) * (omp_get_num_devices () + 1));
-  for (int i = omp_invalid_device - 1; i <= omp_get_num_devices () + 1; i++)
+  for (int i = omp_default_device - 1; i <= omp_get_num_devices () + 1; i++)
     {
       const char *str = omp_get_uid_from_device (i);
       int dev = omp_get_device_from_uid (str);
+      if (i == omp_default_device)
+	i = omp_get_default_device ();
 // __builtin_printf("%i -> %s -> %d\n", i, str, dev);
       if (i < omp_initial_device || i > omp_get_num_devices ())
 	{
diff --git a/libgomp/testsuite/libgomp.c/interop-cublas-full.c b/libgomp/testsuite/libgomp.c/interop-cublas-full.c
new file mode 100644
index 0000000..2df5277
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-cublas-full.c
@@ -0,0 +1,176 @@
+/* { dg-require-effective-target openacc_cublas } */
+/* { dg-additional-options "-lcublas" } */
+
+/* NOTE: This file is also included by libgomp.c-c++-common/interop-cudablas-libonly.c
+   to test the fallback version.  */
+
+/* Check whether cuBlas' daxpy works with an interop object.
+     daxpy(N, DA, DX, INCX, DY, INCY)
+   calculates (for DX = DY = 1):
+     DY(1:N) =  DY(1:N) + DA * DX(1:N)
+   and otherwise N array elements, taking every INCX-th or INCY-th one, repectively.
+
+Based on the interop example in OpenMP's example document  */
+
+/* Minimal check whether CUDA works - by checking whether the API routines
+   seem to work.  This includes a fallback if the header is not
+   available.  */
+
+#include <assert.h>
+#include <omp.h>
+#include "../libgomp.c-c++-common/on_device_arch.h"
+
+
+#if __has_include(<cuda.h>) && __has_include(<cudaTypedefs.h>) && __has_include(<cuda_runtime.h>) && __has_include(<cublas_v2.h>) && !defined(USE_CUDA_FALLBACK_HEADER)
+  #include <cuda.h>
+  #include <cudaTypedefs.h>
+  #include <cuda_runtime.h>
+  #include <cublas_v2.h>
+
+#else
+  /* Add a poor man's fallback declaration.  */
+  #if USE_CUDA_FALLBACK_HEADER
+    // Don't warn.
+  #elif !__has_include(<cuda.h>)
+    #warning "Using GCC's cuda.h as fallback for cuda.h"
+  #elif !__has_include(<cudaTypedefs.h>)
+    #warning "Using GCC's cuda.h as fallback for cudaTypedefs.h"
+  #elif !__has_include(<cuda_runtime.h>)
+    #warning "Using GCC's cuda.h as fallback for cuda_runtime.h"
+  #else
+    #warning "Using GCC's cuda.h as fallback for cublas_v2.h"
+  #endif
+  #include "../../../include/cuda/cuda.h"
+
+  typedef enum {
+    CUBLAS_STATUS_SUCCESS = 0,
+  } cublasStatus_t;
+
+  typedef CUstream cudaStream_t;
+  typedef struct cublasContext* cublasHandle_t;
+
+  #define cublasCreate cublasCreate_v2
+  cublasStatus_t cublasCreate_v2 (cublasHandle_t *);
+
+  #define cublasSetStream cublasSetStream_v2
+  cublasStatus_t cublasSetStream_v2 (cublasHandle_t, cudaStream_t);
+
+  #define cublasDaxpy cublasDaxpy_v2
+  cublasStatus_t cublasDaxpy_v2(cublasHandle_t, int, const double*, const double*, int, double*, int);
+#endif
+
+static int used_variant = 0;
+
+void
+run_cuBlasdaxpy (int n, double da, const double *dx, int incx, double *dy, int incy, omp_interop_t obj)
+{
+  used_variant = 1;
+
+  omp_interop_rc_t res;
+  cublasStatus_t stat;
+
+  omp_intptr_t fr = omp_get_interop_int(obj, omp_ipr_fr_id, &res);
+  assert (res == omp_irc_success && fr == omp_ifr_cuda);
+
+  cudaStream_t stream = (cudaStream_t) omp_get_interop_ptr (obj, omp_ipr_targetsync, &res);
+  assert (res == omp_irc_success);
+
+  cublasHandle_t handle;
+  stat = cublasCreate (&handle);
+  assert (stat == CUBLAS_STATUS_SUCCESS);
+
+  stat = cublasSetStream (handle, stream);
+  assert (stat == CUBLAS_STATUS_SUCCESS);
+
+  /* 'da' can be in host or device space, 'dx' and 'dy' must be in device space.  */
+  stat = cublasDaxpy (handle, n, &da, dx, 1, dy, 1) ;
+  assert (stat == CUBLAS_STATUS_SUCCESS);
+}
+
+
+#pragma omp declare variant(run_cuBlasdaxpy) \
+                       match(construct={dispatch}, target_device={kind(nohost), arch("nvptx")}) \
+                       adjust_args(need_device_ptr : dx, dy) \
+                       append_args(interop(targetsync, prefer_type("cuda")))
+
+void
+run_daxpy (int n, double da, const double *dx, int incx, double *dy, int incy)
+{
+  used_variant = 2;
+
+  if (incx == 1 && incy == 1)
+    #pragma omp simd
+    for (int i = 0; i < n; i++)
+      dy[i] += da * dx[i];
+  else
+    {
+      int ix = 0;
+      int iy = 0;
+      for (int i = 0; i < n; i++)
+	{
+	  dy[iy] += da * dx[ix];
+	  ix += incx;
+	  iy += incy;
+	}
+    }
+}
+
+
+void
+run_test (int dev)
+{
+  constexpr int N = 1024;
+
+  // A = {1,2,...,N}
+  // B = {-1, -2, ..., N}
+  // B' = daxpy (N, 3, A, incx=1, B, incy=1)
+  //    = B + 3*A
+  // -> B' = {0, 2, 4, 6, ... }
+
+  double A[N], B[N];
+  double factor = 3.0;
+  for (int i = 0; i < N; i++)
+    {
+      A[i] = i;
+      B[i] = -i;
+    }
+
+  if (dev != omp_initial_device && dev != omp_get_num_devices ())
+    {
+      #pragma omp target enter data device(dev) map(A, B)
+    }
+
+  used_variant = 99;
+  #pragma omp dispatch device(dev)
+    run_daxpy (N, factor, A, 1, B, 1);  
+
+  if (dev != omp_initial_device && dev != omp_get_num_devices ())
+    {
+      #pragma omp target exit data device(dev) map(release: A) map(from: B)
+
+      int tmp = omp_get_default_device ();
+      omp_set_default_device (dev);
+      if (on_device_arch_nvptx ())
+	assert (used_variant == 1);
+      else
+	assert (used_variant == 2);
+      omp_set_default_device (tmp);
+    }
+  else
+    assert (used_variant == 2);
+
+  for (int i = 0; i < N; i++)
+    assert (B[i] == 2*i);
+}
+
+int   
+main () 
+{   
+  int ndev = omp_get_num_devices ();
+
+  for (int dev = 0; dev <= ndev; dev++)
+    run_test (dev);
+  run_test (omp_initial_device);  
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/interop-cublas-libonly.c b/libgomp/testsuite/libgomp.c/interop-cublas-libonly.c
new file mode 100644
index 0000000..89c0652
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-cublas-libonly.c
@@ -0,0 +1,7 @@
+/* { dg-require-effective-target openacc_libcublas } */
+/* { dg-additional-options "-lcublas" } */
+
+/* Same as interop-cudablas-full.c, but also works if the header is not available. */
+
+#define USE_CUDA_FALLBACK_HEADER 1
+#include "interop-cublas-full.c"
diff --git a/libgomp/testsuite/libgomp.c/interop-cuda-full.c b/libgomp/testsuite/libgomp.c/interop-cuda-full.c
new file mode 100644
index 0000000..c48a934
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-cuda-full.c
@@ -0,0 +1,162 @@
+/* { dg-do run { target { offload_device_nvptx } } } */
+/* { dg-do link { target { ! offload_device_nvptx } } } */
+
+/* { dg-require-effective-target openacc_cuda } */
+/* { dg-require-effective-target openacc_cudart } */
+/* { dg-additional-options "-lcuda -lcudart" } */
+
+/* NOTE: This file is also included by libgomp.c-c++-common/interop-cuda-libonly.c
+   to test the fallback version, which defines USE_CUDA_FALLBACK_HEADER.  */
+
+/* Minimal check whether CUDA works - by checking whether the API routines
+   seem to work.  This includes a fallback if the header is not
+   available.  */
+
+#include <assert.h>
+#include <omp.h>
+
+#if __has_include(<cuda.h>) && __has_include(<cudaTypedefs.h>) && __has_include(<cuda_runtime.h>) && !defined(USE_CUDA_FALLBACK_HEADER)
+  #include <cuda.h>
+  #include <cudaTypedefs.h>
+  #include <cuda_runtime.h>
+
+#else
+  /* Add a poor man's fallback declaration.  */
+  #if USE_CUDA_FALLBACK_HEADER
+    // Don't warn.
+  #elif !__has_include(<cuda.h>)
+    #warning "Using GCC's cuda.h as fallback for cuda.h"
+  #elif !__has_include(<cudaTypedefs.h>)
+    #warning "Using GCC's cuda.h as fallback for cudaTypedefs.h"
+  #else
+    #warning "Using GCC's cuda.h as fallback for cuda_runtime.h"
+  #endif
+  #include "../../../include/cuda/cuda.h"
+
+  typedef int cudaError_t;
+  typedef CUstream cudaStream_t;
+  enum {
+    cudaSuccess = 0
+  };
+
+  enum cudaDeviceAttr {
+    cudaDevAttrClockRate = 13,
+    cudaDevAttrMaxGridDimX = 5
+  };
+
+  cudaError_t cudaDeviceGetAttribute (int *, enum cudaDeviceAttr, int);
+  cudaError_t cudaStreamQuery(cudaStream_t);
+  CUresult cuCtxGetApiVersion(CUcontext, unsigned int *);
+  CUresult cuStreamGetCtx (CUstream, CUcontext *);
+#endif
+
+int
+main ()
+{
+  int ivar;
+  unsigned uvar;
+  omp_interop_rc_t res;
+  omp_interop_t obj_cuda = omp_interop_none;
+  omp_interop_t obj_cuda_driver = omp_interop_none;
+  cudaError_t cuda_err;
+  CUresult cu_err;
+
+  #pragma omp interop init(target, targetsync, prefer_type("cuda") : obj_cuda) \
+		      init(target, targetsync, prefer_type("cuda_driver") : obj_cuda_driver) \
+
+  omp_interop_fr_t fr = (omp_interop_fr_t) omp_get_interop_int (obj_cuda, omp_ipr_fr_id, &res);
+  assert (res == omp_irc_success);
+  assert (fr == omp_ifr_cuda);
+
+  fr = (omp_interop_fr_t) omp_get_interop_int (obj_cuda_driver, omp_ipr_fr_id, &res);
+  assert (res == omp_irc_success);
+  assert (fr == omp_ifr_cuda_driver);
+
+  ivar = (int) omp_get_interop_int (obj_cuda, omp_ipr_vendor, &res);
+  assert (res == omp_irc_success);
+  assert (ivar == 11);
+
+  ivar = (int) omp_get_interop_int (obj_cuda_driver, omp_ipr_vendor, &res);
+  assert (res == omp_irc_success);
+  assert (ivar == 11);
+
+
+  /* Check whether the omp_ipr_device -> cudaDevice_t yields a valid device.  */
+
+  CUdevice cu_dev = (int) omp_get_interop_int (obj_cuda_driver, omp_ipr_device, &res);
+  assert (res == omp_irc_success);
+
+  /* Assume a clock size is available and > 1 GHz; value is in kHz.  */
+  cu_err = cuDeviceGetAttribute (&ivar, cudaDevAttrClockRate, cu_dev);
+  assert (cu_err == CUDA_SUCCESS);
+  assert (ivar > 1000000 /* kHz */);
+
+  /* Assume that the MaxGridDimX is available and > 1024.  */
+  cu_err = cuDeviceGetAttribute (&ivar, cudaDevAttrMaxGridDimX, cu_dev);
+  assert (cu_err == CUDA_SUCCESS);
+  assert (ivar > 1024);
+
+  int cuda_dev = (int) omp_get_interop_int (obj_cuda, omp_ipr_device, &res);
+  assert (res == omp_irc_success);
+  assert (cuda_dev == (CUdevice) cu_dev); // Assume they are the same ...
+
+  /* Assume a clock size is available and > 1 GHz; value is in kHz.  */
+  cuda_err = cudaDeviceGetAttribute (&ivar, cudaDevAttrClockRate, cuda_dev);
+  assert (cuda_err == cudaSuccess);
+  assert (ivar > 1000000 /* kHz */);
+
+  /* Assume that the MaxGridDimX is available and > 1024.  */
+  cuda_err = cudaDeviceGetAttribute (&ivar, cudaDevAttrMaxGridDimX, cuda_dev);
+  assert (cuda_err == cudaSuccess);
+  assert (ivar > 1024);
+
+
+
+
+  /* Check whether the omp_ipr_device_context -> CUcontext yields a context.  */
+
+  CUcontext cu_ctx = (CUcontext) omp_get_interop_ptr (obj_cuda_driver, omp_ipr_device_context, &res);
+  assert (res == omp_irc_success);
+
+  /* Assume API Version > 0 for Nvidia, cudaErrorNotSupported for AMD.  */
+  uvar = 99;
+  cu_err = cuCtxGetApiVersion (cu_ctx, &uvar);
+  assert (cu_err == CUDA_SUCCESS);
+  assert (uvar > 0);
+
+
+  /* Check whether the omp_ipr_targetsync -> cudaStream_t yields a stream.  */
+
+  cudaStream_t cuda_sm = (cudaStream_t) omp_get_interop_ptr (obj_cuda, omp_ipr_targetsync, &res);
+  assert (res == omp_irc_success);
+
+  CUstream cu_sm = (cudaStream_t) omp_get_interop_ptr (obj_cuda_driver, omp_ipr_targetsync, &res);
+  assert (res == omp_irc_success);
+
+  assert ((void*) cu_sm != (void*) cuda_sm); // Type compatible but should have created two streams
+
+  int dev_stream = 99;
+#if CUDA_VERSION >= 12080
+  cuda_err = cudaStreamGetDevice (cuda_sm, &dev_stream);
+  assert (cuda_err == cudaSuccess);
+#else
+  cu_err = cuStreamGetCtx (cu_sm, &cu_ctx) != CUDA_SUCCESS;
+  if (cu_err == CUDA_SUCCESS)
+    cuda_err = cuCtxPushCurrent (cu_ctx) != CUDA_SUCCESS;
+  if (cu_err == CUDA_SUCCESS)
+    cuda_err = cuCtxGetDevice (&dev_stream) != CUDA_SUCCESS;
+  if (cu_err == CUDA_SUCCESS)
+    cu_err = cuCtxPopCurrent (&cu_ctx) != CUDA_SUCCESS;
+  assert (cu_err == CUDA_SUCCESS);
+#endif
+  assert (dev_stream == cuda_dev);
+
+  /* All jobs should have been completed (as there were none none)  */
+  cuda_err = cudaStreamQuery (cuda_sm);
+  assert (cuda_err == cudaSuccess);
+
+  cu_err = cuStreamQuery (cu_sm);
+  assert (cu_err == CUDA_SUCCESS);
+
+  #pragma omp interop destroy(obj_cuda, obj_cuda_driver)
+}
diff --git a/libgomp/testsuite/libgomp.c/interop-cuda-libonly.c b/libgomp/testsuite/libgomp.c/interop-cuda-libonly.c
new file mode 100644
index 0000000..bc257a2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-cuda-libonly.c
@@ -0,0 +1,11 @@
+/* { dg-do run { target { offload_device_nvptx } } } */
+/* { dg-do link { target { ! offload_device_nvptx } } } */
+
+/* { dg-require-effective-target openacc_libcudart } */
+/* { dg-require-effective-target openacc_libcuda } */
+/* { dg-additional-options "-lcuda -lcudart" } */
+
+/* Same as interop-cuda-full.c, but also works if the header is not available. */
+
+#define USE_CUDA_FALLBACK_HEADER 1
+#include "interop-cuda-full.c"
diff --git a/libgomp/testsuite/libgomp.c/interop-hip-amd-full.c b/libgomp/testsuite/libgomp.c/interop-hip-amd-full.c
new file mode 100644
index 0000000..bd44f44
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hip-amd-full.c
@@ -0,0 +1,10 @@
+/* { dg-do run { target { offload_device_gcn } } } */
+/* { dg-do link { target { ! offload_device_gcn } } } */
+
+/* { dg-require-effective-target gomp_hip_header_amd } */
+/* { dg-require-effective-target gomp_libamdhip64 } */
+/* { dg-additional-options "-lamdhip64" } */
+
+#define __HIP_PLATFORM_AMD__ 1
+
+#include "interop-hip.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hip-amd-no-hip-header.c b/libgomp/testsuite/libgomp.c/interop-hip-amd-no-hip-header.c
new file mode 100644
index 0000000..91ad987
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hip-amd-no-hip-header.c
@@ -0,0 +1,11 @@
+/* { dg-do run { target { offload_device_gcn } } } */
+/* { dg-do link { target { ! offload_device_gcn } } } */
+
+/* { dg-require-effective-target gomp_libamdhip64 } */
+/* { dg-additional-options "-lamdhip64" } */
+
+#define __HIP_PLATFORM_AMD__ 1
+
+#define USE_HIP_FALLBACK_HEADER 1
+
+#include "interop-hip.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hip-nvidia-full.c b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-full.c
new file mode 100644
index 0000000..d5dc236
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-full.c
@@ -0,0 +1,11 @@
+/* { dg-do run { target { offload_device_nvptx } } } */
+/* { dg-do link { target { ! offload_device_nvptx } } } */
+
+/* { dg-require-effective-target openacc_cudart } */
+/* { dg-require-effective-target openacc_cuda } */
+/* { dg-require-effective-target gomp_hip_header_nvidia } */
+/* { dg-additional-options "-lcuda -lcudart -Wno-deprecated-declarations" } */
+
+#define __HIP_PLATFORM_NVIDIA__ 1
+
+#include "interop-hip.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-headers.c b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-headers.c
new file mode 100644
index 0000000..7cff2cb
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-headers.c
@@ -0,0 +1,13 @@
+/* { dg-do run { target { offload_device_nvptx } } } */
+/* { dg-do link { target { ! offload_device_nvptx } } } */
+
+/* { dg-require-effective-target openacc_libcudart } */
+/* { dg-require-effective-target openacc_libcuda } */
+/* { dg-additional-options "-lcuda -lcudart" } */
+
+#define __HIP_PLATFORM_NVIDIA__ 1
+
+#define USE_HIP_FALLBACK_HEADER 1
+#define USE_CUDA_FALLBACK_HEADER 1
+
+#include "interop-hip.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c
new file mode 100644
index 0000000..7b7dc74
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c
@@ -0,0 +1,12 @@
+/* { dg-do run { target { offload_device_nvptx } } } */
+/* { dg-do link { target { ! offload_device_nvptx } } } */
+
+/* { dg-require-effective-target openacc_cudart } */
+/* { dg-require-effective-target openacc_cuda } */
+/* { dg-additional-options "-lcuda -lcudart" } */
+
+#define __HIP_PLATFORM_NVIDIA__ 1
+
+#define USE_HIP_FALLBACK_HEADER 1
+
+#include "interop-hip.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hip.h b/libgomp/testsuite/libgomp.c/interop-hip.h
new file mode 100644
index 0000000..20a1ccb
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hip.h
@@ -0,0 +1,234 @@
+/* Minimal check whether HIP works - by checking whether the API routines
+   seem to work.  This includes various fallbacks if the header is not
+   available.  */
+
+#include <assert.h>
+#include <omp.h>
+
+#if !defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__)
+  #error "Either __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__ must be defined"
+#endif
+
+#if defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__)
+  #error "Either __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__ must be defined"
+#endif
+
+#if __has_include(<hip/hip_runtime_api.h>) && !defined(USE_HIP_FALLBACK_HEADER)
+  #include <hip/hip_runtime_api.h>
+
+#elif defined(__HIP_PLATFORM_AMD__)
+  /* Add a poor man's fallback declaration.  */
+  #if !defined(USE_HIP_FALLBACK_HEADER)
+    #warning "Using fallback declaration for <hip/hip_runtime_api.h> for __HIP_PLATFORM_AMD__"
+  #endif
+
+  typedef struct ihipStream_t* hipStream_t;
+  typedef struct ihipCtx_t* hipCtx_t;
+  typedef int hipError_t;
+  typedef int hipDevice_t;
+  enum {
+    hipSuccess = 0,
+    hipErrorNotSupported = 801
+  };
+
+  typedef enum hipDeviceAttribute_t {
+    hipDeviceAttributeClockRate = 5,
+    hipDeviceAttributeMaxGridDimX = 29
+  } hipDeviceAttribute_t;
+
+  hipError_t hipDeviceGetAttribute (int *, hipDeviceAttribute_t, hipDevice_t);
+  hipError_t hipCtxGetApiVersion (hipCtx_t, int *);
+  hipError_t hipStreamGetDevice (hipStream_t, hipDevice_t *);
+  hipError_t hipStreamQuery (hipStream_t);
+
+#elif defined(__HIP_PLATFORM_NVIDIA__)
+  /* Add a poor man's fallback declaration.  */
+  #if !defined(USE_HIP_FALLBACK_HEADER)
+    #warning "Using fallback declaration for <hip/hip_runtime_api.h> for __HIP_PLATFORM_NVIDIA__"
+  #endif
+
+  #if __has_include(<cuda.h>) && __has_include(<cudaTypedefs.h>) && __has_include(<cuda_runtime.h>) && !defined(USE_CUDA_FALLBACK_HEADER)
+    #include <cuda.h>
+    #include <cudaTypedefs.h>
+    #include <cuda_runtime.h>
+  #else
+    #if defined(USE_CUDA_FALLBACK_HEADER)
+       // no warning
+    #elif !__has_include(<cuda.h>)
+      #warning "Using GCC's cuda.h as fallback for cuda.h"
+    #elif !__has_include(<cudaTypedefs.h>)
+      #warning "Using GCC's cuda.h as fallback for cudaTypedefs.h"
+    #else
+      #warning "Using GCC's cuda.h as fallback for cuda_runtime.h"
+    #endif
+
+    #include "../../../include/cuda/cuda.h"
+
+    typedef int cudaError_t;
+    enum {
+      cudaSuccess = 0
+    };
+
+    enum cudaDeviceAttr {
+      cudaDevAttrClockRate = 13,
+      cudaDevAttrMaxGridDimX = 5
+    };
+
+    cudaError_t cudaDeviceGetAttribute (int *, enum cudaDeviceAttr, int);
+    CUresult cuCtxGetApiVersion(CUcontext, unsigned int *);
+    CUresult cuStreamGetCtx (CUstream, CUcontext *);
+  #endif
+
+  typedef CUstream hipStream_t;
+  typedef CUcontext hipCtx_t;
+  typedef CUdevice hipDevice_t;
+
+  typedef int hipError_t;
+  typedef int hipDevice_t;
+  enum {
+    hipSuccess = 0,
+    hipErrorNotSupported = 801
+  };
+
+
+  typedef enum hipDeviceAttribute_t {
+    hipDeviceAttributeClockRate = 5,
+    hipDeviceAttributeMaxGridDimX = 29
+  } hipDeviceAttribute_t;
+
+  inline static hipError_t
+  hipDeviceGetAttribute (int *ival, hipDeviceAttribute_t attr, hipDevice_t dev)
+  {
+    enum cudaDeviceAttr cuattr;
+    switch (attr)
+      {
+      case hipDeviceAttributeClockRate:
+	cuattr = cudaDevAttrClockRate;
+	break;
+      case hipDeviceAttributeMaxGridDimX:
+	cuattr = cudaDevAttrMaxGridDimX;
+	break;
+      default:
+	assert (0);
+      }
+    return cudaDeviceGetAttribute (ival, cuattr, dev) != cudaSuccess;
+  }
+
+  inline static hipError_t
+  hipCtxGetApiVersion (hipCtx_t ctx, int *ver)
+  {
+    unsigned uver;
+    hipError_t err;
+    err = cuCtxGetApiVersion (ctx, &uver) != CUDA_SUCCESS;
+    *ver = (int) uver;
+    return err;
+  }
+
+  inline static hipError_t
+  hipStreamGetDevice (hipStream_t stream, hipDevice_t *dev)
+  {
+#if CUDA_VERSION >= 12080
+    return cudaStreamGetDevice (stream, dev);
+#else
+    hipError_t err;
+    CUcontext ctx;
+    err = cuStreamGetCtx (stream, &ctx) != CUDA_SUCCESS;
+    if (err == hipSuccess)
+      err = cuCtxPushCurrent (ctx) != CUDA_SUCCESS;
+    if (err == hipSuccess)
+      err = cuCtxGetDevice (dev) != CUDA_SUCCESS;
+    if (err == hipSuccess)
+      err = cuCtxPopCurrent (&ctx) != CUDA_SUCCESS;
+    return err;
+#endif
+  }
+
+  inline static hipError_t
+  hipStreamQuery (hipStream_t stream)
+  {
+    return cuStreamQuery (stream) != CUDA_SUCCESS;
+  }
+
+#else
+  #error "should be unreachable"
+#endif
+
+int
+main ()
+{
+  int ivar;
+  omp_interop_rc_t res;
+  omp_interop_t obj = omp_interop_none;
+  hipError_t hip_err;
+
+  #pragma omp interop init(target, targetsync, prefer_type("hip") : obj)
+
+  omp_interop_fr_t fr = (omp_interop_fr_t) omp_get_interop_int (obj, omp_ipr_fr_id, &res);
+  assert (res == omp_irc_success);
+  assert (fr == omp_ifr_hip);
+
+  ivar = (int) omp_get_interop_int (obj, omp_ipr_vendor, &res);
+  assert (res == omp_irc_success);
+  int vendor_is_amd = ivar == 1;
+  #if defined(__HIP_PLATFORM_AMD__)
+    assert (ivar == 1);
+  #elif defined(__HIP_PLATFORM_NVIDIA__)
+    assert (ivar == 11);
+  #else
+    assert (0);
+  #endif
+
+
+  /* Check whether the omp_ipr_device -> hipDevice_t yields a valid device.  */
+
+  hipDevice_t hip_dev = (int) omp_get_interop_int (obj, omp_ipr_device, &res);
+  assert (res == omp_irc_success);
+
+  /* Assume a clock size is available and > 1 GHz; value is in kHz.  */
+  hip_err = hipDeviceGetAttribute (&ivar, hipDeviceAttributeClockRate, hip_dev);
+  assert (hip_err == hipSuccess);
+  assert (ivar > 1000000 /* kHz */);
+
+  /* Assume that the MaxGridDimX is available and > 1024.  */
+  hip_err = hipDeviceGetAttribute (&ivar, hipDeviceAttributeMaxGridDimX, hip_dev);
+  assert (hip_err == hipSuccess);
+  assert (ivar > 1024);
+
+
+  /* Check whether the omp_ipr_device_context -> hipCtx_t yields a context.  */
+
+  hipCtx_t hip_ctx = (hipCtx_t) omp_get_interop_ptr (obj, omp_ipr_device_context, &res);
+  assert (res == omp_irc_success);
+
+  /* Assume API Version > 0 for Nvidia, hipErrorNotSupported for AMD.  */
+  ivar = -99;
+  #pragma GCC diagnostic push
+  #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+    hip_err = hipCtxGetApiVersion (hip_ctx, &ivar);
+  #pragma GCC diagnostic pop
+
+  if (vendor_is_amd)
+    assert (hip_err == hipErrorNotSupported && ivar == -99);
+  else
+    {
+      assert (hip_err == hipSuccess);
+      assert (ivar > 0);
+    }
+
+
+  /* Check whether the omp_ipr_targetsync -> hipStream_t yields a stream.  */
+
+  hipStream_t hip_sm = (hipStream_t) omp_get_interop_ptr (obj, omp_ipr_targetsync, &res);
+  assert (res == omp_irc_success);
+
+  hipDevice_t dev_stream = 99;
+  hip_err = hipStreamGetDevice (hip_sm, &dev_stream);
+  assert (hip_err == hipSuccess);
+  assert (dev_stream == hip_dev);
+
+  /* All jobs should have been completed (as there were none none)  */
+  hip_err = hipStreamQuery (hip_sm);
+  assert (hip_err == hipSuccess);
+
+  #pragma omp interop destroy(obj)
+}
diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas-amd-full.c b/libgomp/testsuite/libgomp.c/interop-hipblas-amd-full.c
new file mode 100644
index 0000000..53c05bd
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hipblas-amd-full.c
@@ -0,0 +1,7 @@
+/* { dg-require-effective-target gomp_hip_header_amd } */
+/* { dg-require-effective-target gomp_libhipblas } */
+/* { dg-additional-options "-lhipblas" } */
+
+#define __HIP_PLATFORM_AMD__ 1
+
+#include "interop-hipblas.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas-amd-no-hip-header.c b/libgomp/testsuite/libgomp.c/interop-hipblas-amd-no-hip-header.c
new file mode 100644
index 0000000..0ea3133
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hipblas-amd-no-hip-header.c
@@ -0,0 +1,8 @@
+/* { dg-require-effective-target gomp_libhipblas } */
+/* { dg-additional-options "-lhipblas" } */
+
+#define __HIP_PLATFORM_AMD__ 1
+
+#define USE_HIP_FALLBACK_HEADER 1
+
+#include "interop-hipblas.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-full.c b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-full.c
new file mode 100644
index 0000000..ed428c6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-full.c
@@ -0,0 +1,7 @@
+/* { dg-require-effective-target openacc_cublas } */
+/* { dg-require-effective-target gomp_hip_header_nvidia } */
+/* { dg-additional-options "-lcublas -Wno-deprecated-declarations" } */
+
+#define __HIP_PLATFORM_NVIDIA__ 1
+
+#include "interop-hipblas.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-headers.c b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-headers.c
new file mode 100644
index 0000000..1a31b30
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-headers.c
@@ -0,0 +1,9 @@
+/* { dg-require-effective-target openacc_libcublas } */
+/* { dg-additional-options "-lcublas" } */
+
+#define __HIP_PLATFORM_NVIDIA__ 1
+
+#define USE_HIP_FALLBACK_HEADER 1
+#define USE_CUDA_FALLBACK_HEADER 1
+
+#include "interop-hipblas.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-hip-header.c b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-hip-header.c
new file mode 100644
index 0000000..f85c13b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-hip-header.c
@@ -0,0 +1,8 @@
+/* { dg-require-effective-target openacc_cublas } */
+/* { dg-additional-options "-lcublas" } */
+
+#define __HIP_PLATFORM_NVIDIA__ 1
+
+#define USE_HIP_FALLBACK_HEADER 1
+
+#include "interop-hipblas.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas.h b/libgomp/testsuite/libgomp.c/interop-hipblas.h
new file mode 100644
index 0000000..d7cb174
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hipblas.h
@@ -0,0 +1,240 @@
+/* Check whether hipBlas' daxpy works with an interop object.
+     daxpy(N, DA, DX, INCX, DY, INCY)
+   calculates (for DX = DY = 1):
+     DY(1:N) =  DY(1:N) + DA * DX(1:N)
+   and otherwise N array elements, taking every INCX-th or INCY-th one, repectively.
+
+Based on the interop example in OpenMP's example document  */
+
+/* Minimal check whether HIP works - by checking whether the API routines
+   seem to work.  This includes a fallback if the header is not
+   available.  */
+
+#if !defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__)
+  #error "Either __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__ must be defined"
+#endif
+
+#if defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__)
+  #error "Either __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__ must be defined"
+#endif
+
+
+#include <assert.h>
+#include <omp.h>
+#include "../libgomp.c-c++-common/on_device_arch.h"
+
+
+#if __has_include(<hipblas/hipblas.h>) && (__has_include(<library_types.h>) || !defined(__HIP_PLATFORM_NVIDIA__)) && !defined(USE_HIP_FALLBACK_HEADER)
+  #ifdef __HIP_PLATFORM_NVIDIA__
+    /* There seems to be an issue with hip/library_types.h including
+       CUDA's "library_types.h". Include CUDA's one explicitly here.
+       Could possibly worked around by using -isystem vs. -I.  */
+    #include <library_types.h>
+
+    /* For some reasons, the following symbols do not seem to get
+       mapped from HIP to CUDA, causing link errors.  */
+    #define hipblasSetStream cublasSetStream_v2
+    #define hipblasDaxpy cublasDaxpy_v2
+    #define hipblasCreate cublasCreate_v2
+  #endif
+  #include <hipblas/hipblas.h>
+
+#elif defined(__HIP_PLATFORM_AMD__)
+  /* Add a poor man's fallback declaration.  */
+  #if !defined(USE_HIP_FALLBACK_HEADER)
+    #warning "Using fallback declaration for <hipblas/hipblas.h> for __HIP_PLATFORM_AMD__"
+  #endif
+
+  typedef enum
+  {
+    HIPBLAS_STATUS_SUCCESS = 0
+
+  } hipblasStatus_t;
+
+  typedef struct ihipStream_t* hipStream_t;
+  typedef void* hipblasHandle_t;
+
+  hipblasStatus_t hipblasCreate (hipblasHandle_t*);
+  hipblasStatus_t hipblasSetStream (hipblasHandle_t, hipStream_t);
+  hipblasStatus_t hipblasDaxpy (hipblasHandle_t, int, const double*, const double*, int, double*, int);
+
+#else
+  /* Add a poor man's fallback declaration.  */
+  #if !defined(USE_HIP_FALLBACK_HEADER)
+    #warning "Using fallback declaration for <hipblas/hipblas.h> for __HIP_PLATFORM_NVIDA__"
+  #endif
+
+  #if __has_include(<cuda.h>) && __has_include(<cudaTypedefs.h>) && __has_include(<cuda_runtime.h>) && __has_include(<cublas_v2.h>) && !defined(USE_CUDA_FALLBACK_HEADER)
+    #include <cuda.h>
+    #include <cudaTypedefs.h>
+    #include <cuda_runtime.h>
+    #include <cublas_v2.h>
+
+  #else
+    /* Add a poor man's fallback declaration.  */
+    #if defined(USE_CUDA_FALLBACK_HEADER)
+      // no warning
+    #elif !__has_include(<cuda.h>)
+      #warning "Using GCC's cuda.h as fallback for cuda.h"
+    #elif !__has_include(<cudaTypedefs.h>)
+      #warning "Using GCC's cuda.h as fallback for cudaTypedefs.h"
+    #elif !__has_include(<cuda_runtime.h>)
+      #warning "Using GCC's cuda.h as fallback for cuda_runtime.h"
+    #else
+      #warning "Using GCC's cuda.h as fallback for cublas_v2.h"
+    #endif
+    #include "../../../include/cuda/cuda.h"
+
+    typedef enum {
+      CUBLAS_STATUS_SUCCESS = 0,
+    } cublasStatus_t;
+
+    typedef CUstream cudaStream_t;
+    typedef struct cublasContext* cublasHandle_t;
+
+    #define cublasCreate cublasCreate_v2
+    cublasStatus_t cublasCreate_v2 (cublasHandle_t *);
+
+    #define cublasSetStream cublasSetStream_v2
+    cublasStatus_t cublasSetStream_v2 (cublasHandle_t, cudaStream_t);
+
+    #define cublasDaxpy cublasDaxpy_v2
+    cublasStatus_t cublasDaxpy_v2(cublasHandle_t, int, const double*, const double*, int, double*, int);
+  #endif
+
+  #define HIPBLAS_STATUS_SUCCESS CUBLAS_STATUS_SUCCESS
+  #define hipblasStatus_t cublasStatus_t
+  #define hipStream_t cudaStream_t
+  #define hipblasHandle_t cublasHandle_t
+  #define hipblasCreate cublasCreate
+  #define hipblasSetStream cublasSetStream
+  #define hipblasDaxpy cublasDaxpy
+#endif
+
+static int used_variant = 0;
+
+void
+run_hipBlasdaxpy (int n, double da, const double *dx, int incx, double *dy, int incy, omp_interop_t obj)
+{
+  used_variant = 1;
+
+  omp_interop_rc_t res;
+  hipblasStatus_t stat;
+
+  omp_intptr_t fr = omp_get_interop_int(obj, omp_ipr_fr_id, &res);
+  assert (res == omp_irc_success && fr == omp_ifr_hip);
+
+  hipStream_t stream = (hipStream_t) omp_get_interop_ptr (obj, omp_ipr_targetsync, &res);
+  assert (res == omp_irc_success);
+
+  hipblasHandle_t handle;
+  stat = hipblasCreate (&handle);
+  assert (stat == HIPBLAS_STATUS_SUCCESS);
+
+  stat = hipblasSetStream (handle, stream);
+  assert (stat == HIPBLAS_STATUS_SUCCESS);
+
+  /* 'da' can be in host or device space, 'dx' and 'dy' must be in device space.  */
+  stat = hipblasDaxpy (handle, n, &da, dx, 1, dy, 1) ;
+  assert (stat == HIPBLAS_STATUS_SUCCESS);
+}
+
+#if defined(__HIP_PLATFORM_AMD__)
+#pragma omp declare variant(run_hipBlasdaxpy) \
+                       match(construct={dispatch}, target_device={kind(nohost), arch("amdgcn")}) \
+                       adjust_args(need_device_ptr : dx, dy) \
+                       append_args(interop(targetsync, prefer_type("hip")))
+#elif defined(__HIP_PLATFORM_NVIDIA__) 
+#pragma omp declare variant(run_hipBlasdaxpy) \
+                       match(construct={dispatch}, target_device={kind(nohost), arch("nvptx")}) \
+                       adjust_args(need_device_ptr : dx, dy) \
+                       append_args(interop(targetsync, prefer_type("hip")))
+#else
+ #error "wrong platform"
+#endif
+
+void
+run_daxpy (int n, double da, const double *dx, int incx, double *dy, int incy)
+{
+  used_variant = 2;
+
+  if (incx == 1 && incy == 1)
+    #pragma omp simd
+    for (int i = 0; i < n; i++)
+      dy[i] += da * dx[i];
+  else
+    {
+      int ix = 0;
+      int iy = 0;
+      for (int i = 0; i < n; i++)
+	{
+	  dy[iy] += da * dx[ix];
+	  ix += incx;
+	  iy += incy;
+	}
+    }
+}
+
+
+void
+run_test (int dev)
+{
+  constexpr int N = 1024;
+
+  // A = {1,2,...,N}
+  // B = {-1, -2, ..., N}
+  // B' = daxpy (N, 3, A, incx=1, B, incy=1)
+  //    = B + 3*A
+  // -> B' = {0, 2, 4, 6, ... }
+
+  double A[N], B[N];
+  double factor = 3.0;
+  for (int i = 0; i < N; i++)
+    {
+      A[i] = i;
+      B[i] = -i;
+    }
+
+  if (dev != omp_initial_device && dev != omp_get_num_devices ())
+    {
+      #pragma omp target enter data device(dev) map(A, B)
+    }
+
+  used_variant = 99;
+  #pragma omp dispatch device(dev)
+    run_daxpy (N, factor, A, 1, B, 1);  
+
+  if (dev != omp_initial_device && dev != omp_get_num_devices ())
+    {
+      #pragma omp target exit data device(dev) map(release: A) map(from: B)
+
+      int tmp = omp_get_default_device ();
+      omp_set_default_device (dev);
+#if defined(__HIP_PLATFORM_AMD__)
+      if (on_device_arch_gcn ())
+#else
+      if (on_device_arch_nvptx ())
+#endif
+	assert (used_variant == 1);
+      else
+	assert (used_variant == 2);
+      omp_set_default_device (tmp);
+    }
+  else
+    assert (used_variant == 2);
+
+  for (int i = 0; i < N; i++)
+    assert (B[i] == 2*i);
+}
+
+int   
+main () 
+{   
+  int ndev = omp_get_num_devices ();
+
+  for (int dev = 0; dev <= ndev; dev++)
+    run_test (dev);
+  run_test (omp_initial_device);  
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/interop-hsa.c b/libgomp/testsuite/libgomp.c/interop-hsa.c
new file mode 100644
index 0000000..21ac91c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hsa.c
@@ -0,0 +1,205 @@
+/* { dg-additional-options "-ldl" } */
+/* { dg-require-effective-target offload_device_gcn }
+   The 'asm' insert is valid for GCN only:
+   { dg-additional-options -foffload=amdgcn-amdhsa } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <omp.h>
+#include <assert.h>
+#include <dlfcn.h>
+#include "../../../include/hsa.h"
+#include "../../config/gcn/libgomp-gcn.h"
+
+#define STACKSIZE (100 * 1024)
+#define HEAPSIZE (10 * 1024 * 1024)
+#define ARENASIZE HEAPSIZE
+
+/* This code fragment must be optimized or else the host-fallback kernel has
+ * invalid ASM inserts.  The rest of the file can be compiled safely at -O0.  */
+#pragma omp declare target
+uintptr_t __attribute__((optimize("O1")))
+get_kernel_ptr ()
+{
+  uintptr_t val;
+  if (!omp_is_initial_device ())
+    /* "main._omp_fn.0" is the name GCC gives the first OpenMP target
+     * region in the "main" function.
+     * The ".kd" suffix is added by the LLVM assembler when it creates the
+     * kernel meta-data, and this is what we need to launch a kernel.  */
+    asm ("s_getpc_b64 %0\n\t"
+	 "s_add_u32 %L0, %L0, main._omp_fn.0.kd@rel32@lo+4\n\t"
+	 "s_addc_u32 %H0, %H0, main._omp_fn.0.kd@rel32@hi+4"
+	 : "=Sg"(val));
+  return val;
+}
+#pragma omp end declare target
+
+int
+main(int argc, char** argv)
+{
+
+  /* Load the HSA runtime DLL.  */
+  void *hsalib = dlopen ("libhsa-runtime64.so.1", RTLD_LAZY);
+  assert (hsalib);
+
+  hsa_status_t (*hsa_signal_create) (hsa_signal_value_t initial_value,
+				     uint32_t num_consumers,
+				     const hsa_agent_t *consumers,
+				     hsa_signal_t *signal)
+    = dlsym (hsalib, "hsa_signal_create");
+  assert (hsa_signal_create);
+
+  uint64_t (*hsa_queue_load_write_index_relaxed) (const hsa_queue_t *queue)
+    = dlsym (hsalib, "hsa_queue_load_write_index_relaxed");
+  assert (hsa_queue_load_write_index_relaxed);
+
+  void (*hsa_signal_store_relaxed) (hsa_signal_t signal,
+				    hsa_signal_value_t value)
+    = dlsym (hsalib, "hsa_signal_store_relaxed");
+  assert (hsa_signal_store_relaxed);
+
+  hsa_signal_value_t (*hsa_signal_wait_relaxed) (hsa_signal_t signal,
+						 hsa_signal_condition_t condition,
+						 hsa_signal_value_t compare_value,
+						 uint64_t timeout_hint,
+						 hsa_wait_state_t wait_state_hint)
+    = dlsym (hsalib, "hsa_signal_wait_relaxed");
+  assert (hsa_signal_wait_relaxed);
+
+  void (*hsa_queue_store_write_index_relaxed) (const hsa_queue_t *queue,
+					       uint64_t value)
+    = dlsym (hsalib, "hsa_queue_store_write_index_relaxed");
+  assert (hsa_queue_store_write_index_relaxed);
+
+  hsa_status_t (*hsa_signal_destroy) (hsa_signal_t signal)
+    = dlsym (hsalib, "hsa_signal_destroy");
+  assert (hsa_signal_destroy);
+
+  /* Set up the device data environment.  */
+  int test_data_value = 0;
+#pragma omp target enter data map(test_data_value)
+
+  /* Get the interop details.  */
+  int device_num = omp_get_default_device();
+  hsa_agent_t *gpu_agent;
+  hsa_queue_t *hsa_queue = NULL;
+
+  omp_interop_t interop = omp_interop_none;
+#pragma omp interop init(target, targetsync, prefer_type("hsa"): interop) device(device_num)
+  assert (interop != omp_interop_none);
+
+  omp_interop_rc_t retcode;
+  omp_interop_fr_t fr = omp_get_interop_int (interop, omp_ipr_fr_id, &retcode);
+  assert (retcode == omp_irc_success);
+  assert (fr == omp_ifr_hsa);
+
+  gpu_agent = omp_get_interop_ptr(interop, omp_ipr_device, &retcode);
+  assert (retcode == omp_irc_success);
+
+  hsa_queue = omp_get_interop_ptr(interop, omp_ipr_targetsync, &retcode);
+  assert (retcode == omp_irc_success);
+  assert (hsa_queue);
+
+  /* Call an offload kernel via OpenMP/libgomp.
+   *
+   * This kernel serves two purposes:
+   *   1) Lookup the device-side load-address of itself (thus avoiding the
+   *   need to access the libgomp internals).
+   *   2) Count how many times it is called.
+   * We then call it once using OpenMP, and once manually, and check
+   * the counter reads "2".  */
+  uint64_t kernel_object = 0;
+#pragma omp target map(from:kernel_object) map(present,alloc:test_data_value)
+  {
+    kernel_object = get_kernel_ptr ();
+    ++test_data_value;
+  }
+
+  assert (kernel_object != 0);
+
+  /* Configure the same kernel to run again, using HSA manually this time.  */
+  hsa_status_t status;
+  hsa_signal_t signal;
+  status = hsa_signal_create(1, 0, NULL, &signal);
+  assert (status == HSA_STATUS_SUCCESS);
+
+  /* The kernel is built by GCC for OpenMP, so we need to pass the same
+   * data pointers that libgomp would pass in.  */
+  struct {
+    uintptr_t test_data_value;
+    uintptr_t kernel_object;
+  } tgtaddrs;
+
+#pragma omp target data use_device_addr(test_data_value)
+  {
+    tgtaddrs.test_data_value = (uintptr_t)&test_data_value;
+    tgtaddrs.kernel_object = (uintptr_t)omp_target_alloc (8, device_num);
+  }
+
+  /* We also need to duplicate the launch ABI used by plugin-gcn.c.  */
+  struct kernargs_abi args;    /* From libgomp-gcn.h.  */
+  args.dummy1 = (int64_t)&tgtaddrs;
+  args.out_ptr = (int64_t)malloc (sizeof (struct output)); /* Host side.  */
+  args.heap_ptr = (int64_t)omp_target_alloc (HEAPSIZE, device_num);
+  args.arena_ptr = (int64_t)omp_target_alloc (ARENASIZE, device_num);
+  args.stack_ptr = (int64_t)omp_target_alloc (STACKSIZE, device_num);
+  args.arena_size_per_team = ARENASIZE;
+  args.stack_size_per_thread = STACKSIZE;
+
+  /* Build the HSA dispatch packet, and insert it into the queue.  */
+  uint64_t packet_id = hsa_queue_load_write_index_relaxed (hsa_queue);
+  const uint32_t queueMask = hsa_queue->size - 1;
+  hsa_kernel_dispatch_packet_t *dispatch_packet =
+    &(((hsa_kernel_dispatch_packet_t *)
+	  (hsa_queue->base_address))[packet_id & queueMask]);
+
+  dispatch_packet->setup = 3 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
+  dispatch_packet->workgroup_size_x = 1;
+  dispatch_packet->workgroup_size_y = 64;
+  dispatch_packet->workgroup_size_z = 1;
+  dispatch_packet->grid_size_x = 1;
+  dispatch_packet->grid_size_y = 64;
+  dispatch_packet->grid_size_z = 1;
+  dispatch_packet->completion_signal = signal;
+  dispatch_packet->kernel_object = kernel_object;
+  dispatch_packet->kernarg_address = &args;
+  dispatch_packet->private_segment_size = 0;
+  dispatch_packet->group_segment_size = 1536;
+
+  uint16_t header = 0;
+  header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
+  header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
+  header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
+
+  /* Finish writing the packet header with an atomic release.  */
+  __atomic_store_n((uint16_t*)dispatch_packet, header, __ATOMIC_RELEASE);
+
+  hsa_queue_store_write_index_relaxed (hsa_queue, packet_id + 1);
+  
+  ;/* Run the kernel and wait for it to complete.  */
+  hsa_signal_store_relaxed(hsa_queue->doorbell_signal, packet_id);
+  while (hsa_signal_wait_relaxed(signal, HSA_SIGNAL_CONDITION_LT, 1,
+	UINT64_MAX, HSA_WAIT_STATE_ACTIVE) != 0)
+    ;
+
+  /* Clean up HSA.  */
+  hsa_signal_destroy(signal);
+  free ((void*)args.out_ptr);
+  omp_target_free ((void*)args.heap_ptr, device_num);
+  omp_target_free ((void*)args.arena_ptr, device_num);
+  omp_target_free ((void*)args.stack_ptr, device_num);
+  omp_target_free ((void*)tgtaddrs.kernel_object, device_num);
+
+  /* Clean up OpenMP.  */
+  #pragma omp interop destroy(interop)
+
+  /* Bring the data back from the device.  */
+#pragma omp target exit data map(test_data_value)
+
+  /* Ensure the kernel was called twice.  Once by OpenMP, once by HSA.  */
+  assert (test_data_value == 2);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/ipcp-cb-spec1.c b/libgomp/testsuite/libgomp.c/ipcp-cb-spec1.c
new file mode 100644
index 0000000..ff82f4c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/ipcp-cb-spec1.c
@@ -0,0 +1,18 @@
+/* Test that GOMP_task is special cased when cpyfn is NULL.  */
+
+/* { dg-do run } */
+/* { dg-options "-O3 -fopenmp -std=gnu99 -fdump-ipa-cp-details" } */
+/* { dg-require-effective-target fopenmp } */
+
+void test(int c) {
+  for (int i = 0; i < c; i++)
+    if (!__builtin_constant_p(c))
+      __builtin_abort();
+}
+int main() {
+#pragma omp task
+  test(7);
+  return 0;
+}
+
+/* { dg-final { scan-ipa-dump "Creating a specialized node of main._omp_fn" "cp" } } */
diff --git a/libgomp/testsuite/libgomp.c/ipcp-cb-spec2.c b/libgomp/testsuite/libgomp.c/ipcp-cb-spec2.c
new file mode 100644
index 0000000..30894d7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/ipcp-cb-spec2.c
@@ -0,0 +1,20 @@
+/* Check that GOMP_task doesn't produce callback edges when cpyfn is not
+   NULL.  */
+
+/* { dg-do run } */
+/* { dg-options "-O3 -fopenmp -std=gnu99 -fdump-ipa-cp-details" } */
+/* { dg-require-effective-target fopenmp } */
+
+void test(int *a) {
+  for (int i = 0; i < 100; i++) {
+    a[i] = i;
+  }
+}
+int main() {
+  int a[100];
+  __builtin_memset (a, 0, sizeof (a));
+  #pragma omp task
+  test (a);
+}
+
+/* { dg-final { scan-ipa-dump-not "Created callback edge" "cp" } } */
diff --git a/libgomp/testsuite/libgomp.c/ipcp-cb1.c b/libgomp/testsuite/libgomp.c/ipcp-cb1.c
new file mode 100644
index 0000000..e390f04
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/ipcp-cb1.c
@@ -0,0 +1,24 @@
+/* Test that we can propagate constants into outlined OpenMP kernels.
+   This tests the underlying callback attribute and its related edges.  */
+
+/* { dg-do run } */
+/* { dg-options "-O3 -fopenmp -std=gnu99 -fdump-ipa-cp-details" } */
+/* { dg-require-effective-target fopenmp } */
+
+int a[100];
+void test(int c) {
+#pragma omp parallel for
+  for (int i = 0; i < c; i++) {
+    if (!__builtin_constant_p(c)) {
+      __builtin_abort();
+    }
+    a[i] = i;
+  }
+}
+int main() {
+  test(100);
+  return a[5] - 5;
+}
+
+/* { dg-final { scan-ipa-dump "Creating a specialized node of test._omp_fn" "cp" } } */
+/* { dg-final { scan-ipa-dump "Aggregate replacements: 0\\\[0]=100\\(by_ref\\)" "cp" } } */
diff --git a/libgomp/testsuite/libgomp.c/pr122281.c b/libgomp/testsuite/libgomp.c/pr122281.c
new file mode 100644
index 0000000..68fc3be
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/pr122281.c
@@ -0,0 +1,43 @@
+/* { dg-do run }  */
+/* { dg-additional-options "-O3" }  */
+
+/* PR libgomp/122281  */
+/* PR middle-end/105001  */
+
+/* If SIMT is supported, the inner 'omp simd' is duplicated into
+   one SIMT and one SIMD variant. SIMT is currently only supported
+   with nvidia GPUs.  (This only happens with -O1 or higher.)
+
+   The duplication failed for the SIMD case as a tree was shared and
+   the initialization only happened in the SIMT branch, i.e. when
+   compiling for a SIMT-device, all non-SIMD (offload or host devices)
+   accesses failed (segfault) for the atomic update.  */
+
+#include <omp.h>
+
+int __attribute__((noinline, noclone))
+f(int *A, int n, int dev) {
+ int cnt = 0;
+ #pragma omp target map(cnt) map(to:A[0:n]) device(dev)
+ {
+   #pragma omp parallel for simd
+   for (int i = 0; i < n; i++)
+   if (A[i] != 0)
+     {
+       #pragma omp atomic
+       cnt++;
+     }
+ }
+ return cnt;
+}
+
+int main() {
+  int n = 10;
+  int A[10] = {11,22,33,44,55,66,77,88,99,110};
+
+  /* Run over all devices, including the host; the host should be SIMD,
+     some non-host devices might be SIMT.  */
+  for (int dev = omp_initial_device; dev <= omp_get_num_devices(); dev++)
+    if (f (A, n, dev) != 10)
+      __builtin_abort();
+}
diff --git a/libgomp/testsuite/libgomp.c/target-map-zero-sized-2.c b/libgomp/testsuite/libgomp.c/target-map-zero-sized-2.c
new file mode 100644
index 0000000..3220828
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-map-zero-sized-2.c
@@ -0,0 +1,74 @@
+int
+main ()
+{
+  int i, n;
+  int data[] = {1,2};
+  struct S { int **ptrset; };
+
+// -----------------------------------
+
+/* The produced mapping for sptr1->ptrset[i][:n]
+
+   GOMP_MAP_STRUCT (size = 1)
+      GOMP_MAP_ZERO_LEN_ARRAY_SECTION
+   GOMP_MAP_ZERO_LEN_ARRAY_SECTION
+      GOMP_MAP_ATTACH
+   GOMP_MAP_ATTACH -> attaching to 2nd GOMP_MAP_ZERO_LEN_ARRAY_SECTION
+
+which get split into 3 separate map_vars call; in particular,
+the latter is separate and points to an unmpapped variable.
+
+Thus, it failed with:
+   libgomp: pointer target not mapped for attach  */
+
+  struct S s1, *sptr1;
+  s1.ptrset = (int **) __builtin_malloc (sizeof(void*) * 3);
+  s1.ptrset[0] = data;
+  s1.ptrset[1] = data;
+  s1.ptrset[2] = data;
+  sptr1 = &s1;
+
+  i = 1;
+  n = 0;
+  #pragma omp target enter data map(sptr1[:1], sptr1->ptrset[:3])
+  #pragma omp target enter data map(sptr1->ptrset[i][:n])
+
+  #pragma omp target exit data map(sptr1->ptrset[i][:n])
+  #pragma omp target exit data map(sptr1[:1], sptr1->ptrset[:3])
+
+  __builtin_free (s1.ptrset);
+
+// -----------------------------------
+
+/* The produced mapping for sptr2->ptrset[i][:n] is similar:
+
+   GOMP_MAP_STRUCT (size = 1)
+      GOMP_MAP_ZERO_LEN_ARRAY_SECTION
+   GOMP_MAP_TO  ! this one has now a finite size
+      GOMP_MAP_ATTACH
+   GOMP_MAP_ATTACH -> attach to the GOMP_MAP_TO
+
+As the latter GOMP_MAP_ATTACH has now a pointer target,
+the attachment worked.  */
+
+  struct S s2, *sptr2;
+  s2.ptrset = (int **) __builtin_malloc (sizeof(void*) * 3);
+  s2.ptrset[0] = data;
+  s2.ptrset[1] = data;
+  s2.ptrset[2] = data;
+  sptr2 = &s2;
+
+  i = 1;
+  n = 2;
+  #pragma omp target enter data map(sptr2[:1], sptr2->ptrset[:3])
+  #pragma omp target enter data map(sptr2->ptrset[i][:n])
+
+  #pragma omp target
+    if (sptr2->ptrset[1][0] != 1 || sptr2->ptrset[1][1] != 2)
+      __builtin_abort ();
+
+  #pragma omp target exit data map(sptr2->ptrset[i][:n])
+  #pragma omp target exit data map(sptr2[:1], sptr2->ptrset[:3])
+
+  __builtin_free (s2.ptrset);
+}
diff --git a/libgomp/testsuite/libgomp.c/target-map-zero-sized-3.c b/libgomp/testsuite/libgomp.c/target-map-zero-sized-3.c
new file mode 100644
index 0000000..580c6ad
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-map-zero-sized-3.c
@@ -0,0 +1,50 @@
+int
+main ()
+{
+  int i, n;
+  int data[] = {1,2};
+  struct S {
+    int **ptrset;
+    int **ptrset2;
+  };
+
+  /* This is the same as target-map-zero-sized-3.c, but by mixing
+     mapped and non-mapped items, the mapping before the ATTACH
+     might (or here: is) not actually associated with the the
+     pointer used for attaching. Thus, if one does a simple
+
+       if (openmp_p
+	   && (pragma_kind & GOMP_MAP_VARS_ENTER_DATA)
+	   && mapnum == 1)
+     check in target.c's gomp_map_vars_internal will fail
+     as mapnum > 1 but still the map associated with this
+     ATTACH is in a different set.  */
+
+  struct S s1, *sptr1;
+  s1.ptrset = (int **) __builtin_malloc (sizeof(void*) * 3);
+  s1.ptrset2 = (int **) __builtin_malloc (sizeof(void*) * 3);
+  s1.ptrset[0] = data;
+  s1.ptrset[1] = data;
+  s1.ptrset[2] = data;
+  s1.ptrset2[0] = data;
+  s1.ptrset2[1] = data;
+  s1.ptrset2[2] = data;
+  sptr1 = &s1;
+
+  i = 1;
+  n = 0;
+  #pragma omp target enter data map(data)
+  #pragma omp target enter data map(sptr1[:1], sptr1->ptrset[:3], sptr1->ptrset2[:3])
+  #pragma omp target enter data map(sptr1->ptrset[i][:n], sptr1->ptrset2[i][:n])
+
+  #pragma omp target map(sptr1->ptrset[i][:n], sptr1->ptrset2[i][:n])
+    if (sptr1->ptrset2[1][0] != 1 || sptr1->ptrset2[1][1] != 2)
+      __builtin_abort ();
+
+  #pragma omp target exit data map(sptr1->ptrset[i][:n], sptr1->ptrset2[i][:n])
+  #pragma omp target exit data map(sptr1[:1], sptr1->ptrset[:3], sptr1->ptrset2[:3])
+  #pragma omp target exit data map(data)
+
+  __builtin_free (s1.ptrset);
+  __builtin_free (s1.ptrset2);
+}
diff --git a/libgomp/testsuite/libgomp.c/target-map-zero-sized.c b/libgomp/testsuite/libgomp.c/target-map-zero-sized.c
new file mode 100644
index 0000000..7c4ab80
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-map-zero-sized.c
@@ -0,0 +1,107 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O0" }  */
+
+/* Issue showed up in the real world when large data was distributed
+   over multiple MPI progresses - such that for one process n == 0
+   happend at run time.
+
+   Before  map(var[:0])  and  map(var[:n])  with n > 0 was handled,
+   this patch now also handles  map(var[:n]) with n == 0.
+
+   Failed before with "libgomp: pointer target not mapped for attach".  */
+
+/* Here, the base address is shifted - which should have no effect,
+   but must work as well.  */
+void
+with_offset ()
+{
+  struct S {
+     int *ptr1, *ptr2;
+  };
+  struct S s1, s2;
+  int *a, *b, *c, *d;
+  s1.ptr1 = (int *) 0L;
+  s1.ptr2 = (int *) 0xdeedbeef;
+  s2.ptr1 = (int *) 0L;
+  s2.ptr2 = (int *) 0xdeedbeef;
+  a = (int *) 0L;
+  b = (int *) 0xdeedbeef;
+  c = (int *) 0L;
+  d = (int *) 0xdeedbeef;
+
+  int n1, n2, n3, n4;
+  n1 = n2 = n3 = n4 = 0;
+
+  #pragma omp target enter data map(s1.ptr1[4:n1], s1.ptr2[6:n2], a[3:n3], b[2:n4])
+
+  #pragma omp target map(s2.ptr1[4:n1], s2.ptr2[2:n2], c[6:n3], d[9:n4])
+  {
+    if (s2.ptr1 != (void *) 0L || s2.ptr2 != (void *) 0xdeedbeef
+	|| c != (void *) 0L || d != (void *) 0xdeedbeef)
+      __builtin_abort ();
+  }
+
+  #pragma omp target map(s1.ptr1[4:n1], s1.ptr2[6:n2], a[3:n3], b[2:n4])
+  {
+    if (s1.ptr1 != (void *) 0L || s1.ptr2 != (void *) 0xdeedbeef
+	|| a != (void *) 0L || b != (void *) 0xdeedbeef)
+      __builtin_abort ();
+  }
+
+  #pragma omp target
+  {
+    if (s1.ptr1 != (void *) 0L || s1.ptr2 != (void *) 0xdeedbeef
+	|| a != (void *) 0L || b != (void *) 0xdeedbeef)
+      __builtin_abort ();
+  }
+
+  #pragma omp target exit data map(s1.ptr1[4:n1], s1.ptr2[6:n2], a[3:n3], b[2:n4])
+}
+
+int
+main ()
+{
+  struct S {
+     int *ptr1, *ptr2;
+  };
+  struct S s1, s2;
+  int *a, *b, *c, *d;
+  s1.ptr1 = (int *) 0L;
+  s1.ptr2 = (int *) 0xdeedbeef;
+  s2.ptr1 = (int *) 0L;
+  s2.ptr2 = (int *) 0xdeedbeef;
+  a = (int *) 0L;
+  b = (int *) 0xdeedbeef;
+  c = (int *) 0L;
+  d = (int *) 0xdeedbeef;
+
+  int n1, n2, n3, n4;
+  n1 = n2 = n3 = n4 = 0;
+
+  #pragma omp target enter data map(s1.ptr1[:n1], s1.ptr2[:n2], a[:n3], b[:n4])
+
+  #pragma omp target map(s2.ptr1[:n1], s2.ptr2[:n2], c[:n3], d[:n4])
+  {
+    if (s2.ptr1 != (void *) 0L || s2.ptr2 != (void *) 0xdeedbeef
+	|| c != (void *) 0L || d != (void *) 0xdeedbeef)
+      __builtin_abort ();
+  }
+
+  #pragma omp target map(s1.ptr1[:n1], s1.ptr2[:n2], a[:n3], b[:n4])
+  {
+    if (s1.ptr1 != (void *) 0L || s1.ptr2 != (void *) 0xdeedbeef
+	|| a != (void *) 0L || b != (void *) 0xdeedbeef)
+      __builtin_abort ();
+  }
+
+  #pragma omp target
+  {
+    if (s1.ptr1 != (void *) 0L || s1.ptr2 != (void *) 0xdeedbeef
+	|| a != (void *) 0L || b != (void *) 0xdeedbeef)
+      __builtin_abort ();
+  }
+
+  #pragma omp target exit data map(s1.ptr1[:n1], s1.ptr2[:n2], a[:n3], b[:n4])
+
+  with_offset ();
+}