40 files changed, 2610 insertions, 23 deletions
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog
index 096e17b..49a62d4 100644
--- a/libgomp/ChangeLog
+++ b/libgomp/ChangeLog
@@ -1,3 +1,53 @@
+2025-04-24  Tobias Burnus  <tburnus@baylibre.com>
+
+	* testsuite/lib/libgomp.exp
+	(check_effective_target_gomp_hip_header_nvidia): Compile with
+	"-Wno-deprecated-declarations".
+	* testsuite/libgomp.c/interop-hip-nvidia-full.c: Likewise.
+	* testsuite/libgomp.c/interop-hipblas-nvidia-full.c: Likewise.
+	* testsuite/libgomp.c/interop-hipblas.h: Add workarounds
+	when using the HIP headers with __HIP_PLATFORM_NVIDIA__.
+
+2025-04-24  Tobias Burnus  <tburnus@baylibre.com>
+
+	* testsuite/lib/libgomp.exp (check_effective_target_openacc_cublas,
+	check_effective_target_openacc_cudart): Update description as
+	the check requires more.
+	(check_effective_target_openacc_libcuda,
+	check_effective_target_openacc_libcublas,
+	check_effective_target_openacc_libcudart,
+	check_effective_target_gomp_hip_header_amd,
+	check_effective_target_gomp_hip_header_nvidia,
+	check_effective_target_gomp_hipfort_module,
+	check_effective_target_gomp_libamdhip64,
+	check_effective_target_gomp_libhipblas): New.
+	* testsuite/libgomp.c-c++-common/interop-2.c: New test.
+	* testsuite/libgomp.c/interop-cublas-full.c: New test.
+	* testsuite/libgomp.c/interop-cublas-libonly.c: New test.
+	* testsuite/libgomp.c/interop-cuda-full.c: New test.
+	* testsuite/libgomp.c/interop-cuda-libonly.c: New test.
+	* testsuite/libgomp.c/interop-hip-amd-full.c: New test.
+	* testsuite/libgomp.c/interop-hip-amd-no-hip-header.c: New test.
+	* testsuite/libgomp.c/interop-hip-nvidia-full.c: New test.
+	* testsuite/libgomp.c/interop-hip-nvidia-no-headers.c: New test.
+	* testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c: New test.
+	* testsuite/libgomp.c/interop-hip.h: New test.
+	* testsuite/libgomp.c/interop-hipblas-amd-full.c: New test.
+	* testsuite/libgomp.c/interop-hipblas-amd-no-hip-header.c: New test.
+	* testsuite/libgomp.c/interop-hipblas-nvidia-full.c: New test.
+	* testsuite/libgomp.c/interop-hipblas-nvidia-no-headers.c: New test.
+	* testsuite/libgomp.c/interop-hipblas-nvidia-no-hip-header.c: New test.
+	* testsuite/libgomp.c/interop-hipblas.h: New test.
+	* testsuite/libgomp.fortran/interop-hip-amd-full.F90: New test.
+	* testsuite/libgomp.fortran/interop-hip-amd-no-module.F90: New test.
+	* testsuite/libgomp.fortran/interop-hip-nvidia-full.F90: New test.
+	* testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90: New test.
+	* testsuite/libgomp.fortran/interop-hip.h: New test.
+
+2025-04-23  Tobias Burnus  <tburnus@baylibre.com>
+
+	* testsuite/libgomp.fortran/target-enter-data-8.f90: New test.
+
 2025-04-17  Jakub Jelinek  <jakub@redhat.com>
 
 	PR libgomp/119849
diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am
index e3202ae..19479ae 100644
--- a/libgomp/Makefile.am
+++ b/libgomp/Makefile.am
@@ -70,7 +70,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \
 	target.c splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \
 	oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \
 	priority_queue.c affinity-fmt.c teams.c allocator.c oacc-profiling.c \
-	oacc-target.c target-indirect.c
+	oacc-target.c target-indirect.c target-cxa-dso-dtor.c
 
 include $(top_srcdir)/plugin/Makefrag.am
 
diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in
index 2a0a842..6d22b3d 100644
--- a/libgomp/Makefile.in
+++ b/libgomp/Makefile.in
@@ -219,7 +219,8 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \
 	oacc-parallel.lo oacc-host.lo oacc-init.lo oacc-mem.lo \
 	oacc-async.lo oacc-plugin.lo oacc-cuda.lo priority_queue.lo \
 	affinity-fmt.lo teams.lo allocator.lo oacc-profiling.lo \
-	oacc-target.lo target-indirect.lo $(am__objects_1)
+	oacc-target.lo target-indirect.lo target-cxa-dso-dtor.lo \
+	$(am__objects_1)
 libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
 AM_V_P = $(am__v_P_@AM_V@)
 am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
@@ -552,7 +553,8 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \
 	oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \
 	oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
 	affinity-fmt.c teams.c allocator.c oacc-profiling.c \
-	oacc-target.c target-indirect.c $(am__append_3)
+	oacc-target.c target-indirect.c target-cxa-dso-dtor.c \
+	$(am__append_3)
 
 # Nvidia PTX OpenACC plugin.
 @PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
@@ -780,6 +782,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/splay-tree.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target-cxa-dso-dtor.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target-indirect.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/task.Plo@am__quote@
diff --git a/libgomp/config/accel/target-cxa-dso-dtor.c b/libgomp/config/accel/target-cxa-dso-dtor.c
new file mode 100644
index 0000000..e40a5f0
--- /dev/null
+++ b/libgomp/config/accel/target-cxa-dso-dtor.c
@@ -0,0 +1,62 @@
+/* Host/device compatibility: Itanium C++ ABI, DSO Object Destruction API
+
+   Copyright (C) 2025 Free Software Foundation, Inc.
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "libgomp.h"
+
+extern void __cxa_finalize (void *);
+
+/* See <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#dso-dtor>.
+
+   Even if the device is '!DEFAULT_USE_CXA_ATEXIT', we may see '__cxa_atexit'
+   calls, referencing '__dso_handle', via a 'DEFAULT_USE_CXA_ATEXIT' host.
+   '__cxa_atexit' is provided by newlib, but use of '__dso_handle' for nvptx
+   results in 'ld' error:
+
+       unresolved symbol __dso_handle
+       collect2: error: ld returned 1 exit status
+       nvptx mkoffload: fatal error: [...]/x86_64-pc-linux-gnu-accel-nvptx-none-gcc returned 1 exit status
+
+   ..., or for GCN get an implicit definition (running with
+   '--trace-symbol=__dso_handle'):
+
+       ./a.xamdgcn-amdhsa.mkoffload.hsaco-a.xamdgcn-amdhsa.mkoffload.2.o: reference to __dso_handle
+       <internal>: definition of __dso_handle
+
+   ..., which might be fine, but let's just make it explicit.  */
+
+/* There are no DSOs; this is the main program.  */
+attribute_hidden void * const __dso_handle = 0;
+
+/* If this file gets linked in, that means that '__dso_handle' has been
+   referenced (for '__cxa_atexit'), and in that case, we also have to run
+   '__cxa_finalize'.  Make that happen by overriding the weak libgcc dummy
+   function '__GCC_offload___cxa_finalize'.  */
+
+void
+__GCC_offload___cxa_finalize (void *dso_handle)
+{
+  __cxa_finalize (dso_handle);
+}
diff --git a/libgomp/target-cxa-dso-dtor.c b/libgomp/target-cxa-dso-dtor.c
new file mode 100644
index 0000000..d1a898d
--- /dev/null
+++ b/libgomp/target-cxa-dso-dtor.c
@@ -0,0 +1,3 @@
+/* Host/device compatibility: Itanium C++ ABI, DSO Object Destruction API */
+
+/* Nothing needed here.  */
diff --git a/libgomp/testsuite/lib/libgomp.exp b/libgomp/testsuite/lib/libgomp.exp
index bc38e3c..54f2f708 100644
--- a/libgomp/testsuite/lib/libgomp.exp
+++ b/libgomp/testsuite/lib/libgomp.exp
@@ -553,7 +553,23 @@ int main() {
 } } "-lcuda" ]
 }
 
-# Return 1 if cublas_v2.h and -lcublas are available.
+# Return 1 if -lcuda is available (header not required).
+
+proc check_effective_target_openacc_libcuda { } {
+    return [check_no_compiler_messages openacc_libcuda executable {
+typedef enum { CUDA_SUCCESS } CUresult;
+typedef int CUdevice;
+CUresult cuDeviceGet (CUdevice *, int);
+int main() {
+    CUdevice dev;
+    CUresult r = cuDeviceGet (&dev, 0);
+    if (r != CUDA_SUCCESS)
+	return 1;
+    return 0;
+} } "-lcuda" ]
+}
+
+# Return 1 if cublas_v2.h, cuda.h, -lcublas and -lcuda are available.
 
 proc check_effective_target_openacc_cublas { } {
     return [check_no_compiler_messages openacc_cublas executable {
@@ -573,7 +589,25 @@ int main() {
 } } "-lcuda -lcublas" ]
 }
 
-# Return 1 if cuda_runtime_api.h and -lcudart are available.
+# Return 1 if -lcublas is available header not required).
+
+proc check_effective_target_openacc_libcublas { } {
+    return [check_no_compiler_messages openacc_libcublas executable {
+typedef enum { CUBLAS_STATUS_SUCCESS } cublasStatus_t;
+typedef struct cublasContext* cublasHandle_t;
+#define cublasCreate cublasCreate_v2
+cublasStatus_t cublasCreate_v2 (cublasHandle_t *);
+int main() {
+    cublasStatus_t s;
+    cublasHandle_t h;
+    s = cublasCreate (&h);
+    if (s != CUBLAS_STATUS_SUCCESS)
+	return 1;
+    return 0;
+} } "-lcublas" ]
+}
+
+# Return 1 if cuda_runtime_api.h, cuda.h, -lcuda and -lcudart are available.
 
 proc check_effective_target_openacc_cudart { } {
     return [check_no_compiler_messages openacc_cudart executable {
@@ -592,3 +626,98 @@ int main() {
     return 0;
 } } "-lcuda -lcudart" ]
 }
+
+# Return 1 if -lcudart is available (no header required).
+
+proc check_effective_target_openacc_libcudart { } {
+    return [check_no_compiler_messages openacc_libcudart executable {
+typedef int cudaError_t;
+cudaError_t cudaGetDevice(int *);
+enum { cudaSuccess };
+int main() {
+    cudaError_t e;
+    int devn;
+    e = cudaGetDevice (&devn);
+    if (e != cudaSuccess)
+	return 1;
+    return 0;
+} } "-lcudart" ]
+}
+
+# Return 1 if hip.h is available (no link check; AMD platform).
+
+proc check_effective_target_gomp_hip_header_amd { } {
+    return [check_no_compiler_messages gomp_hip_header_amd assembly {
+#define __HIP_PLATFORM_AMD__
+#include <hip/hip_runtime_api.h>
+int main() {
+    hipDevice_t dev;
+    hipError_t r = hipDeviceGet (&dev, 0);
+    if (r != hipSuccess)
+	return 1;
+    return 0;
+} }]
+}
+
+# Return 1 if hip.h is available (no link check; Nvidia/CUDA platform).
+
+proc check_effective_target_gomp_hip_header_nvidia { } {
+    return [check_no_compiler_messages gomp_hip_header_nvidia assembly {
+#define __HIP_PLATFORM_NVIDIA__
+#include <hip/hip_runtime_api.h>
+int main() {
+    hipDevice_t dev;
+    hipError_t r = hipDeviceGet (&dev, 0);
+    if (r != hipSuccess)
+	return 1;
+    return 0;
+} } "-Wno-deprecated-declarations"]
+}
+
+# Return 1 if the Fortran hipfort module is available (no link check)
+
+proc check_effective_target_gomp_hipfort_module { } {
+    return [check_no_compiler_messages gomp_hipfort_module assembly {
+! Fortran
+use hipfort
+implicit none
+integer(kind(hipSuccess)) :: r
+integer(c_int) :: dev
+r = hipDeviceGet (dev, 0)
+if (r /= hipSuccess) error stop
+end
+}]
+}
+
+# Return 1 if AMD HIP's -lamdhip64 is available (no header required).
+
+proc check_effective_target_gomp_libamdhip64 { } {
+    return [check_no_compiler_messages gomp_libamdhip64 executable {
+typedef int hipError_t;
+typedef int hipDevice_t;
+enum { hipSuccess = 0 };
+hipError_t hipDeviceGet(hipDevice_t*, int);
+int main() {
+    hipDevice_t dev;
+    hipError_t r = hipDeviceGet (&dev, 0);
+    if (r != hipSuccess)
+	return 1;
+    return 0;
+} } "-lamdhip64" ]
+}
+
+# Return 1 if AMD HIP's -lamdhip64 is available (no header required).
+
+proc check_effective_target_gomp_libhipblas { } {
+    return [check_no_compiler_messages gomp_libhipblas executable {
+typedef enum { HIPBLAS_STATUS_SUCCESS = 0 } hipblasStatus_t;
+typedef void* hipblasHandle_t;
+hipblasStatus_t hipblasCreate (hipblasHandle_t*);
+int main() {
+    hipblasHandle_t handle;
+    hipblasStatus_t stat = hipblasCreate (&handle);
+    if (stat != HIPBLAS_STATUS_SUCCESS)
+	return 1;
+    return 0;
+} } "-lhipblas" ]
+}
diff --git a/libgomp/testsuite/libgomp.c++/target-cdtor-1.C b/libgomp/testsuite/libgomp.c++/target-cdtor-1.C
new file mode 100644
index 0000000..ecb029e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-cdtor-1.C
@@ -0,0 +1,104 @@
+/* Offloaded C++ objects construction and destruction.  */
+
+/* { dg-additional-options -fdump-tree-optimized-raw-asmname }
+   { dg-additional-options -foffload-options=-fdump-tree-optimized-raw-asmname } */
+
+#include <omp.h>
+#include <vector>
+
+#pragma omp declare target
+
+struct S
+{
+  int x;
+
+  S()
+    : x(-1)
+  {
+    __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device());
+  }
+  S(int x)
+    : x(x)
+  {
+    __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device());
+  }
+  ~S()
+  {
+    __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device());
+  }
+};
+
+#pragma omp end declare target
+
+S sH1(7);
+
+#pragma omp declare target
+
+S sHD1(5);
+
+std::vector<S> svHD1(2);
+
+#pragma omp end declare target
+
+S sH2(3);
+
+int main()
+{
+  int c = 0;
+
+  __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+
+#pragma omp target map(c)
+  {
+    __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+  }
+
+#pragma omp target map(c)
+  {
+    __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+  }
+
+  __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+
+  return 0;
+}
+
+/* Verify '__cxa_atexit' calls.
+
+   For the host, there are four expected calls:
+   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, } 4 optimized { target cxa_atexit } } }
+   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH2, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+
+   For the device, there are two expected calls:
+   { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, } 2 optimized { target cxa_atexit } } }
+   { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+   { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+*/
+
+/* C++ objects are constructed in order of appearance (..., and destructed in reverse order).
+   { dg-output {S, 7, 1[\r\n]+} }
+   { dg-output {S, 5, 1[\r\n]+} }
+   { dg-output {S, -1, 1[\r\n]+} }
+   { dg-output {S, -1, 1[\r\n]+} }
+   { dg-output {S, 3, 1[\r\n]+} }
+   { dg-output {main:1, 1[\r\n]+} }
+   { dg-output {S, 5, 0[\r\n]+} { target offload_device } }
+   { dg-output {S, -1, 0[\r\n]+} { target offload_device } }
+   { dg-output {S, -1, 0[\r\n]+} { target offload_device } }
+   { dg-output {main:2, 1[\r\n]+} { target { ! offload_device } } }
+   { dg-output {main:2, 0[\r\n]+} { target offload_device } }
+   { dg-output {main:3, 1[\r\n]+} { target { ! offload_device } } }
+   { dg-output {main:3, 0[\r\n]+} { target offload_device } }
+   { dg-output {main:4, 1[\r\n]+} }
+   { dg-output {~S, -1, 0[\r\n]+} { target offload_device } }
+   { dg-output {~S, -1, 0[\r\n]+} { target offload_device } }
+   { dg-output {~S, 5, 0[\r\n]+} { target offload_device } }
+   { dg-output {~S, 3, 1[\r\n]+} }
+   { dg-output {~S, -1, 1[\r\n]+} }
+   { dg-output {~S, -1, 1[\r\n]+} }
+   { dg-output {~S, 5, 1[\r\n]+} }
+   { dg-output {~S, 7, 1[\r\n]+} }
+*/
diff --git a/libgomp/testsuite/libgomp.c++/target-cdtor-2.C b/libgomp/testsuite/libgomp.c++/target-cdtor-2.C
new file mode 100644
index 0000000..75e48ca
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c++/target-cdtor-2.C
@@ -0,0 +1,140 @@
+/* Offloaded 'constructor' and 'destructor' functions, and C++ objects construction and destruction.  */
+
+/* { dg-require-effective-target init_priority } */
+
+/* { dg-additional-options -fdump-tree-optimized-raw-asmname }
+   { dg-additional-options -foffload-options=-fdump-tree-optimized-raw-asmname } */
+
+#include <omp.h>
+#include <vector>
+
+#pragma omp declare target
+
+struct S
+{
+  int x;
+
+  S()
+    : x(-1)
+  {
+    __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device());
+  }
+  S(int x)
+    : x(x)
+  {
+    __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device());
+  }
+  ~S()
+  {
+    __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device());
+  }
+};
+
+#pragma omp end declare target
+
+S sH1 __attribute__((init_priority(1500))) (7);
+
+#pragma omp declare target
+
+S sHD1 __attribute__((init_priority(2000))) (5);
+
+std::vector<S> svHD1 __attribute__((init_priority(1000))) (2);
+
+static void
+__attribute__((constructor(20000)))
+initDH1()
+{
+  __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+static void
+__attribute__((destructor(20000)))
+finiDH1()
+{
+  __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+#pragma omp end declare target
+
+S sH2 __attribute__((init_priority(500))) (3);
+
+static void
+__attribute__((constructor(10000)))
+initH1()
+{
+  __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+static void
+__attribute__((destructor(10000)))
+finiH1()
+{
+  __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+int main()
+{
+  int c = 0;
+
+  __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+
+#pragma omp target map(c)
+  {
+    __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+  }
+
+#pragma omp target map(c)
+  {
+    __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+  }
+
+  __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+
+  return 0;
+}
+
+/* Verify '__cxa_atexit' calls.
+
+   For the host, there are four expected calls:
+   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, } 4 optimized { target cxa_atexit } } }
+   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+   { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH2, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+
+   For the device, there are two expected calls:
+   { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, } 2 optimized { target cxa_atexit } } }
+   { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+   { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+*/
+
+/* Defined order in which 'constructor' functions, and 'destructor' functions are run, and C++ objects are constructed (..., and destructed in reverse order).
+   { dg-output {S, 3, 1[\r\n]+} }
+   { dg-output {S, -1, 1[\r\n]+} }
+   { dg-output {S, -1, 1[\r\n]+} }
+   { dg-output {S, 7, 1[\r\n]+} }
+   { dg-output {S, 5, 1[\r\n]+} }
+   { dg-output {initH1, 1[\r\n]+} }
+   { dg-output {initDH1, 1[\r\n]+} }
+   { dg-output {main:1, 1[\r\n]+} }
+   { dg-output {S, -1, 0[\r\n]+} { target offload_device } }
+   { dg-output {S, -1, 0[\r\n]+} { target offload_device } }
+   { dg-output {S, 5, 0[\r\n]+} { target offload_device } }
+   { dg-output {initDH1, 0[\r\n]+} { target offload_device } }
+   { dg-output {main:2, 1[\r\n]+} { target  { ! offload_device } } }
+   { dg-output {main:2, 0[\r\n]+} { target offload_device } }
+   { dg-output {main:3, 1[\r\n]+} { target { ! offload_device } } }
+   { dg-output {main:3, 0[\r\n]+} { target offload_device } }
+   { dg-output {main:4, 1[\r\n]+} }
+   { dg-output {~S, 5, 0[\r\n]+} { target offload_device } }
+   { dg-output {~S, -1, 0[\r\n]+} { target offload_device } }
+   { dg-output {~S, -1, 0[\r\n]+} { target offload_device } }
+   { dg-output {finiDH1, 0[\r\n]+} { target offload_device } }
+   { dg-output {~S, 5, 1[\r\n]+} }
+   { dg-output {~S, 7, 1[\r\n]+} }
+   { dg-output {~S, -1, 1[\r\n]+} }
+   { dg-output {~S, -1, 1[\r\n]+} }
+   { dg-output {~S, 3, 1[\r\n]+} }
+   { dg-output {finiDH1, 1[\r\n]+} }
+   { dg-output {finiH1, 1[\r\n]+} }
+*/
diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C
index 3cdedf4..d4dccf1 100644
--- a/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C
+++ b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C
@@ -14,8 +14,10 @@
 
 /* In this specific C++ arrangement, distilled from PR118794, GCC synthesizes
    '__builtin_eh_pointer', '__builtin_unwind_resume' calls as dead code in 'f':
-   { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } }
-   { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } }
+   { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } }
+   { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } }
+   ..., just 'targetm.arm_eabi_unwinder' is different:
+   { dg-final { scan-tree-dump-times {gimple_call <__builtin_cxa_end_cleanup, } 1 optimized { target { arm_eabi || tic6x-*-* } } } }
    { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } }
    { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } }
    Given '-O0' and '-foffload-options=-mno-fake-exceptions', offload compilation fails:
diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C
index ef996cf..724e34b 100644
--- a/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C
+++ b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C
@@ -14,8 +14,10 @@
 
 /* In this specific C++ arrangement, distilled from PR118794, GCC synthesizes
    '__builtin_eh_pointer', '__builtin_unwind_resume' calls as dead code in 'f':
-   { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } }
-   { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } }
+   { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } }
+   { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } }
+   ..., just 'targetm.arm_eabi_unwinder' is different:
+   { dg-final { scan-tree-dump-times {gimple_call <__builtin_cxa_end_cleanup, } 1 optimized { target { arm_eabi || tic6x-*-* } } } }
    { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } }
    { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } }
    Given '-O0' and '-foffload-options=-mno-fake-exceptions', offload compilation fails:
diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1.C b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1.C
index 24e3d07..24eb7a5 100644
--- a/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1.C
+++ b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1.C
@@ -51,7 +51,9 @@ int main()
 
 /* In this specific C++ arrangement, distilled from PR118794, GCC synthesizes
    '__builtin_eh_pointer', '__builtin_unwind_resume' calls as dead code in 'f':
-   { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } }
-   { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } }
+   { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } }
+   { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } }
+   ..., just 'targetm.arm_eabi_unwinder' is different:
+   { dg-final { scan-tree-dump-times {gimple_call <__builtin_cxa_end_cleanup, } 1 optimized { target { arm_eabi || tic6x-*-* } } } }
    { dg-final { scan-offload-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } }
    { dg-final { scan-offload-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } } */
diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-1.C b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-1.C
index 2467061..a4e7a10 100644
--- a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-1.C
+++ b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-1.C
@@ -4,9 +4,6 @@
    { dg-additional-options -fexceptions } */
 /* { dg-additional-options -fdump-tree-optimized-raw }
    { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */
-/* { dg-bogus {Size expression must be absolute\.} PR119737 { target offload_target_amdgcn xfail *-*-* } 0 }
-   { dg-ice PR119737 { offload_target_amdgcn } }
-   { dg-excess-errors {'mkoffload' failures etc.} { xfail offload_target_amdgcn } } */
 
 #include "../libgomp.oacc-c++/exceptions-throw-1.C"
 
diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2.C b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2.C
index e85e6c3..97f4845 100644
--- a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2.C
+++ b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2.C
@@ -4,9 +4,6 @@
    { dg-additional-options -fexceptions } */
 /* { dg-additional-options -fdump-tree-optimized-raw }
    { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */
-/* { dg-bogus {Size expression must be absolute\.} PR119737 { target offload_target_amdgcn xfail *-*-* } 0 }
-   { dg-ice PR119737 { offload_target_amdgcn } }
-   { dg-excess-errors {'mkoffload' failures etc.} { xfail offload_target_amdgcn } } */
 
 #include "../libgomp.oacc-c++/exceptions-throw-2.C"
 
diff --git a/libgomp/testsuite/libgomp.c-c++-common/interop-2.c b/libgomp/testsuite/libgomp.c-c++-common/interop-2.c
new file mode 100644
index 0000000..a7526dc
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/interop-2.c
@@ -0,0 +1,129 @@
+/* { dg-do run } */
+/* { dg-additional-options "-lm" } */
+
+/* Note: At the time this program was written, Nvptx was not asynchronous
+   enough to trigger the issue (with a 'nowait' added); however, one
+   AMD GPUs, it triggered.  */
+
+/* Test whether nowait / dependency is handled correctly.
+   Motivated by OpenMP_VV's 5.1/interop/test_interop_target.c
+
+   The code actually only creates a streaming object without actually using it,
+   except for dependency tracking.
+
+   Note that there is a difference between having a steaming (targetsync) object
+   and not (= omp_interop_none); at least if one assumes that omp_interop_none
+   does not include 'targetsync' as (effective) interop type - in that case,
+   'nowait' has no effect and the 'depend' is active as included task, otherwise
+   the code continues with the depend being active only for the about to be
+   destroyed or used thread.
+
+   The OpenMP spec states (here 6.0):
+     "If the interop-type set includes 'targetsync', an empty mergeable task is
+      generated.  If the 'nowait' clause is not present on the construct then
+      the task is also an included task. If the interop-type set does not
+      include 'targetsync', the 'nowait' clause has no effect.  Any depend
+      clauses that are present on the construct apply to the generated task.  */
+
+#include <omp.h>
+
+void
+test_async (const int dev)
+{
+  constexpr int N = 2048;
+  constexpr int ulp = 4;
+  constexpr double M_PI = 2.0 * __builtin_acos (0.0);
+  omp_interop_t obj1, obj2;
+  double A[N] = { };
+  int B[N] = { };
+
+  /* Create interop object.  */
+  #pragma omp interop device(dev) init(targetsync : obj1, obj2)
+
+  if (dev == omp_initial_device || dev == omp_get_num_devices ())
+    {
+      if (obj1 != omp_interop_none || obj2 != omp_interop_none)
+	__builtin_abort ();
+    }
+  else
+    {
+      if (obj1 == omp_interop_none || obj2 == omp_interop_none)
+	__builtin_abort ();
+    }
+
+  /* DOUBLE */
+
+  /* Now in the background update it, slowly enough that the
+     code afterwards is reached while still running asynchronously.
+     As OpenMP_VV's Issue #863 shows, the overhead is high enough to
+     fail even when only doing an atomic integer increment.  */
+
+  #pragma omp target device(dev) map(A) depend(out: A[:N]) nowait
+  for (int i = 0; i < N; i++)
+    #pragma omp atomic update
+    A[i] += __builtin_sin (2*i*M_PI/N);
+
+  /* DESTROY take care of the dependeny such that ... */
+
+  if (obj1 == omp_interop_none)
+    {
+      // Same as below as 'nowait' is ignored.
+      #pragma omp interop destroy(obj1) depend(in: A[:N]) nowait
+    }
+  else
+    {
+      #pragma omp interop destroy(obj1) depend(in: A[:N])
+    }
+
+  /* ... this code is only executed once the dependency as been fulfilled.  */
+
+  /* Check the value - part I: quick, avoid A[0] == sin(0) = 0.  */
+  for (int i = 1; i < N; i++)
+    if (A[i] == 0.0)
+      __builtin_abort ();
+
+  /* Check the value - part II: throughly */
+  for (int i = 0; i < N; i++)
+    {
+      double x = A[i];
+      double y = __builtin_sin (2*i*M_PI/N);
+      if (__builtin_fabs (x - y) > ulp * __builtin_fabs (x+y) * __DBL_EPSILON__)
+	__builtin_abort ();
+    }
+
+  /* Integer */
+
+  #pragma omp target device(dev) map(B) depend(out: B[:N]) nowait
+  for (int i = 0; i < N; i++)
+    #pragma omp atomic update
+    B[i] += 42;
+
+  /* Same - but using USE.  */
+  if (obj2 == omp_interop_none)
+    {
+      // Same as below as 'nowait' is ignored.
+      #pragma omp interop use(obj2) depend(in: B[:N]) nowait
+    }
+  else
+    {
+      #pragma omp interop use(obj2) depend(in: B[:N])
+    }
+
+  for (int i = 0; i < N; i++)
+    if (B[i] != 42)
+      __builtin_abort ();
+
+  #pragma omp interop destroy(obj2)
+}
+
+int
+main ()
+{
+  int ndev = omp_get_num_devices ();
+
+  for (int dev = 0; dev <= ndev; dev++)
+    test_async (dev);
+  test_async (omp_initial_device);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-cdtor-1.c b/libgomp/testsuite/libgomp.c-c++-common/target-cdtor-1.c
new file mode 100644
index 0000000..e6099cf
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-cdtor-1.c
@@ -0,0 +1,89 @@
+/* Offloaded 'constructor' and 'destructor' functions.  */
+
+#include <omp.h>
+
+#pragma omp declare target
+
+static void
+__attribute__((constructor))
+initHD1()
+{
+  __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+static void
+__attribute__((constructor))
+initHD2()
+{
+  __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+static void
+__attribute__((destructor))
+finiHD1()
+{
+  __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+static void
+__attribute__((destructor))
+finiHD2()
+{
+  __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+#pragma omp end declare target
+
+static void
+__attribute__((constructor))
+initH1()
+{
+  __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+static void
+__attribute__((destructor))
+finiH2()
+{
+  __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+int main()
+{
+  int c = 0;
+
+  __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+
+#pragma omp target map(c)
+  {
+    __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+  }
+
+#pragma omp target map(c)
+  {
+    __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+  }
+
+  __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+
+  return 0;
+}
+
+/* The order is undefined, in which same-priority 'constructor' functions, and 'destructor' functions are run.
+   { dg-output {init[^,]+, 1[\r\n]+} }
+   { dg-output {init[^,]+, 1[\r\n]+} }
+   { dg-output {init[^,]+, 1[\r\n]+} }
+   { dg-output {main:1, 1[\r\n]+} }
+   { dg-output {initHD[^,]+, 0[\r\n]+} { target offload_device } }
+   { dg-output {initHD[^,]+, 0[\r\n]+} { target offload_device } }
+   { dg-output {main:2, 1[\r\n]+} { target { ! offload_device } } }
+   { dg-output {main:2, 0[\r\n]+} { target offload_device } }
+   { dg-output {main:3, 1[\r\n]+} { target  { ! offload_device } } }
+   { dg-output {main:3, 0[\r\n]+} { target offload_device } }
+   { dg-output {main:4, 1[\r\n]+} }
+   { dg-output {finiHD[^,]+, 0[\r\n]+} { target offload_device } }
+   { dg-output {finiHD[^,]+, 0[\r\n]+} { target offload_device } }
+   { dg-output {fini[^,]+, 1[\r\n]+} }
+   { dg-output {fini[^,]+, 1[\r\n]+} }
+   { dg-output {fini[^,]+, 1[\r\n]+} }
+*/
diff --git a/libgomp/testsuite/libgomp.c/interop-cublas-full.c b/libgomp/testsuite/libgomp.c/interop-cublas-full.c
new file mode 100644
index 0000000..2df5277
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-cublas-full.c
@@ -0,0 +1,176 @@
+/* { dg-require-effective-target openacc_cublas } */
+/* { dg-additional-options "-lcublas" } */
+
+/* NOTE: This file is also included by libgomp.c-c++-common/interop-cudablas-libonly.c
+   to test the fallback version.  */
+
+/* Check whether cuBlas' daxpy works with an interop object.
+     daxpy(N, DA, DX, INCX, DY, INCY)
+   calculates (for DX = DY = 1):
+     DY(1:N) =  DY(1:N) + DA * DX(1:N)
+   and otherwise N array elements, taking every INCX-th or INCY-th one, repectively.
+
+Based on the interop example in OpenMP's example document  */
+
+/* Minimal check whether CUDA works - by checking whether the API routines
+   seem to work.  This includes a fallback if the header is not
+   available.  */
+
+#include <assert.h>
+#include <omp.h>
+#include "../libgomp.c-c++-common/on_device_arch.h"
+
+
+#if __has_include(<cuda.h>) && __has_include(<cudaTypedefs.h>) && __has_include(<cuda_runtime.h>) && __has_include(<cublas_v2.h>) && !defined(USE_CUDA_FALLBACK_HEADER)
+  #include <cuda.h>
+  #include <cudaTypedefs.h>
+  #include <cuda_runtime.h>
+  #include <cublas_v2.h>
+
+#else
+  /* Add a poor man's fallback declaration.  */
+  #if USE_CUDA_FALLBACK_HEADER
+    // Don't warn.
+  #elif !__has_include(<cuda.h>)
+    #warning "Using GCC's cuda.h as fallback for cuda.h"
+  #elif !__has_include(<cudaTypedefs.h>)
+    #warning "Using GCC's cuda.h as fallback for cudaTypedefs.h"
+  #elif !__has_include(<cuda_runtime.h>)
+    #warning "Using GCC's cuda.h as fallback for cuda_runtime.h"
+  #else
+    #warning "Using GCC's cuda.h as fallback for cublas_v2.h"
+  #endif
+  #include "../../../include/cuda/cuda.h"
+
+  typedef enum {
+    CUBLAS_STATUS_SUCCESS = 0,
+  } cublasStatus_t;
+
+  typedef CUstream cudaStream_t;
+  typedef struct cublasContext* cublasHandle_t;
+
+  #define cublasCreate cublasCreate_v2
+  cublasStatus_t cublasCreate_v2 (cublasHandle_t *);
+
+  #define cublasSetStream cublasSetStream_v2
+  cublasStatus_t cublasSetStream_v2 (cublasHandle_t, cudaStream_t);
+
+  #define cublasDaxpy cublasDaxpy_v2
+  cublasStatus_t cublasDaxpy_v2(cublasHandle_t, int, const double*, const double*, int, double*, int);
+#endif
+
+static int used_variant = 0;
+
+void
+run_cuBlasdaxpy (int n, double da, const double *dx, int incx, double *dy, int incy, omp_interop_t obj)
+{
+  used_variant = 1;
+
+  omp_interop_rc_t res;
+  cublasStatus_t stat;
+
+  omp_intptr_t fr = omp_get_interop_int(obj, omp_ipr_fr_id, &res);
+  assert (res == omp_irc_success && fr == omp_ifr_cuda);
+
+  cudaStream_t stream = (cudaStream_t) omp_get_interop_ptr (obj, omp_ipr_targetsync, &res);
+  assert (res == omp_irc_success);
+
+  cublasHandle_t handle;
+  stat = cublasCreate (&handle);
+  assert (stat == CUBLAS_STATUS_SUCCESS);
+
+  stat = cublasSetStream (handle, stream);
+  assert (stat == CUBLAS_STATUS_SUCCESS);
+
+  /* 'da' can be in host or device space, 'dx' and 'dy' must be in device space.  */
+  stat = cublasDaxpy (handle, n, &da, dx, 1, dy, 1) ;
+  assert (stat == CUBLAS_STATUS_SUCCESS);
+}
+
+
+#pragma omp declare variant(run_cuBlasdaxpy) \
+                       match(construct={dispatch}, target_device={kind(nohost), arch("nvptx")}) \
+                       adjust_args(need_device_ptr : dx, dy) \
+                       append_args(interop(targetsync, prefer_type("cuda")))
+
+void
+run_daxpy (int n, double da, const double *dx, int incx, double *dy, int incy)
+{
+  used_variant = 2;
+
+  if (incx == 1 && incy == 1)
+    #pragma omp simd
+    for (int i = 0; i < n; i++)
+      dy[i] += da * dx[i];
+  else
+    {
+      int ix = 0;
+      int iy = 0;
+      for (int i = 0; i < n; i++)
+	{
+	  dy[iy] += da * dx[ix];
+	  ix += incx;
+	  iy += incy;
+	}
+    }
+}
+
+
+void
+run_test (int dev)
+{
+  constexpr int N = 1024;
+
+  // A = {1,2,...,N}
+  // B = {-1, -2, ..., N}
+  // B' = daxpy (N, 3, A, incx=1, B, incy=1)
+  //    = B + 3*A
+  // -> B' = {0, 2, 4, 6, ... }
+
+  double A[N], B[N];
+  double factor = 3.0;
+  for (int i = 0; i < N; i++)
+    {
+      A[i] = i;
+      B[i] = -i;
+    }
+
+  if (dev != omp_initial_device && dev != omp_get_num_devices ())
+    {
+      #pragma omp target enter data device(dev) map(A, B)
+    }
+
+  used_variant = 99;
+  #pragma omp dispatch device(dev)
+    run_daxpy (N, factor, A, 1, B, 1);  
+
+  if (dev != omp_initial_device && dev != omp_get_num_devices ())
+    {
+      #pragma omp target exit data device(dev) map(release: A) map(from: B)
+
+      int tmp = omp_get_default_device ();
+      omp_set_default_device (dev);
+      if (on_device_arch_nvptx ())
+	assert (used_variant == 1);
+      else
+	assert (used_variant == 2);
+      omp_set_default_device (tmp);
+    }
+  else
+    assert (used_variant == 2);
+
+  for (int i = 0; i < N; i++)
+    assert (B[i] == 2*i);
+}
+
+int   
+main () 
+{   
+  int ndev = omp_get_num_devices ();
+
+  for (int dev = 0; dev <= ndev; dev++)
+    run_test (dev);
+  run_test (omp_initial_device);  
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/interop-cublas-libonly.c b/libgomp/testsuite/libgomp.c/interop-cublas-libonly.c
new file mode 100644
index 0000000..89c0652
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-cublas-libonly.c
@@ -0,0 +1,7 @@
+/* { dg-require-effective-target openacc_libcublas } */
+/* { dg-additional-options "-lcublas" } */
+
+/* Same as interop-cudablas-full.c, but also works if the header is not available. */
+
+#define USE_CUDA_FALLBACK_HEADER 1
+#include "interop-cublas-full.c"
diff --git a/libgomp/testsuite/libgomp.c/interop-cuda-full.c b/libgomp/testsuite/libgomp.c/interop-cuda-full.c
new file mode 100644
index 0000000..38aa6b1
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-cuda-full.c
@@ -0,0 +1,159 @@
+/* { dg-require-effective-target openacc_cuda } */
+/* { dg-require-effective-target openacc_cudart } */
+/* { dg-additional-options "-lcuda -lcudart" } */
+
+/* NOTE: This file is also included by libgomp.c-c++-common/interop-cuda-libonly.c
+   to test the fallback version, which defines USE_CUDA_FALLBACK_HEADER.  */
+
+/* Minimal check whether CUDA works - by checking whether the API routines
+   seem to work.  This includes a fallback if the header is not
+   available.  */
+
+#include <assert.h>
+#include <omp.h>
+
+#if __has_include(<cuda.h>) && __has_include(<cudaTypedefs.h>) && __has_include(<cuda_runtime.h>) && !defined(USE_CUDA_FALLBACK_HEADER)
+  #include <cuda.h>
+  #include <cudaTypedefs.h>
+  #include <cuda_runtime.h>
+
+#else
+  /* Add a poor man's fallback declaration.  */
+  #if USE_CUDA_FALLBACK_HEADER
+    // Don't warn.
+  #elif !__has_include(<cuda.h>)
+    #warning "Using GCC's cuda.h as fallback for cuda.h"
+  #elif !__has_include(<cudaTypedefs.h>)
+    #warning "Using GCC's cuda.h as fallback for cudaTypedefs.h"
+  #else
+    #warning "Using GCC's cuda.h as fallback for cuda_runtime.h"
+  #endif
+  #include "../../../include/cuda/cuda.h"
+
+  typedef int cudaError_t;
+  typedef CUstream cudaStream_t;
+  enum {
+    cudaSuccess = 0
+  };
+
+  enum cudaDeviceAttr {
+    cudaDevAttrClockRate = 13,
+    cudaDevAttrMaxGridDimX = 5
+  };
+
+  cudaError_t cudaDeviceGetAttribute (int *, enum cudaDeviceAttr, int);
+  cudaError_t cudaStreamQuery(cudaStream_t);
+  CUresult cuCtxGetApiVersion(CUcontext, unsigned int *);
+  CUresult cuStreamGetCtx (CUstream, CUcontext *);
+#endif
+
+int
+main ()
+{
+  int ivar;
+  unsigned uvar;
+  omp_interop_rc_t res;
+  omp_interop_t obj_cuda = omp_interop_none;
+  omp_interop_t obj_cuda_driver = omp_interop_none;
+  cudaError_t cuda_err;
+  CUresult cu_err;
+
+  #pragma omp interop init(target, targetsync, prefer_type("cuda") : obj_cuda) \
+		      init(target, targetsync, prefer_type("cuda_driver") : obj_cuda_driver) \
+
+  omp_interop_fr_t fr = (omp_interop_fr_t) omp_get_interop_int (obj_cuda, omp_ipr_fr_id, &res);
+  assert (res == omp_irc_success);
+  assert (fr == omp_ifr_cuda);
+
+  fr = (omp_interop_fr_t) omp_get_interop_int (obj_cuda_driver, omp_ipr_fr_id, &res);
+  assert (res == omp_irc_success);
+  assert (fr == omp_ifr_cuda_driver);
+
+  ivar = (int) omp_get_interop_int (obj_cuda, omp_ipr_vendor, &res);
+  assert (res == omp_irc_success);
+  assert (ivar == 11);
+
+  ivar = (int) omp_get_interop_int (obj_cuda_driver, omp_ipr_vendor, &res);
+  assert (res == omp_irc_success);
+  assert (ivar == 11);
+
+
+  /* Check whether the omp_ipr_device -> cudaDevice_t yields a valid device.  */
+
+  CUdevice cu_dev = (int) omp_get_interop_int (obj_cuda_driver, omp_ipr_device, &res);
+  assert (res == omp_irc_success);
+
+  /* Assume a clock size is available and > 1 GHz; value is in kHz.  */
+  cu_err = cuDeviceGetAttribute (&ivar, cudaDevAttrClockRate, cu_dev);
+  assert (cu_err == CUDA_SUCCESS);
+  assert (ivar > 1000000 /* kHz */);
+
+  /* Assume that the MaxGridDimX is available and > 1024.  */
+  cu_err = cuDeviceGetAttribute (&ivar, cudaDevAttrMaxGridDimX, cu_dev);
+  assert (cu_err == CUDA_SUCCESS);
+  assert (ivar > 1024);
+
+  int cuda_dev = (int) omp_get_interop_int (obj_cuda, omp_ipr_device, &res);
+  assert (res == omp_irc_success);
+  assert (cuda_dev == (CUdevice) cu_dev); // Assume they are the same ...
+
+  /* Assume a clock size is available and > 1 GHz; value is in kHz.  */
+  cuda_err = cudaDeviceGetAttribute (&ivar, cudaDevAttrClockRate, cuda_dev);
+  assert (cuda_err == cudaSuccess);
+  assert (ivar > 1000000 /* kHz */);
+
+  /* Assume that the MaxGridDimX is available and > 1024.  */
+  cuda_err = cudaDeviceGetAttribute (&ivar, cudaDevAttrMaxGridDimX, cuda_dev);
+  assert (cuda_err == cudaSuccess);
+  assert (ivar > 1024);
+
+
+
+
+  /* Check whether the omp_ipr_device_context -> CUcontext yields a context.  */
+
+  CUcontext cu_ctx = (CUcontext) omp_get_interop_ptr (obj_cuda_driver, omp_ipr_device_context, &res);
+  assert (res == omp_irc_success);
+
+  /* Assume API Version > 0 for Nvidia, cudaErrorNotSupported for AMD.  */
+  uvar = 99;
+  cu_err = cuCtxGetApiVersion (cu_ctx, &uvar);
+  assert (cu_err == CUDA_SUCCESS);
+  assert (uvar > 0);
+
+
+  /* Check whether the omp_ipr_targetsync -> cudaStream_t yields a stream.  */
+
+  cudaStream_t cuda_sm = (cudaStream_t) omp_get_interop_ptr (obj_cuda, omp_ipr_targetsync, &res);
+  assert (res == omp_irc_success);
+
+  CUstream cu_sm = (cudaStream_t) omp_get_interop_ptr (obj_cuda_driver, omp_ipr_targetsync, &res);
+  assert (res == omp_irc_success);
+
+  assert ((void*) cu_sm != (void*) cuda_sm); // Type compatible but should have created two streams
+
+  int dev_stream = 99;
+#if CUDA_VERSION >= 12080
+  cuda_err = cudaStreamGetDevice (cuda_sm, &dev_stream);
+  assert (cuda_err == cudaSuccess);
+#else
+  cu_err = cuStreamGetCtx (cu_sm, &cu_ctx) != CUDA_SUCCESS;
+  if (cu_err == CUDA_SUCCESS)
+    cuda_err = cuCtxPushCurrent (cu_ctx) != CUDA_SUCCESS;
+  if (cu_err == CUDA_SUCCESS)
+    cuda_err = cuCtxGetDevice (&dev_stream) != CUDA_SUCCESS;
+  if (cu_err == CUDA_SUCCESS)
+    cu_err = cuCtxPopCurrent (&cu_ctx) != CUDA_SUCCESS;
+  assert (cu_err == CUDA_SUCCESS);
+#endif
+  assert (dev_stream == cuda_dev);
+
+  /* All jobs should have been completed (as there were none none)  */
+  cuda_err = cudaStreamQuery (cuda_sm);
+  assert (cuda_err == cudaSuccess);
+
+  cu_err = cuStreamQuery (cu_sm);
+  assert (cu_err == CUDA_SUCCESS);
+
+  #pragma omp interop destroy(obj_cuda, obj_cuda_driver)
+}
diff --git a/libgomp/testsuite/libgomp.c/interop-cuda-libonly.c b/libgomp/testsuite/libgomp.c/interop-cuda-libonly.c
new file mode 100644
index 0000000..17cbb15
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-cuda-libonly.c
@@ -0,0 +1,8 @@
+/* { dg-require-effective-target openacc_libcudart } */
+/* { dg-require-effective-target openacc_libcuda } */
+/* { dg-additional-options "-lcuda -lcudart" } */
+
+/* Same as interop-cuda-full.c, but also works if the header is not available. */
+
+#define USE_CUDA_FALLBACK_HEADER 1
+#include "interop-cuda-full.c"
diff --git a/libgomp/testsuite/libgomp.c/interop-hip-amd-full.c b/libgomp/testsuite/libgomp.c/interop-hip-amd-full.c
new file mode 100644
index 0000000..d7725fc
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hip-amd-full.c
@@ -0,0 +1,7 @@
+/* { dg-require-effective-target gomp_hip_header_amd } */
+/* { dg-require-effective-target gomp_libamdhip64 } */
+/* { dg-additional-options "-lamdhip64" } */
+
+#define __HIP_PLATFORM_AMD__ 1
+
+#include "interop-hip.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hip-amd-no-hip-header.c b/libgomp/testsuite/libgomp.c/interop-hip-amd-no-hip-header.c
new file mode 100644
index 0000000..2584537
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hip-amd-no-hip-header.c
@@ -0,0 +1,8 @@
+/* { dg-require-effective-target gomp_libamdhip64 } */
+/* { dg-additional-options "-lamdhip64" } */
+
+#define __HIP_PLATFORM_AMD__ 1
+
+#define USE_HIP_FALLBACK_HEADER 1
+
+#include "interop-hip.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hip-nvidia-full.c b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-full.c
new file mode 100644
index 0000000..79af47d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-full.c
@@ -0,0 +1,8 @@
+/* { dg-require-effective-target openacc_cudart } */
+/* { dg-require-effective-target openacc_cuda } */
+/* { dg-require-effective-target gomp_hip_header_nvidia } */
+/* { dg-additional-options "-lcuda -lcudart -Wno-deprecated-declarations" } */
+
+#define __HIP_PLATFORM_NVIDIA__ 1
+
+#include "interop-hip.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-headers.c b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-headers.c
new file mode 100644
index 0000000..4586398
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-headers.c
@@ -0,0 +1,10 @@
+/* { dg-require-effective-target openacc_libcudart } */
+/* { dg-require-effective-target openacc_libcuda } */
+/* { dg-additional-options "-lcuda -lcudart" } */
+
+#define __HIP_PLATFORM_NVIDIA__ 1
+
+#define USE_HIP_FALLBACK_HEADER 1
+#define USE_CUDA_FALLBACK_HEADER 1
+
+#include "interop-hip.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c
new file mode 100644
index 0000000..4186984
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c
@@ -0,0 +1,9 @@
+/* { dg-require-effective-target openacc_cudart } */
+/* { dg-require-effective-target openacc_cuda } */
+/* { dg-additional-options "-lcuda -lcudart" } */
+
+#define __HIP_PLATFORM_NVIDIA__ 1
+
+#define USE_HIP_FALLBACK_HEADER 1
+
+#include "interop-hip.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hip.h b/libgomp/testsuite/libgomp.c/interop-hip.h
new file mode 100644
index 0000000..20a1ccb
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hip.h
@@ -0,0 +1,234 @@
+/* Minimal check whether HIP works - by checking whether the API routines
+   seem to work.  This includes various fallbacks if the header is not
+   available.  */
+
+#include <assert.h>
+#include <omp.h>
+
+#if !defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__)
+  #error "Either __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__ must be defined"
+#endif
+
+#if defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__)
+  #error "Either __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__ must be defined"
+#endif
+
+#if __has_include(<hip/hip_runtime_api.h>) && !defined(USE_HIP_FALLBACK_HEADER)
+  #include <hip/hip_runtime_api.h>
+
+#elif defined(__HIP_PLATFORM_AMD__)
+  /* Add a poor man's fallback declaration.  */
+  #if !defined(USE_HIP_FALLBACK_HEADER)
+    #warning "Using fallback declaration for <hip/hip_runtime_api.h> for __HIP_PLATFORM_AMD__"
+  #endif
+
+  typedef struct ihipStream_t* hipStream_t;
+  typedef struct ihipCtx_t* hipCtx_t;
+  typedef int hipError_t;
+  typedef int hipDevice_t;
+  enum {
+    hipSuccess = 0,
+    hipErrorNotSupported = 801
+  };
+
+  typedef enum hipDeviceAttribute_t {
+    hipDeviceAttributeClockRate = 5,
+    hipDeviceAttributeMaxGridDimX = 29
+  } hipDeviceAttribute_t;
+
+  hipError_t hipDeviceGetAttribute (int *, hipDeviceAttribute_t, hipDevice_t);
+  hipError_t hipCtxGetApiVersion (hipCtx_t, int *);
+  hipError_t hipStreamGetDevice (hipStream_t, hipDevice_t *);
+  hipError_t hipStreamQuery (hipStream_t);
+
+#elif defined(__HIP_PLATFORM_NVIDIA__)
+  /* Add a poor man's fallback declaration.  */
+  #if !defined(USE_HIP_FALLBACK_HEADER)
+    #warning "Using fallback declaration for <hip/hip_runtime_api.h> for __HIP_PLATFORM_NVIDIA__"
+  #endif
+
+  #if __has_include(<cuda.h>) && __has_include(<cudaTypedefs.h>) && __has_include(<cuda_runtime.h>) && !defined(USE_CUDA_FALLBACK_HEADER)
+    #include <cuda.h>
+    #include <cudaTypedefs.h>
+    #include <cuda_runtime.h>
+  #else
+    #if defined(USE_CUDA_FALLBACK_HEADER)
+       // no warning
+    #elif !__has_include(<cuda.h>)
+      #warning "Using GCC's cuda.h as fallback for cuda.h"
+    #elif !__has_include(<cudaTypedefs.h>)
+      #warning "Using GCC's cuda.h as fallback for cudaTypedefs.h"
+    #else
+      #warning "Using GCC's cuda.h as fallback for cuda_runtime.h"
+    #endif
+
+    #include "../../../include/cuda/cuda.h"
+
+    typedef int cudaError_t;
+    enum {
+      cudaSuccess = 0
+    };
+
+    enum cudaDeviceAttr {
+      cudaDevAttrClockRate = 13,
+      cudaDevAttrMaxGridDimX = 5
+    };
+
+    cudaError_t cudaDeviceGetAttribute (int *, enum cudaDeviceAttr, int);
+    CUresult cuCtxGetApiVersion(CUcontext, unsigned int *);
+    CUresult cuStreamGetCtx (CUstream, CUcontext *);
+  #endif
+
+  typedef CUstream hipStream_t;
+  typedef CUcontext hipCtx_t;
+  typedef CUdevice hipDevice_t;
+
+  typedef int hipError_t;
+  typedef int hipDevice_t;
+  enum {
+    hipSuccess = 0,
+    hipErrorNotSupported = 801
+  };
+
+
+  typedef enum hipDeviceAttribute_t {
+    hipDeviceAttributeClockRate = 5,
+    hipDeviceAttributeMaxGridDimX = 29
+  } hipDeviceAttribute_t;
+
+  inline static hipError_t
+  hipDeviceGetAttribute (int *ival, hipDeviceAttribute_t attr, hipDevice_t dev)
+  {
+    enum cudaDeviceAttr cuattr;
+    switch (attr)
+      {
+      case hipDeviceAttributeClockRate:
+	cuattr = cudaDevAttrClockRate;
+	break;
+      case hipDeviceAttributeMaxGridDimX:
+	cuattr = cudaDevAttrMaxGridDimX;
+	break;
+      default:
+	assert (0);
+      }
+    return cudaDeviceGetAttribute (ival, cuattr, dev) != cudaSuccess;
+  }
+
+  inline static hipError_t
+  hipCtxGetApiVersion (hipCtx_t ctx, int *ver)
+  {
+    unsigned uver;
+    hipError_t err;
+    err = cuCtxGetApiVersion (ctx, &uver) != CUDA_SUCCESS;
+    *ver = (int) uver;
+    return err;
+  }
+
+  inline static hipError_t
+  hipStreamGetDevice (hipStream_t stream, hipDevice_t *dev)
+  {
+#if CUDA_VERSION >= 12080
+    return cudaStreamGetDevice (stream, dev);
+#else
+    hipError_t err;
+    CUcontext ctx;
+    err = cuStreamGetCtx (stream, &ctx) != CUDA_SUCCESS;
+    if (err == hipSuccess)
+      err = cuCtxPushCurrent (ctx) != CUDA_SUCCESS;
+    if (err == hipSuccess)
+      err = cuCtxGetDevice (dev) != CUDA_SUCCESS;
+    if (err == hipSuccess)
+      err = cuCtxPopCurrent (&ctx) != CUDA_SUCCESS;
+    return err;
+#endif
+  }
+
+  inline static hipError_t
+  hipStreamQuery (hipStream_t stream)
+  {
+    return cuStreamQuery (stream) != CUDA_SUCCESS;
+  }
+
+#else
+  #error "should be unreachable"
+#endif
+
+int
+main ()
+{
+  int ivar;
+  omp_interop_rc_t res;
+  omp_interop_t obj = omp_interop_none;
+  hipError_t hip_err;
+
+  #pragma omp interop init(target, targetsync, prefer_type("hip") : obj)
+
+  omp_interop_fr_t fr = (omp_interop_fr_t) omp_get_interop_int (obj, omp_ipr_fr_id, &res);
+  assert (res == omp_irc_success);
+  assert (fr == omp_ifr_hip);
+
+  ivar = (int) omp_get_interop_int (obj, omp_ipr_vendor, &res);
+  assert (res == omp_irc_success);
+  int vendor_is_amd = ivar == 1;
+  #if defined(__HIP_PLATFORM_AMD__)
+    assert (ivar == 1);
+  #elif defined(__HIP_PLATFORM_NVIDIA__)
+    assert (ivar == 11);
+  #else
+    assert (0);
+  #endif
+
+
+  /* Check whether the omp_ipr_device -> hipDevice_t yields a valid device.  */
+
+  hipDevice_t hip_dev = (int) omp_get_interop_int (obj, omp_ipr_device, &res);
+  assert (res == omp_irc_success);
+
+  /* Assume a clock size is available and > 1 GHz; value is in kHz.  */
+  hip_err = hipDeviceGetAttribute (&ivar, hipDeviceAttributeClockRate, hip_dev);
+  assert (hip_err == hipSuccess);
+  assert (ivar > 1000000 /* kHz */);
+
+  /* Assume that the MaxGridDimX is available and > 1024.  */
+  hip_err = hipDeviceGetAttribute (&ivar, hipDeviceAttributeMaxGridDimX, hip_dev);
+  assert (hip_err == hipSuccess);
+  assert (ivar > 1024);
+
+
+  /* Check whether the omp_ipr_device_context -> hipCtx_t yields a context.  */
+
+  hipCtx_t hip_ctx = (hipCtx_t) omp_get_interop_ptr (obj, omp_ipr_device_context, &res);
+  assert (res == omp_irc_success);
+
+  /* Assume API Version > 0 for Nvidia, hipErrorNotSupported for AMD.  */
+  ivar = -99;
+  #pragma GCC diagnostic push
+  #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+    hip_err = hipCtxGetApiVersion (hip_ctx, &ivar);
+  #pragma GCC diagnostic pop
+
+  if (vendor_is_amd)
+    assert (hip_err == hipErrorNotSupported && ivar == -99);
+  else
+    {
+      assert (hip_err == hipSuccess);
+      assert (ivar > 0);
+    }
+
+
+  /* Check whether the omp_ipr_targetsync -> hipStream_t yields a stream.  */
+
+  hipStream_t hip_sm = (hipStream_t) omp_get_interop_ptr (obj, omp_ipr_targetsync, &res);
+  assert (res == omp_irc_success);
+
+  hipDevice_t dev_stream = 99;
+  hip_err = hipStreamGetDevice (hip_sm, &dev_stream);
+  assert (hip_err == hipSuccess);
+  assert (dev_stream == hip_dev);
+
+  /* All jobs should have been completed (as there were none none)  */
+  hip_err = hipStreamQuery (hip_sm);
+  assert (hip_err == hipSuccess);
+
+  #pragma omp interop destroy(obj)
+}
diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas-amd-full.c b/libgomp/testsuite/libgomp.c/interop-hipblas-amd-full.c
new file mode 100644
index 0000000..53c05bd
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hipblas-amd-full.c
@@ -0,0 +1,7 @@
+/* { dg-require-effective-target gomp_hip_header_amd } */
+/* { dg-require-effective-target gomp_libhipblas } */
+/* { dg-additional-options "-lhipblas" } */
+
+#define __HIP_PLATFORM_AMD__ 1
+
+#include "interop-hipblas.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas-amd-no-hip-header.c b/libgomp/testsuite/libgomp.c/interop-hipblas-amd-no-hip-header.c
new file mode 100644
index 0000000..0ea3133
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hipblas-amd-no-hip-header.c
@@ -0,0 +1,8 @@
+/* { dg-require-effective-target gomp_libhipblas } */
+/* { dg-additional-options "-lhipblas" } */
+
+#define __HIP_PLATFORM_AMD__ 1
+
+#define USE_HIP_FALLBACK_HEADER 1
+
+#include "interop-hipblas.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-full.c b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-full.c
new file mode 100644
index 0000000..ed428c6
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-full.c
@@ -0,0 +1,7 @@
+/* { dg-require-effective-target openacc_cublas } */
+/* { dg-require-effective-target gomp_hip_header_nvidia } */
+/* { dg-additional-options "-lcublas -Wno-deprecated-declarations" } */
+
+#define __HIP_PLATFORM_NVIDIA__ 1
+
+#include "interop-hipblas.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-headers.c b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-headers.c
new file mode 100644
index 0000000..1a31b30
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-headers.c
@@ -0,0 +1,9 @@
+/* { dg-require-effective-target openacc_libcublas } */
+/* { dg-additional-options "-lcublas" } */
+
+#define __HIP_PLATFORM_NVIDIA__ 1
+
+#define USE_HIP_FALLBACK_HEADER 1
+#define USE_CUDA_FALLBACK_HEADER 1
+
+#include "interop-hipblas.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-hip-header.c b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-hip-header.c
new file mode 100644
index 0000000..f85c13b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-hip-header.c
@@ -0,0 +1,8 @@
+/* { dg-require-effective-target openacc_cublas } */
+/* { dg-additional-options "-lcublas" } */
+
+#define __HIP_PLATFORM_NVIDIA__ 1
+
+#define USE_HIP_FALLBACK_HEADER 1
+
+#include "interop-hipblas.h"
diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas.h b/libgomp/testsuite/libgomp.c/interop-hipblas.h
new file mode 100644
index 0000000..d7cb174
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hipblas.h
@@ -0,0 +1,240 @@
+/* Check whether hipBlas' daxpy works with an interop object.
+     daxpy(N, DA, DX, INCX, DY, INCY)
+   calculates (for DX = DY = 1):
+     DY(1:N) =  DY(1:N) + DA * DX(1:N)
+   and otherwise N array elements, taking every INCX-th or INCY-th one, repectively.
+
+Based on the interop example in OpenMP's example document  */
+
+/* Minimal check whether HIP works - by checking whether the API routines
+   seem to work.  This includes a fallback if the header is not
+   available.  */
+
+#if !defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__)
+  #error "Either __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__ must be defined"
+#endif
+
+#if defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__)
+  #error "Either __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__ must be defined"
+#endif
+
+
+#include <assert.h>
+#include <omp.h>
+#include "../libgomp.c-c++-common/on_device_arch.h"
+
+
+#if __has_include(<hipblas/hipblas.h>) && (__has_include(<library_types.h>) || !defined(__HIP_PLATFORM_NVIDIA__)) && !defined(USE_HIP_FALLBACK_HEADER)
+  #ifdef __HIP_PLATFORM_NVIDIA__
+    /* There seems to be an issue with hip/library_types.h including
+       CUDA's "library_types.h". Include CUDA's one explicitly here.
+       Could possibly worked around by using -isystem vs. -I.  */
+    #include <library_types.h>
+
+    /* For some reasons, the following symbols do not seem to get
+       mapped from HIP to CUDA, causing link errors.  */
+    #define hipblasSetStream cublasSetStream_v2
+    #define hipblasDaxpy cublasDaxpy_v2
+    #define hipblasCreate cublasCreate_v2
+  #endif
+  #include <hipblas/hipblas.h>
+
+#elif defined(__HIP_PLATFORM_AMD__)
+  /* Add a poor man's fallback declaration.  */
+  #if !defined(USE_HIP_FALLBACK_HEADER)
+    #warning "Using fallback declaration for <hipblas/hipblas.h> for __HIP_PLATFORM_AMD__"
+  #endif
+
+  typedef enum
+  {
+    HIPBLAS_STATUS_SUCCESS = 0
+
+  } hipblasStatus_t;
+
+  typedef struct ihipStream_t* hipStream_t;
+  typedef void* hipblasHandle_t;
+
+  hipblasStatus_t hipblasCreate (hipblasHandle_t*);
+  hipblasStatus_t hipblasSetStream (hipblasHandle_t, hipStream_t);
+  hipblasStatus_t hipblasDaxpy (hipblasHandle_t, int, const double*, const double*, int, double*, int);
+
+#else
+  /* Add a poor man's fallback declaration.  */
+  #if !defined(USE_HIP_FALLBACK_HEADER)
+    #warning "Using fallback declaration for <hipblas/hipblas.h> for __HIP_PLATFORM_NVIDA__"
+  #endif
+
+  #if __has_include(<cuda.h>) && __has_include(<cudaTypedefs.h>) && __has_include(<cuda_runtime.h>) && __has_include(<cublas_v2.h>) && !defined(USE_CUDA_FALLBACK_HEADER)
+    #include <cuda.h>
+    #include <cudaTypedefs.h>
+    #include <cuda_runtime.h>
+    #include <cublas_v2.h>
+
+  #else
+    /* Add a poor man's fallback declaration.  */
+    #if defined(USE_CUDA_FALLBACK_HEADER)
+      // no warning
+    #elif !__has_include(<cuda.h>)
+      #warning "Using GCC's cuda.h as fallback for cuda.h"
+    #elif !__has_include(<cudaTypedefs.h>)
+      #warning "Using GCC's cuda.h as fallback for cudaTypedefs.h"
+    #elif !__has_include(<cuda_runtime.h>)
+      #warning "Using GCC's cuda.h as fallback for cuda_runtime.h"
+    #else
+      #warning "Using GCC's cuda.h as fallback for cublas_v2.h"
+    #endif
+    #include "../../../include/cuda/cuda.h"
+
+    typedef enum {
+      CUBLAS_STATUS_SUCCESS = 0,
+    } cublasStatus_t;
+
+    typedef CUstream cudaStream_t;
+    typedef struct cublasContext* cublasHandle_t;
+
+    #define cublasCreate cublasCreate_v2
+    cublasStatus_t cublasCreate_v2 (cublasHandle_t *);
+
+    #define cublasSetStream cublasSetStream_v2
+    cublasStatus_t cublasSetStream_v2 (cublasHandle_t, cudaStream_t);
+
+    #define cublasDaxpy cublasDaxpy_v2
+    cublasStatus_t cublasDaxpy_v2(cublasHandle_t, int, const double*, const double*, int, double*, int);
+  #endif
+
+  #define HIPBLAS_STATUS_SUCCESS CUBLAS_STATUS_SUCCESS
+  #define hipblasStatus_t cublasStatus_t
+  #define hipStream_t cudaStream_t
+  #define hipblasHandle_t cublasHandle_t
+  #define hipblasCreate cublasCreate
+  #define hipblasSetStream cublasSetStream
+  #define hipblasDaxpy cublasDaxpy
+#endif
+
+static int used_variant = 0;
+
+void
+run_hipBlasdaxpy (int n, double da, const double *dx, int incx, double *dy, int incy, omp_interop_t obj)
+{
+  used_variant = 1;
+
+  omp_interop_rc_t res;
+  hipblasStatus_t stat;
+
+  omp_intptr_t fr = omp_get_interop_int(obj, omp_ipr_fr_id, &res);
+  assert (res == omp_irc_success && fr == omp_ifr_hip);
+
+  hipStream_t stream = (hipStream_t) omp_get_interop_ptr (obj, omp_ipr_targetsync, &res);
+  assert (res == omp_irc_success);
+
+  hipblasHandle_t handle;
+  stat = hipblasCreate (&handle);
+  assert (stat == HIPBLAS_STATUS_SUCCESS);
+
+  stat = hipblasSetStream (handle, stream);
+  assert (stat == HIPBLAS_STATUS_SUCCESS);
+
+  /* 'da' can be in host or device space, 'dx' and 'dy' must be in device space.  */
+  stat = hipblasDaxpy (handle, n, &da, dx, 1, dy, 1) ;
+  assert (stat == HIPBLAS_STATUS_SUCCESS);
+}
+
+#if defined(__HIP_PLATFORM_AMD__)
+#pragma omp declare variant(run_hipBlasdaxpy) \
+                       match(construct={dispatch}, target_device={kind(nohost), arch("amdgcn")}) \
+                       adjust_args(need_device_ptr : dx, dy) \
+                       append_args(interop(targetsync, prefer_type("hip")))
+#elif defined(__HIP_PLATFORM_NVIDIA__) 
+#pragma omp declare variant(run_hipBlasdaxpy) \
+                       match(construct={dispatch}, target_device={kind(nohost), arch("nvptx")}) \
+                       adjust_args(need_device_ptr : dx, dy) \
+                       append_args(interop(targetsync, prefer_type("hip")))
+#else
+ #error "wrong platform"
+#endif
+
+void
+run_daxpy (int n, double da, const double *dx, int incx, double *dy, int incy)
+{
+  used_variant = 2;
+
+  if (incx == 1 && incy == 1)
+    #pragma omp simd
+    for (int i = 0; i < n; i++)
+      dy[i] += da * dx[i];
+  else
+    {
+      int ix = 0;
+      int iy = 0;
+      for (int i = 0; i < n; i++)
+	{
+	  dy[iy] += da * dx[ix];
+	  ix += incx;
+	  iy += incy;
+	}
+    }
+}
+
+
+void
+run_test (int dev)
+{
+  constexpr int N = 1024;
+
+  // A = {1,2,...,N}
+  // B = {-1, -2, ..., N}
+  // B' = daxpy (N, 3, A, incx=1, B, incy=1)
+  //    = B + 3*A
+  // -> B' = {0, 2, 4, 6, ... }
+
+  double A[N], B[N];
+  double factor = 3.0;
+  for (int i = 0; i < N; i++)
+    {
+      A[i] = i;
+      B[i] = -i;
+    }
+
+  if (dev != omp_initial_device && dev != omp_get_num_devices ())
+    {
+      #pragma omp target enter data device(dev) map(A, B)
+    }
+
+  used_variant = 99;
+  #pragma omp dispatch device(dev)
+    run_daxpy (N, factor, A, 1, B, 1);  
+
+  if (dev != omp_initial_device && dev != omp_get_num_devices ())
+    {
+      #pragma omp target exit data device(dev) map(release: A) map(from: B)
+
+      int tmp = omp_get_default_device ();
+      omp_set_default_device (dev);
+#if defined(__HIP_PLATFORM_AMD__)
+      if (on_device_arch_gcn ())
+#else
+      if (on_device_arch_nvptx ())
+#endif
+	assert (used_variant == 1);
+      else
+	assert (used_variant == 2);
+      omp_set_default_device (tmp);
+    }
+  else
+    assert (used_variant == 2);
+
+  for (int i = 0; i < N; i++)
+    assert (B[i] == 2*i);
+}
+
+int   
+main () 
+{   
+  int ndev = omp_get_num_devices ();
+
+  for (int dev = 0; dev <= ndev; dev++)
+    run_test (dev);
+  run_test (omp_initial_device);  
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/interop-hsa.c b/libgomp/testsuite/libgomp.c/interop-hsa.c
new file mode 100644
index 0000000..cf8bc90
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/interop-hsa.c
@@ -0,0 +1,203 @@
+/* { dg-additional-options "-ldl" } */
+/* { dg-require-effective-target offload_device_gcn } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <omp.h>
+#include <assert.h>
+#include <dlfcn.h>
+#include "../../../include/hsa.h"
+#include "../../config/gcn/libgomp-gcn.h"
+
+#define STACKSIZE (100 * 1024)
+#define HEAPSIZE (10 * 1024 * 1024)
+#define ARENASIZE HEAPSIZE
+
+/* This code fragment must be optimized or else the host-fallback kernel has
+ * invalid ASM inserts.  The rest of the file can be compiled safely at -O0.  */
+#pragma omp declare target
+uintptr_t __attribute__((optimize("O1")))
+get_kernel_ptr ()
+{
+  uintptr_t val;
+  if (!omp_is_initial_device ())
+    /* "main._omp_fn.0" is the name GCC gives the first OpenMP target
+     * region in the "main" function.
+     * The ".kd" suffix is added by the LLVM assembler when it creates the
+     * kernel meta-data, and this is what we need to launch a kernel.  */
+    asm ("s_getpc_b64 %0\n\t"
+	 "s_add_u32 %L0, %L0, main._omp_fn.0.kd@rel32@lo+4\n\t"
+	 "s_addc_u32 %H0, %H0, main._omp_fn.0.kd@rel32@hi+4"
+	 : "=Sg"(val));
+  return val;
+}
+#pragma omp end declare target
+
+int
+main(int argc, char** argv)
+{
+
+  /* Load the HSA runtime DLL.  */
+  void *hsalib = dlopen ("libhsa-runtime64.so.1", RTLD_LAZY);
+  assert (hsalib);
+
+  hsa_status_t (*hsa_signal_create) (hsa_signal_value_t initial_value,
+				     uint32_t num_consumers,
+				     const hsa_agent_t *consumers,
+				     hsa_signal_t *signal)
+    = dlsym (hsalib, "hsa_signal_create");
+  assert (hsa_signal_create);
+
+  uint64_t (*hsa_queue_load_write_index_relaxed) (const hsa_queue_t *queue)
+    = dlsym (hsalib, "hsa_queue_load_write_index_relaxed");
+  assert (hsa_queue_load_write_index_relaxed);
+
+  void (*hsa_signal_store_relaxed) (hsa_signal_t signal,
+				    hsa_signal_value_t value)
+    = dlsym (hsalib, "hsa_signal_store_relaxed");
+  assert (hsa_signal_store_relaxed);
+
+  hsa_signal_value_t (*hsa_signal_wait_relaxed) (hsa_signal_t signal,
+						 hsa_signal_condition_t condition,
+						 hsa_signal_value_t compare_value,
+						 uint64_t timeout_hint,
+						 hsa_wait_state_t wait_state_hint)
+    = dlsym (hsalib, "hsa_signal_wait_relaxed");
+  assert (hsa_signal_wait_relaxed);
+
+  void (*hsa_queue_store_write_index_relaxed) (const hsa_queue_t *queue,
+					       uint64_t value)
+    = dlsym (hsalib, "hsa_queue_store_write_index_relaxed");
+  assert (hsa_queue_store_write_index_relaxed);
+
+  hsa_status_t (*hsa_signal_destroy) (hsa_signal_t signal)
+    = dlsym (hsalib, "hsa_signal_destroy");
+  assert (hsa_signal_destroy);
+
+  /* Set up the device data environment.  */
+  int test_data_value = 0;
+#pragma omp target enter data map(test_data_value)
+
+  /* Get the interop details.  */
+  int device_num = omp_get_default_device();
+  hsa_agent_t *gpu_agent;
+  hsa_queue_t *hsa_queue = NULL;
+
+  omp_interop_t interop = omp_interop_none;
+#pragma omp interop init(target, targetsync, prefer_type("hsa"): interop) device(device_num)
+  assert (interop != omp_interop_none);
+
+  omp_interop_rc_t retcode;
+  omp_interop_fr_t fr = omp_get_interop_int (interop, omp_ipr_fr_id, &retcode);
+  assert (retcode == omp_irc_success);
+  assert (fr == omp_ifr_hsa);
+
+  gpu_agent = omp_get_interop_ptr(interop, omp_ipr_device, &retcode);
+  assert (retcode == omp_irc_success);
+
+  hsa_queue = omp_get_interop_ptr(interop, omp_ipr_targetsync, &retcode);
+  assert (retcode == omp_irc_success);
+  assert (hsa_queue);
+
+  /* Call an offload kernel via OpenMP/libgomp.
+   *
+   * This kernel serves two purposes:
+   *   1) Lookup the device-side load-address of itself (thus avoiding the
+   *   need to access the libgomp internals).
+   *   2) Count how many times it is called.
+   * We then call it once using OpenMP, and once manually, and check
+   * the counter reads "2".  */
+  uint64_t kernel_object = 0;
+#pragma omp target map(from:kernel_object) map(present,alloc:test_data_value)
+  {
+    kernel_object = get_kernel_ptr ();
+    ++test_data_value;
+  }
+
+  assert (kernel_object != 0);
+
+  /* Configure the same kernel to run again, using HSA manually this time.  */
+  hsa_status_t status;
+  hsa_signal_t signal;
+  status = hsa_signal_create(1, 0, NULL, &signal);
+  assert (status == HSA_STATUS_SUCCESS);
+
+  /* The kernel is built by GCC for OpenMP, so we need to pass the same
+   * data pointers that libgomp would pass in.  */
+  struct {
+    uintptr_t test_data_value;
+    uintptr_t kernel_object;
+  } tgtaddrs;
+
+#pragma omp target data use_device_addr(test_data_value)
+  {
+    tgtaddrs.test_data_value = (uintptr_t)&test_data_value;
+    tgtaddrs.kernel_object = (uintptr_t)omp_target_alloc (8, device_num);
+  }
+
+  /* We also need to duplicate the launch ABI used by plugin-gcn.c.  */
+  struct kernargs_abi args;    /* From libgomp-gcn.h.  */
+  args.dummy1 = (int64_t)&tgtaddrs;
+  args.out_ptr = (int64_t)malloc (sizeof (struct output)); /* Host side.  */
+  args.heap_ptr = (int64_t)omp_target_alloc (HEAPSIZE, device_num);
+  args.arena_ptr = (int64_t)omp_target_alloc (ARENASIZE, device_num);
+  args.stack_ptr = (int64_t)omp_target_alloc (STACKSIZE, device_num);
+  args.arena_size_per_team = ARENASIZE;
+  args.stack_size_per_thread = STACKSIZE;
+
+  /* Build the HSA dispatch packet, and insert it into the queue.  */
+  uint64_t packet_id = hsa_queue_load_write_index_relaxed (hsa_queue);
+  const uint32_t queueMask = hsa_queue->size - 1;
+  hsa_kernel_dispatch_packet_t *dispatch_packet =
+    &(((hsa_kernel_dispatch_packet_t *)
+	  (hsa_queue->base_address))[packet_id & queueMask]);
+
+  dispatch_packet->setup = 3 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
+  dispatch_packet->workgroup_size_x = 1;
+  dispatch_packet->workgroup_size_y = 64;
+  dispatch_packet->workgroup_size_z = 1;
+  dispatch_packet->grid_size_x = 1;
+  dispatch_packet->grid_size_y = 64;
+  dispatch_packet->grid_size_z = 1;
+  dispatch_packet->completion_signal = signal;
+  dispatch_packet->kernel_object = kernel_object;
+  dispatch_packet->kernarg_address = &args;
+  dispatch_packet->private_segment_size = 0;
+  dispatch_packet->group_segment_size = 1536;
+
+  uint16_t header = 0;
+  header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
+  header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
+  header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
+
+  /* Finish writing the packet header with an atomic release.  */
+  __atomic_store_n((uint16_t*)dispatch_packet, header, __ATOMIC_RELEASE);
+
+  hsa_queue_store_write_index_relaxed (hsa_queue, packet_id + 1);
+  
+  ;/* Run the kernel and wait for it to complete.  */
+  hsa_signal_store_relaxed(hsa_queue->doorbell_signal, packet_id);
+  while (hsa_signal_wait_relaxed(signal, HSA_SIGNAL_CONDITION_LT, 1,
+	UINT64_MAX, HSA_WAIT_STATE_ACTIVE) != 0)
+    ;
+
+  /* Clean up HSA.  */
+  hsa_signal_destroy(signal);
+  free ((void*)args.out_ptr);
+  omp_target_free ((void*)args.heap_ptr, device_num);
+  omp_target_free ((void*)args.arena_ptr, device_num);
+  omp_target_free ((void*)args.stack_ptr, device_num);
+  omp_target_free ((void*)tgtaddrs.kernel_object, device_num);
+
+  /* Clean up OpenMP.  */
+  #pragma omp interop destroy(interop)
+
+  /* Bring the data back from the device.  */
+#pragma omp target exit data map(test_data_value)
+
+  /* Ensure the kernel was called twice.  Once by OpenMP, once by HSA.  */
+  assert (test_data_value == 2);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip-amd-full.F90 b/libgomp/testsuite/libgomp.fortran/interop-hip-amd-full.F90
new file mode 100644
index 0000000..bbd49dd
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/interop-hip-amd-full.F90
@@ -0,0 +1,7 @@
+! { dg-require-effective-target gomp_hipfort_module }
+! { dg-require-effective-target gomp_libamdhip64 }
+! { dg-additional-options "-lamdhip64" }
+
+#define HAVE_HIPFORT 1
+
+#include "interop-hip.h"
diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip-amd-no-module.F90 b/libgomp/testsuite/libgomp.fortran/interop-hip-amd-no-module.F90
new file mode 100644
index 0000000..0afec83
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/interop-hip-amd-no-module.F90
@@ -0,0 +1,6 @@
+! { dg-require-effective-target gomp_libamdhip64 }
+! { dg-additional-options "-lamdhip64" }
+
+#define USE_HIP_FALLBACK_MODULE 1
+
+#include "interop-hip.h"
diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-full.F90 b/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-full.F90
new file mode 100644
index 0000000..cef592f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-full.F90
@@ -0,0 +1,9 @@
+! { dg-require-effective-target gomp_hipfort_module }
+! { dg-require-effective-target openacc_cudart }
+! { dg-require-effective-target openacc_cuda }
+! { dg-additional-options "-lcuda -lcudart" }
+
+#define HAVE_HIPFORT 1
+#define USE_CUDA_NAMES 1
+
+#include "interop-hip.h"
diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90 b/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90
new file mode 100644
index 0000000..c1ef29d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90
@@ -0,0 +1,8 @@
+! { dg-require-effective-target openacc_libcudart }
+! { dg-require-effective-target openacc_libcuda }
+! { dg-additional-options "-lcuda -lcudart" }
+
+#define USE_CUDA_NAMES 1
+#define USE_HIP_FALLBACK_MODULE 1
+
+#include "interop-hip.h"
diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip.h b/libgomp/testsuite/libgomp.fortran/interop-hip.h
new file mode 100644
index 0000000..753ccce
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/interop-hip.h
@@ -0,0 +1,214 @@
+! Minimal check whether HIP works - by checking whether the API routines
+! seem to work.  This includes a  fallback if hipfort is not available
+
+#ifndef HAVE_HIPFORT
+#ifndef USE_HIP_FALLBACK_MODULE
+#if USE_CUDA_NAMES
+#warning "Using fallback implementation for module hipfort as HAVE_HIPFORT is undefined (for NVIDA/CUDA)"
+#else
+#warning "Using fallback implementation for module hipfort as HAVE_HIPFORT is undefined - assume AMD as USE_CUDA_NAMES is unset"
+#endif
+#endif
+module hipfort  ! Minimal implementation for the testsuite
+  implicit none
+
+  enum, bind(c)
+    enumerator :: hipSuccess = 0
+    enumerator :: hipErrorNotSupported = 801
+  end enum
+
+  enum, bind(c)
+    enumerator :: hipDeviceAttributeClockRate = 5
+    enumerator :: hipDeviceAttributeMaxGridDimX = 29
+  end enum
+
+  interface
+    integer(kind(hipSuccess)) function hipDeviceGetAttribute (ip, attr, dev) &
+#if USE_CUDA_NAMES
+        bind(c, name="cudaDeviceGetAttribute")
+#else
+        bind(c, name="hipDeviceGetAttribute")
+#endif
+      use iso_c_binding, only: c_ptr, c_int
+      import
+      implicit none
+      type(c_ptr), value :: ip
+      integer(kind(hipDeviceAttributeClockRate)), value :: attr
+      integer(c_int), value :: dev
+    end
+
+    integer(kind(hipSuccess)) function hipCtxGetApiVersion (ctx, ip) &
+#if USE_CUDA_NAMES
+        bind(c, name="cudaCtxGetApiVersion")
+#else
+        bind(c, name="hipCtxGetApiVersion")
+#endif
+      use iso_c_binding, only: c_ptr
+      import
+      implicit none
+      type(c_ptr), value :: ctx, ip
+    end
+
+    integer(kind(hipSuccess)) function hipStreamQuery (stream) &
+#if USE_CUDA_NAMES
+        bind(c, name="cudaStreamQuery")
+#else
+        bind(c, name="hipStreamQuery")
+#endif
+      use iso_c_binding, only: c_ptr
+      import
+      implicit none
+      type(c_ptr), value :: stream
+    end
+
+   integer(kind(hipSuccess)) function hipStreamGetFlags (stream, flags) &
+#if USE_CUDA_NAMES
+        bind(c, name="cudaStreamGetFlags")
+#else
+        bind(c, name="hipStreamGetFlags")
+#endif
+      use iso_c_binding, only: c_ptr
+      import
+      implicit none
+      type(c_ptr), value :: stream
+      type(c_ptr), value :: flags
+    end
+  end interface
+end module
+#endif
+
+program main
+  use iso_c_binding, only: c_ptr, c_int, c_loc
+  use omp_lib
+  use hipfort
+  implicit none (type, external)
+
+! Only supported since CUDA 12.8 - skip for better compatibility
+!  ! Manally implement hipStreamGetDevice as hipfort misses it
+!  ! -> https://github.com/ROCm/hipfort/issues/238
+!  interface
+!    integer(kind(hipSuccess)) function my_hipStreamGetDevice(stream, dev) &
+!#if USE_CUDA_NAMES
+!        bind(c, name="cudaStreamGetDevice")
+!#else
+!        bind(c, name="hipStreamGetDevice")
+!#endif
+!      use iso_c_binding, only: c_ptr, c_int
+!      import
+!      implicit none
+!      type(c_ptr), value :: stream
+!      integer(c_int) :: dev
+!    end
+!  end interface
+
+  integer(c_int), target :: ivar
+  integer(omp_interop_rc_kind) :: res
+  integer(omp_interop_kind) :: obj
+  integer(omp_interop_fr_kind) :: fr
+  integer(kind(hipSuccess)) :: hip_err
+  integer(c_int) :: hip_dev, dev_stream
+  type(c_ptr) :: hip_ctx, hip_sm
+
+  logical :: vendor_is_amd
+
+  obj = omp_interop_none
+
+  !$omp interop init(target, targetsync, prefer_type("hip") : obj)
+
+  fr = omp_get_interop_int (obj, omp_ipr_fr_id, res)
+  if (res /= omp_irc_success) error stop 1
+  if (fr /= omp_ifr_hip) error stop 1
+
+  ivar = omp_get_interop_int (obj, omp_ipr_vendor, res)
+  if (ivar == 1) then  ! AMD
+    vendor_is_amd = .true.
+  else if (ivar == 11) then  ! Nvidia
+    vendor_is_amd = .false.
+  else
+    error stop 1  ! Unknown
+  endif
+#if USE_CUDA_NAMES
+  if (vendor_is_amd) error stop 1
+#else
+  if (.not. vendor_is_amd) error stop 1
+#endif
+
+  ! Check whether the omp_ipr_device -> hipDevice_t yields a valid device.
+
+  hip_dev = omp_get_interop_int (obj, omp_ipr_device, res)
+  if (res /= omp_irc_success) error stop 1
+
+! AMD messed up in Fortran with the attribute handling, missing the
+! translation table it has for C.
+block
+  enum, bind(c)
+    enumerator :: cudaDevAttrClockRate = 13
+    enumerator :: cudaDevAttrMaxGridDimX = 5
+  end enum
+
+  ! Assume a clock size is available and > 1 GHz; value is in kHz.
+  ! c_loc is completely bogus, but as AMD messed up the interface ...
+  ! Cf. https://github.com/ROCm/hipfort/issues/239
+if (vendor_is_amd) then
+  hip_err = hipDeviceGetAttribute (c_loc(ivar), hipDeviceAttributeClockRate, hip_dev)
+else
+  hip_err = hipDeviceGetAttribute (c_loc(ivar), cudaDevAttrClockRate, hip_dev)
+endif
+  if (hip_err /= hipSuccess) error stop 1
+  if (ivar <= 1000000) error stop 1  ! in kHz
+
+  ! Assume that the MaxGridDimX is available and > 1024
+  ! c_loc is completely bogus, but as AMD messed up the interface ...
+  ! Cf. https://github.com/ROCm/hipfort/issues/239
+if (vendor_is_amd) then
+  hip_err = hipDeviceGetAttribute (c_loc(ivar), hipDeviceAttributeMaxGridDimX, hip_dev)
+else
+  hip_err = hipDeviceGetAttribute (c_loc(ivar), cudaDevAttrMaxGridDimX, hip_dev)
+endif
+  if (hip_err /= hipSuccess) error stop 1
+  if (ivar <= 1024) error stop 1
+end block
+
+
+  ! Check whether the omp_ipr_device_context -> hipCtx_t yields a context.
+
+  hip_ctx = omp_get_interop_ptr (obj, omp_ipr_device_context, res)
+  if (res /= omp_irc_success) error stop 1
+
+!  ! Assume API Version > 0 for Nvidia, hipErrorNotSupported for AMD.  */
+!  ivar = -99
+!  ! AMD deprectated hipCtxGetApiVersion (in C/C++)
+!  hip_err = hipCtxGetApiVersion (hip_ctx, c_loc(ivar))
+!
+!  if (vendor_is_amd) then
+!    if (hip_err /= hipErrorNotSupported .or. ivar /= -99) error stop 1
+!  else
+!    if (hip_err /= hipSuccess) error stop 1
+!    if (ivar <= 0) error stop 1
+!  end if
+
+
+  ! Check whether the omp_ipr_targetsync -> hipStream_t yields a stream.
+
+  hip_sm = omp_get_interop_ptr (obj, omp_ipr_targetsync, res)
+  if (res /= omp_irc_success) error stop 1
+
+! Skip as this is only in CUDA 12.8
+!  dev_stream = 99
+!    ! Not (yet) implemented: https://github.com/ROCm/hipfort/issues/238
+!    !  hip_err = hipStreamGetDevice (hip_sm, dev_stream)
+!  hip_err = my_hipStreamGetDevice (hip_sm, dev_stream)
+!  if (hip_err /= hipSuccess) error stop 1
+!  if (dev_stream /= hip_dev) error stop 1
+
+  ! Get flags of the stream
+  hip_err = hipStreamGetFlags (hip_sm, c_loc (ivar))
+  if (hip_err /= hipSuccess) error stop 1
+  ! Accept any value
+
+  ! All jobs should have been completed (as there were none none)
+  hip_err = hipStreamQuery (hip_sm)
+  if (hip_err /= hipSuccess) error stop 1
+
+  !$omp interop destroy(obj)
+end
diff --git a/libgomp/testsuite/libgomp.fortran/target-enter-data-8.f90 b/libgomp/testsuite/libgomp.fortran/target-enter-data-8.f90
new file mode 100644
index 0000000..c6d671c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/target-enter-data-8.f90
@@ -0,0 +1,532 @@
+! { dg-additional-options "-cpp" }
+
+! FIXME: Some tests do not work yet. Those are for now in '#if 0'
+
+! Check that 'map(alloc:' properly works with
+! - deferred-length character strings
+! - arrays with array descriptors
+! For those, the array descriptor / string length must be mapped with 'to:'
+
+program main
+implicit none
+
+type t
+  integer :: ic(2:5), ic2
+  character(len=11) :: ccstr(3:4), ccstr2
+  character(len=11,kind=4) :: cc4str(3:7), cc4str2
+  integer, pointer :: pc(:), pc2
+  character(len=:), pointer :: pcstr(:), pcstr2
+  character(len=:,kind=4), pointer :: pc4str(:), pc4str2
+end type t
+
+type(t) :: dt
+
+integer :: ii(5), ii2
+character(len=11) :: clstr(-1:1), clstr2
+character(len=11,kind=4) :: cl4str(0:3), cl4str2
+integer, pointer :: ip(:), ip2
+integer, allocatable :: ia(:), ia2
+character(len=:), pointer :: pstr(:), pstr2
+character(len=:), allocatable :: astr(:), astr2
+character(len=:,kind=4), pointer :: p4str(:), p4str2
+character(len=:,kind=4), allocatable :: a4str(:), a4str2
+
+
+allocate(dt%pc(5), dt%pc2)
+allocate(character(len=2) :: dt%pcstr(2))
+allocate(character(len=4) :: dt%pcstr2)
+
+allocate(character(len=3,kind=4) :: dt%pc4str(2:3))
+allocate(character(len=5,kind=4) :: dt%pc4str2)
+
+allocate(ip(5), ip2, ia(8), ia2)
+allocate(character(len=2) :: pstr(-2:0))
+allocate(character(len=4) :: pstr2)
+allocate(character(len=6) :: astr(3:5))
+allocate(character(len=8) :: astr2)
+
+allocate(character(len=3,kind=4) :: p4str(2:4))
+allocate(character(len=5,kind=4) :: p4str2)
+allocate(character(len=7,kind=4) :: a4str(-2:3))
+allocate(character(len=9,kind=4) :: a4str2)
+
+
+! integer :: ic(2:5), ic2
+
+!$omp target enter data map(alloc: dt%ic)
+!$omp target map(alloc: dt%ic)
+  if (size(dt%ic) /= 4) error stop
+  if (lbound(dt%ic, 1) /= 2) error stop
+  if (ubound(dt%ic, 1) /= 5) error stop
+  dt%ic = [22, 33, 44, 55]
+!$omp end target
+!$omp target exit data map(from: dt%ic)
+if (size(dt%ic) /= 4) error stop
+if (lbound(dt%ic, 1) /= 2) error stop
+if (ubound(dt%ic, 1) /= 5) error stop
+if (any (dt%ic /= [22, 33, 44, 55])) error stop
+
+!$omp target enter data map(alloc: dt%ic2)
+!$omp target map(alloc: dt%ic2)
+  dt%ic2 = 42
+!$omp end target
+!$omp target exit data map(from: dt%ic2)
+if (dt%ic2 /= 42) error stop
+
+
+! character(len=11) :: ccstr(3:4), ccstr2
+
+!$omp target enter data map(alloc: dt%ccstr)
+!$omp target map(alloc: dt%ccstr)
+  if (len(dt%ccstr) /= 11) error stop
+  if (size(dt%ccstr) /= 2) error stop
+  if (lbound(dt%ccstr, 1) /= 3) error stop
+  if (ubound(dt%ccstr, 1) /= 4) error stop
+  dt%ccstr = ["12345678901", "abcdefghijk"]
+!$omp end target
+!$omp target exit data map(from: dt%ccstr)
+if (len(dt%ccstr) /= 11) error stop
+if (size(dt%ccstr) /= 2) error stop
+if (lbound(dt%ccstr, 1) /= 3) error stop
+if (ubound(dt%ccstr, 1) /= 4) error stop
+if (any (dt%ccstr /= ["12345678901", "abcdefghijk"])) error stop
+
+!$omp target enter data map(alloc: dt%ccstr2)
+!$omp target map(alloc: dt%ccstr2)
+  if (len(dt%ccstr2) /= 11) error stop
+  dt%ccstr2 = "ABCDEFGHIJK"
+!$omp end target
+!$omp target exit data map(from: dt%ccstr2)
+if (len(dt%ccstr2) /= 11) error stop
+if (dt%ccstr2 /= "ABCDEFGHIJK") error stop
+
+
+! character(len=11,kind=4) :: cc4str(3:7), cc4str2
+
+#if 0
+! Value check fails
+!$omp target map(alloc: dt%cc4str)
+  if (len(dt%cc4str) /= 11) error stop
+  if (size(dt%cc4str) /= 5) error stop
+  if (lbound(dt%cc4str, 1) /= 3) error stop
+  if (ubound(dt%cc4str, 1) /= 7) error stop
+  dt%cc4str = [4_"12345678901", 4_"abcdefghijk", &
+               4_"qerftcea6ds", 4_"a1f9g37ga4.", &
+               4_"45ngwj56sj2"]
+!$omp end target
+!$omp target exit data map(from: dt%cc4str)
+if (len(dt%cc4str) /= 11) error stop
+if (size(dt%cc4str) /= 5) error stop
+if (lbound(dt%cc4str, 1) /= 3) error stop
+if (ubound(dt%cc4str, 1) /= 7) error stop
+if (dt%cc4str(3) /= 4_"12345678901") error stop
+if (dt%cc4str(4) /= 4_"abcdefghijk") error stop
+if (dt%cc4str(5) /= 4_"qerftcea6ds") error stop
+if (dt%cc4str(6) /= 4_"a1f9g37ga4.") error stop
+if (dt%cc4str(7) /= 4_"45ngwj56sj2") error stop
+#endif
+
+!$omp target enter data map(alloc: dt%cc4str2)
+!$omp target map(alloc: dt%cc4str2)
+  if (len(dt%cc4str2) /= 11) error stop
+  dt%cc4str2 = 4_"ABCDEFGHIJK"
+!$omp end target
+!$omp target exit data map(from: dt%cc4str2)
+if (len(dt%cc4str2) /= 11) error stop
+if (dt%cc4str2 /= 4_"ABCDEFGHIJK") error stop
+
+
+! integer, pointer :: pc(:), pc2
+! allocate(dt%pc(5), dt%pc2)
+
+!$omp target enter data map(alloc: dt%pc)
+!$omp target map(alloc: dt%pc)
+  if (.not. associated(dt%pc)) error stop
+  if (size(dt%pc) /= 5) error stop
+  if (lbound(dt%pc, 1) /= 1) error stop
+  if (ubound(dt%pc, 1) /= 5) error stop
+  dt%pc = [11, 22, 33, 44, 55]
+!$omp end target
+!$omp target exit data map(from: dt%pc)
+if (.not. associated(dt%pc)) error stop
+if (size(dt%pc) /= 5) error stop
+if (lbound(dt%pc, 1) /= 1) error stop
+if (ubound(dt%pc, 1) /= 5) error stop
+if (any (dt%pc /= [11, 22, 33, 44, 55])) error stop
+
+!$omp target enter data map(alloc: dt%pc2)
+!$omp target map(alloc: dt%pc2)
+  if (.not. associated(dt%pc2)) error stop
+  dt%pc2 = 99
+!$omp end target
+!$omp target exit data map(from: dt%pc2)
+if (dt%pc2 /= 99) error stop
+if (.not. associated(dt%pc2)) error stop
+
+
+! character(len=:), pointer :: pcstr(:), pcstr2
+! allocate(character(len=2) :: dt%pcstr(2))
+! allocate(character(len=4) :: dt%pcstr2)
+
+!$omp target enter data map(alloc: dt%pcstr)
+!$omp target map(alloc: dt%pcstr)
+  if (.not. associated(dt%pcstr)) error stop
+  if (len(dt%pcstr) /= 2) error stop
+  if (size(dt%pcstr) /= 2) error stop
+  if (lbound(dt%pcstr, 1) /= 1) error stop
+  if (ubound(dt%pcstr, 1) /= 2) error stop
+  dt%pcstr = ["01", "jk"]
+!$omp end target
+!$omp target exit data map(from: dt%pcstr)
+if (.not. associated(dt%pcstr)) error stop
+if (len(dt%pcstr) /= 2) error stop
+if (size(dt%pcstr) /= 2) error stop
+if (lbound(dt%pcstr, 1) /= 1) error stop
+if (ubound(dt%pcstr, 1) /= 2) error stop
+if (any (dt%pcstr /= ["01", "jk"])) error stop
+
+
+!$omp target enter data map(alloc: dt%pcstr2)
+!$omp target map(alloc: dt%pcstr2)
+  if (.not. associated(dt%pcstr2)) error stop
+  if (len(dt%pcstr2) /= 4) error stop
+  dt%pcstr2 = "HIJK"
+!$omp end target
+!$omp target exit data map(from: dt%pcstr2)
+if (.not. associated(dt%pcstr2)) error stop
+if (len(dt%pcstr2) /= 4) error stop
+if (dt%pcstr2 /= "HIJK") error stop
+
+
+! character(len=:,kind=4), pointer :: pc4str(:), pc4str2
+! allocate(character(len=3,kind=4) :: dt%pc4str(2:3))
+! allocate(character(len=5,kind=4) :: dt%pc4str2)
+
+!$omp target enter data map(alloc: dt%pc4str)
+!$omp target map(alloc: dt%pc4str)
+  if (.not. associated(dt%pc4str)) error stop
+  if (len(dt%pc4str) /= 3) error stop
+  if (size(dt%pc4str) /= 2) error stop
+  if (lbound(dt%pc4str, 1) /= 2) error stop
+  if (ubound(dt%pc4str, 1) /= 3) error stop
+  dt%pc4str = [4_"456", 4_"tzu"]
+!$omp end target
+!$omp target exit data map(from: dt%pc4str)
+if (.not. associated(dt%pc4str)) error stop
+if (len(dt%pc4str) /= 3) error stop
+if (size(dt%pc4str) /= 2) error stop
+if (lbound(dt%pc4str, 1) /= 2) error stop
+if (ubound(dt%pc4str, 1) /= 3) error stop
+if (dt%pc4str(2) /= 4_"456") error stop
+if (dt%pc4str(3) /= 4_"tzu") error stop
+
+!$omp target enter data map(alloc: dt%pc4str2)
+!$omp target map(alloc: dt%pc4str2)
+  if (.not. associated(dt%pc4str2)) error stop
+  if (len(dt%pc4str2) /= 5) error stop
+  dt%pc4str2 = 4_"98765"
+!$omp end target
+!$omp target exit data map(from: dt%pc4str2)
+if (.not. associated(dt%pc4str2)) error stop
+if (len(dt%pc4str2) /= 5) error stop
+if (dt%pc4str2 /= 4_"98765") error stop
+
+
+! integer :: ii(5), ii2
+
+!$omp target enter data map(alloc: ii)
+!$omp target map(alloc: ii)
+  if (size(ii) /= 5) error stop
+  if (lbound(ii, 1) /= 1) error stop
+  if (ubound(ii, 1) /= 5) error stop
+  ii = [-1, -2, -3, -4, -5]
+!$omp end target
+!$omp target exit data map(from: ii)
+if (size(ii) /= 5) error stop
+if (lbound(ii, 1) /= 1) error stop
+if (ubound(ii, 1) /= 5) error stop
+if (any (ii /= [-1, -2, -3, -4, -5])) error stop
+
+!$omp target enter data map(alloc: ii2)
+!$omp target map(alloc: ii2)
+  ii2 = -410
+!$omp end target
+!$omp target exit data map(from: ii2)
+if (ii2 /= -410) error stop
+
+
+! character(len=11) :: clstr(-1:1), clstr2
+
+!$omp target enter data map(alloc: clstr)
+!$omp target map(alloc: clstr)
+  if (len(clstr) /= 11) error stop
+  if (size(clstr) /= 3) error stop
+  if (lbound(clstr, 1) /= -1) error stop
+  if (ubound(clstr, 1) /= 1) error stop
+  clstr = ["12345678901", "abcdefghijk", "ABCDEFGHIJK"]
+!$omp end target
+!$omp target exit data map(from: clstr)
+if (len(clstr) /= 11) error stop
+if (size(clstr) /= 3) error stop
+if (lbound(clstr, 1) /= -1) error stop
+if (ubound(clstr, 1) /= 1) error stop
+if (any (clstr /= ["12345678901", "abcdefghijk", "ABCDEFGHIJK"])) error stop
+
+!$omp target enter data map(alloc: clstr2)
+!$omp target map(alloc: clstr2)
+  if (len(clstr2) /= 11) error stop
+  clstr2 = "ABCDEFghijk"
+!$omp end target
+!$omp target exit data map(from: clstr2)
+if (len(clstr2) /= 11) error stop
+if (clstr2 /= "ABCDEFghijk") error stop
+
+
+! character(len=11,kind=4) :: cl4str(0:3), cl4str2
+
+!$omp target enter data map(alloc: cl4str)
+!$omp target map(alloc: cl4str)
+  if (len(cl4str) /= 11) error stop
+  if (size(cl4str) /= 4) error stop
+  if (lbound(cl4str, 1) /= 0) error stop
+  if (ubound(cl4str, 1) /= 3) error stop
+  cl4str = [4_"12345678901", 4_"abcdefghijk", &
+            4_"qerftcea6ds", 4_"a1f9g37ga4."]
+!$omp end target
+!$omp target exit data map(from: cl4str)
+if (len(cl4str) /= 11) error stop
+if (size(cl4str) /= 4) error stop
+if (lbound(cl4str, 1) /= 0) error stop
+if (ubound(cl4str, 1) /= 3) error stop
+if (cl4str(0) /= 4_"12345678901") error stop
+if (cl4str(1) /= 4_"abcdefghijk") error stop
+if (cl4str(2) /= 4_"qerftcea6ds") error stop
+if (cl4str(3) /= 4_"a1f9g37ga4.") error stop
+
+!$omp target enter data map(alloc: cl4str2)
+!$omp target map(alloc: cl4str2)
+  if (len(cl4str2) /= 11) error stop
+  cl4str2 = 4_"ABCDEFGHIJK"
+!$omp end target
+!$omp target exit data map(from: cl4str2)
+if (len(cl4str2) /= 11) error stop
+if (cl4str2 /= 4_"ABCDEFGHIJK") error stop
+
+
+! allocate(ip(5), ip2, ia(8), ia2)
+
+!$omp target enter data map(alloc: ip)
+!$omp target map(alloc: ip)
+  if (.not. associated(ip)) error stop
+  if (size(ip) /= 5) error stop
+  if (lbound(ip, 1) /= 1) error stop
+  if (ubound(ip, 1) /= 5) error stop
+  ip = [11, 22, 33, 44, 55]
+!$omp end target
+!$omp target exit data map(from: ip)
+if (.not. associated(ip)) error stop
+if (size(ip) /= 5) error stop
+if (lbound(ip, 1) /= 1) error stop
+if (ubound(ip, 1) /= 5) error stop
+if (any (ip /= [11, 22, 33, 44, 55])) error stop
+
+!$omp target enter data map(alloc: ip2)
+!$omp target map(alloc: ip2)
+  if (.not. associated(ip2)) error stop
+  ip2 = 99
+!$omp end target
+!$omp target exit data map(from: ip2)
+if (ip2 /= 99) error stop
+if (.not. associated(ip2)) error stop
+
+
+! allocate(ip(5), ip2, ia(8), ia2)
+
+!$omp target enter data map(alloc: ia)
+!$omp target map(alloc: ia)
+  if (.not. allocated(ia)) error stop
+  if (size(ia) /= 8) error stop
+  if (lbound(ia, 1) /= 1) error stop
+  if (ubound(ia, 1) /= 8) error stop
+  ia = [1,2,3,4,5,6,7,8]
+!$omp end target
+!$omp target exit data map(from: ia)
+if (.not. allocated(ia)) error stop
+if (size(ia) /= 8) error stop
+if (lbound(ia, 1) /= 1) error stop
+if (ubound(ia, 1) /= 8) error stop
+if (any (ia /= [1,2,3,4,5,6,7,8])) error stop
+
+!$omp target enter data map(alloc: ia2)
+!$omp target map(alloc: ia2)
+  if (.not. allocated(ia2)) error stop
+  ia2 = 102
+!$omp end target
+!$omp target exit data map(from: ia2)
+if (ia2 /= 102) error stop
+if (.not. allocated(ia2)) error stop
+
+
+! character(len=:), pointer :: pstr(:), pstr2
+! allocate(character(len=2) :: pstr(-2:0))
+! allocate(character(len=4) :: pstr2)
+
+!$omp target enter data map(alloc: pstr)
+!$omp target map(alloc: pstr)
+  if (.not. associated(pstr)) error stop
+  if (len(pstr) /= 2) error stop
+  if (size(pstr) /= 3) error stop
+  if (lbound(pstr, 1) /= -2) error stop
+  if (ubound(pstr, 1) /= 0) error stop
+  pstr = ["01", "jk", "aq"]
+!$omp end target
+!$omp target exit data map(from: pstr)
+if (.not. associated(pstr)) error stop
+if (len(pstr) /= 2) error stop
+if (size(pstr) /= 3) error stop
+if (lbound(pstr, 1) /= -2) error stop
+if (ubound(pstr, 1) /= 0) error stop
+if (any (pstr /= ["01", "jk", "aq"])) error stop
+
+!$omp target enter data map(alloc: pstr2)
+!$omp target map(alloc: pstr2)
+  if (.not. associated(pstr2)) error stop
+  if (len(pstr2) /= 4) error stop
+  pstr2 = "HIJK"
+!$omp end target
+!$omp target exit data map(from: pstr2)
+if (.not. associated(pstr2)) error stop
+if (len(pstr2) /= 4) error stop
+if (pstr2 /= "HIJK") error stop
+
+
+! character(len=:), allocatable :: astr(:), astr2
+! allocate(character(len=6) :: astr(3:5))
+! allocate(character(len=8) :: astr2)
+
+
+!$omp target enter data map(alloc: astr)
+!$omp target map(alloc: astr)
+  if (.not. allocated(astr)) error stop
+  if (len(astr) /= 6) error stop
+  if (size(astr) /= 3) error stop
+  if (lbound(astr, 1) /= 3) error stop
+  if (ubound(astr, 1) /= 5) error stop
+  astr = ["01db45", "jk$D%S", "zutg47"]
+!$omp end target
+!$omp target exit data map(from: astr)
+if (.not. allocated(astr)) error stop
+if (len(astr) /= 6) error stop
+if (size(astr) /= 3) error stop
+if (lbound(astr, 1) /= 3) error stop
+if (ubound(astr, 1) /= 5) error stop
+if (any (astr /= ["01db45", "jk$D%S", "zutg47"])) error stop
+
+
+!$omp target enter data map(alloc: astr2)
+!$omp target map(alloc: astr2)
+  if (.not. allocated(astr2)) error stop
+  if (len(astr2) /= 8) error stop
+  astr2 = "HIJKhijk"
+!$omp end target
+!$omp target exit data map(from: astr2)
+if (.not. allocated(astr2)) error stop
+if (len(astr2) /= 8) error stop
+if (astr2 /= "HIJKhijk") error stop
+
+
+! character(len=:,kind=4), pointer :: p4str(:), p4str2
+! allocate(character(len=3,kind=4) :: p4str(2:4))
+! allocate(character(len=5,kind=4) :: p4str2)
+
+! FAILS with value check
+
+!$omp target enter data map(alloc: p4str)
+!$omp target map(alloc: p4str)
+  if (.not. associated(p4str)) error stop
+  if (len(p4str) /= 3) error stop
+  if (size(p4str) /= 3) error stop
+  if (lbound(p4str, 1) /= 2) error stop
+  if (ubound(p4str, 1) /= 4) error stop
+  p4str(:) = [4_"f85", 4_"8af", 4_"A%F"]
+!$omp end target
+!$omp target exit data map(from: p4str)
+if (.not. associated(p4str)) error stop
+if (len(p4str) /= 3) error stop
+if (size(p4str) /= 3) error stop
+if (lbound(p4str, 1) /= 2) error stop
+if (ubound(p4str, 1) /= 4) error stop
+if (p4str(2)  /= 4_"f85") error stop
+if (p4str(3)  /= 4_"8af") error stop
+if (p4str(4)  /= 4_"A%F") error stop
+
+!$omp target enter data map(alloc: p4str2)
+!$omp target map(alloc: p4str2)
+  if (.not. associated(p4str2)) error stop
+  if (len(p4str2) /= 5) error stop
+  p4str2 = 4_"9875a"
+!$omp end target
+!$omp target exit data map(from: p4str2)
+if (.not. associated(p4str2)) error stop
+if (len(p4str2) /= 5) error stop
+if (p4str2 /= 4_"9875a") error stop
+
+
+! character(len=:,kind=4), allocatable :: a4str(:), a4str2
+! allocate(character(len=7,kind=4) :: a4str(-2:3))
+! allocate(character(len=9,kind=4) :: a4str2)
+
+!$omp target enter data map(alloc: a4str)
+!$omp target map(alloc: a4str)
+  if (.not. allocated(a4str)) error stop
+  if (len(a4str) /= 7) error stop
+  if (size(a4str) /= 6) error stop
+  if (lbound(a4str, 1) /= -2) error stop
+  if (ubound(a4str, 1) /= 3) error stop
+  ! See PR fortran/107508 why '(:)' is required
+  a4str(:) = [4_"sf456aq", 4_"3dtzu24", 4_"_4fh7sm", 4_"=ff85s7", 4_"j=8af4d", 4_".,A%Fsz"]
+!$omp end target
+!$omp target exit data map(from: a4str)
+if (.not. allocated(a4str)) error stop
+if (len(a4str) /= 7) error stop
+if (size(a4str) /= 6) error stop
+if (lbound(a4str, 1) /= -2) error stop
+if (ubound(a4str, 1) /= 3) error stop
+if (a4str(-2) /= 4_"sf456aq") error stop
+if (a4str(-1) /= 4_"3dtzu24") error stop
+if (a4str(0)  /= 4_"_4fh7sm") error stop
+if (a4str(1)  /= 4_"=ff85s7") error stop
+if (a4str(2)  /= 4_"j=8af4d") error stop
+if (a4str(3)  /= 4_".,A%Fsz") error stop
+
+!$omp target enter data map(alloc: a4str2)
+!$omp target map(alloc: a4str2)
+  if (.not. allocated(a4str2)) error stop
+  if (len(a4str2) /= 9) error stop
+  a4str2 = 4_"98765a23d"
+!$omp end target
+!$omp target exit data map(from: a4str2)
+if (.not. allocated(a4str2)) error stop
+if (len(a4str2) /= 9) error stop
+if (a4str2 /= 4_"98765a23d") error stop
+
+
+deallocate(dt%pc, dt%pc2)
+deallocate(dt%pcstr)
+deallocate(dt%pcstr2)
+
+deallocate(dt%pc4str)
+deallocate(dt%pc4str2)
+
+deallocate(ip, ip2, ia, ia2)
+deallocate(pstr)
+deallocate(pstr2)
+deallocate(astr)
+deallocate(astr2)
+
+deallocate(p4str)
+deallocate(p4str2)
+deallocate(a4str)
+deallocate(a4str2)
+
+end
diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-1.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-1.C
index f2ef751..08c5766 100644
--- a/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-1.C
+++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-1.C
@@ -4,9 +4,6 @@
    { dg-additional-options -fexceptions } */
 /* { dg-additional-options -fdump-tree-optimized-raw }
    { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */
-/* { dg-bogus {Size expression must be absolute\.} PR119737 { target { openacc_radeon_accel_selected && __OPTIMIZE__ } xfail *-*-* } 0 }
-   { dg-ice PR119737 { openacc_radeon_accel_selected && __OPTIMIZE__ } }
-   { dg-excess-errors {'mkoffload' failure etc.} { xfail { openacc_radeon_accel_selected && __OPTIMIZE__ } } } */
 
 /* See also '../libgomp.c++/target-exceptions-throw-1.C'.  */
 
diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2.C
index f6dc970..a7408cd 100644
--- a/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2.C
+++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2.C
@@ -6,9 +6,6 @@
    { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */
 /* { dg-bogus {undefined symbol: typeinfo name for MyException} PR119806 { target { openacc_radeon_accel_selected && { ! __OPTIMIZE__ } } xfail *-*-* } 0 }
    { dg-excess-errors {'mkoffload' failure etc.} { xfail { openacc_radeon_accel_selected && { ! __OPTIMIZE__ } } } } */
-/* { dg-bogus {Size expression must be absolute\.} PR119737 { target { openacc_radeon_accel_selected && __OPTIMIZE__ } xfail *-*-* } 0 }
-   { dg-ice PR119737 { openacc_radeon_accel_selected && __OPTIMIZE__ } }
-   { dg-excess-errors {'mkoffload' failures etc.} { xfail { openacc_radeon_accel_selected && __OPTIMIZE__ } } } */
 /* { dg-bogus {Initial value type mismatch} PR119806 { target { openacc_nvidia_accel_selected && { ! __OPTIMIZE__ } } xfail *-*-* } 0 }
    { dg-excess-errors {'mkoffload' failure etc.} { xfail { openacc_nvidia_accel_selected && { ! __OPTIMIZE__ } } } } */