15 files changed, 1570 insertions, 9 deletions
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog
index a60e51a..9d9ecfb 100644
--- a/libgomp/ChangeLog
+++ b/libgomp/ChangeLog
@@ -1,3 +1,45 @@
+2025-04-10  Richard Sandiford  <richard.sandiford@arm.com>
+
+	* testsuite/libgomp.c-target/aarch64/firstprivate.c: Add +sve pragma.
+	* testsuite/libgomp.c-target/aarch64/lastprivate.c: Likewise.
+	* testsuite/libgomp.c-target/aarch64/private.c: Likewise.
+	* testsuite/libgomp.c-target/aarch64/shared.c: Likewise.
+	* testsuite/libgomp.c-target/aarch64/simd-aligned.c: Likewise.
+	* testsuite/libgomp.c-target/aarch64/simd-nontemporal.c: Likewise.
+	* testsuite/libgomp.c-target/aarch64/threadprivate.c: Likewise.
+	* testsuite/libgomp.c-target/aarch64/udr-sve.c: Add an -march option.
+	(for_reduction): Use "+=" in the reduction loop.
+
+2025-04-08  Tobias Burnus  <tburnus@baylibre.com>
+
+	PR middle-end/119662
+	* testsuite/libgomp.c/append-args-fr-1.c: New test.
+	* testsuite/libgomp.c/append-args-fr.h: New test.
+
+2025-04-08  Tobias Burnus  <tburnus@baylibre.com>
+
+	* Makefile.am (%.mod): Add -Wno-c-binding-type.
+	* Makefile.in: Regenerate.
+
+2025-04-08  Tejas Belagod  <tejas.belagod@arm.com>
+
+	* testsuite/libgomp.c-target/aarch64/aarch64.exp: Test driver.
+	* testsuite/libgomp.c-target/aarch64/firstprivate.c: New test.
+	* testsuite/libgomp.c-target/aarch64/lastprivate.c: Likewise.
+	* testsuite/libgomp.c-target/aarch64/private.c: Likewise.
+	* testsuite/libgomp.c-target/aarch64/shared.c: Likewise.
+	* testsuite/libgomp.c-target/aarch64/simd-aligned.c: Likewise.
+	* testsuite/libgomp.c-target/aarch64/simd-nontemporal.c: Likewise.
+	* testsuite/libgomp.c-target/aarch64/threadprivate.c: Likewise.
+	* testsuite/libgomp.c-target/aarch64/udr-sve.c: Likewise.
+
+2025-04-07  Tobias Burnus  <tburnus@baylibre.com>
+
+	* libgomp.texi (omp_target_memcpy_rect_async,
+	omp_target_memcpy_rect): Add @ref to 'Offload-Target Specifics'.
+	(AMD Radeon (GCN)): Document how memcpy_rect is implemented.
+	(nvptx): Move item about memcpy_rect item down; use present tense.
+
 2025-03-26  Thomas Schwinge  <thomas@codesourcery.com>
 
 	PR driver/101544
diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am
index 855f0af..e3202ae 100644
--- a/libgomp/Makefile.am
+++ b/libgomp/Makefile.am
@@ -97,7 +97,7 @@ openacc_kinds.mod: openacc.mod
 openacc.mod: openacc.lo
 	:
 %.mod: %.f90
-	$(FC) $(FCFLAGS) -cpp -fopenmp -fsyntax-only $<
+	$(FC) $(FCFLAGS) -cpp -fopenmp -fsyntax-only -Wno-c-binding-type $<
 fortran.lo: libgomp_f.h
 fortran.o: libgomp_f.h
 env.lo: libgomp_f.h
diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in
index 25cb6fc..2a0a842 100644
--- a/libgomp/Makefile.in
+++ b/libgomp/Makefile.in
@@ -1388,7 +1388,7 @@ openacc_kinds.mod: openacc.mod
 openacc.mod: openacc.lo
 	:
 %.mod: %.f90
-	$(FC) $(FCFLAGS) -cpp -fopenmp -fsyntax-only $<
+	$(FC) $(FCFLAGS) -cpp -fopenmp -fsyntax-only -Wno-c-binding-type $<
 fortran.lo: libgomp_f.h
 fortran.o: libgomp_f.h
 env.lo: libgomp_f.h
diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index 4217c29..fed9d5e 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -2316,7 +2316,7 @@ the initial device.
 @end multitable
 
 @item @emph{See also}:
-@ref{omp_target_memcpy_rect_async}, @ref{omp_target_memcpy}
+@ref{omp_target_memcpy_rect_async}, @ref{omp_target_memcpy}, @ref{Offload-Target Specifics}
 
 @item @emph{Reference}:
 @uref{https://www.openmp.org, OpenMP specification v5.1}, Section 3.8.6
@@ -2391,7 +2391,7 @@ the initial device.
 @end multitable
 
 @item @emph{See also}:
-@ref{omp_target_memcpy_rect}, @ref{omp_target_memcpy_async}
+@ref{omp_target_memcpy_rect}, @ref{omp_target_memcpy_async}, @ref{Offload-Target Specifics}
 
 @item @emph{Reference}:
 @uref{https://www.openmp.org, OpenMP specification v5.1}, Section 3.8.8
@@ -6911,6 +6911,11 @@ The implementation remark:
       @code{omp_thread_mem_alloc}, all use low-latency memory as first
       preference, and fall back to main graphics memory when the low-latency
       pool is exhausted.
+@item The OpenMP routines @code{omp_target_memcpy_rect} and
+      @code{omp_target_memcpy_rect_async} and the @code{target update}
+      directive for non-contiguous list items use the 3D memory-copy function
+      of the HSA library.  Higher dimensions call this functions in a loop and
+      are therefore supported.
 @item The unique identifier (UID), used with OpenMP's API UID routines, is the
       value returned by the HSA runtime library for @code{HSA_AMD_AGENT_INFO_UUID}.
       For GPUs, it is currently @samp{GPU-} followed by 16 lower-case hex digits,
@@ -7048,11 +7053,6 @@ The implementation remark:
       devices (``host fallback'').
 @item The default per-warp stack size is 128 kiB; see also @code{-msoft-stack}
       in the GCC manual.
-@item The OpenMP routines @code{omp_target_memcpy_rect} and
-      @code{omp_target_memcpy_rect_async} and the @code{target update}
-      directive for non-contiguous list items will use the 2D and 3D
-      memory-copy functions of the CUDA library.  Higher dimensions will
-      call those functions in a loop and are therefore supported.
 @item Low-latency memory (@code{omp_low_lat_mem_space}) is supported when the
       the @code{access} trait is set to @code{cgroup}, and libgomp has
       been built for PTX ISA version 4.1 or higher (such as in GCC's
@@ -7070,6 +7070,11 @@ The implementation remark:
       @code{omp_thread_mem_alloc}, all use low-latency memory as first
       preference, and fall back to main graphics memory when the low-latency
       pool is exhausted.
+@item The OpenMP routines @code{omp_target_memcpy_rect} and
+      @code{omp_target_memcpy_rect_async} and the @code{target update}
+      directive for non-contiguous list items use the 2D and 3D memory-copy
+      functions of the CUDA library.  Higher dimensions call those functions
+      in a loop and are therefore supported.
 @item The unique identifier (UID), used with OpenMP's API UID routines, consists
       of the @samp{GPU-} prefix followed by the 16-bytes UUID as returned by
       the CUDA runtime library.  This UUID is output in grouped lower-case
diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/aarch64.exp b/libgomp/testsuite/libgomp.c-target/aarch64/aarch64.exp
new file mode 100644
index 0000000..02d5503
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-target/aarch64/aarch64.exp
@@ -0,0 +1,57 @@
+# Copyright (C) 2006-2025 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+# Load support procs.
+load_lib libgomp-dg.exp
+load_gcc_lib gcc-dg.exp
+
+# Exit immediately if this isn't an AArch64 target.
+if {![istarget aarch64*-*-*] } then {
+  return
+}
+
+lappend ALWAYS_CFLAGS "compiler=$GCC_UNDER_TEST"
+
+if { [check_effective_target_aarch64_sve] } {
+    set sve_flags ""
+} else {
+    set sve_flags "-march=armv8.2-a+sve"
+}
+
+# Initialize `dg'.
+dg-init
+
+#if ![check_effective_target_fopenmp] {
+#  return
+#}
+
+# Turn on OpenMP.
+lappend ALWAYS_CFLAGS "additional_flags=-fopenmp"
+
+# Gather a list of all tests.
+set tests [lsort [find $srcdir/$subdir *.c]]
+
+set ld_library_path $always_ld_library_path
+append ld_library_path [gcc-set-multilib-library-path $GCC_UNDER_TEST]
+set_ld_library_path_env_vars
+
+# Main loop.
+dg-runtest $tests "" $sve_flags
+
+# All done.
+dg-finish
diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/firstprivate.c b/libgomp/testsuite/libgomp.c-target/aarch64/firstprivate.c
new file mode 100644
index 0000000..58674e2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-target/aarch64/firstprivate.c
@@ -0,0 +1,129 @@
+/* { dg-do run { target aarch64_sve256_hw } } */
+/* { dg-options "-msve-vector-bits=256 -fopenmp -O2" } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+#include <omp.h>
+
+static void __attribute__ ((noipa))
+vec_compare (svint32_t *x, svint32_t y)
+{
+  svbool_t p = svnot_b_z (svptrue_b32 (), svcmpeq_s32 (svptrue_b32 (), *x, y));
+
+  if (svptest_any (svptrue_b32 (), p))
+    __builtin_abort ();
+}
+
+void __attribute__ ((noipa))
+firstprivate_sections ()
+{
+  int b[8], c[8];
+  svint32_t vb, vc;
+  int i;
+
+#pragma omp parallel for
+  for (i = 0; i < 8; i++)
+    {
+      b[i] = i;
+      c[i] = i + 1;
+    }
+
+  vb = svld1_s32 (svptrue_b32 (), b);
+  vc = svld1_s32 (svptrue_b32 (), c);
+
+#pragma omp parallel sections firstprivate (vb, vc)
+  {
+    #pragma omp section
+    vec_compare (&vb, svindex_s32 (0, 1));
+    vec_compare (&vc, svindex_s32 (1, 1));
+
+    #pragma omp section
+    vec_compare (&vb, svindex_s32 (0, 1));
+    vec_compare (&vc, svindex_s32 (1, 1));
+  }
+
+}
+
+void __attribute__ ((noipa))
+firstprivate_for ()
+{
+
+  int a[32], b[32], c[32];
+  svint32_t va, vb, vc;
+  int i;
+
+#pragma omp parallel for
+  for (i = 0; i < 32; i++)
+    {
+      b[i] = i;
+      c[i] = i + 1;
+    }
+
+  vb = svindex_s32 (1, 0);
+  vc = svindex_s32 (0, 1);
+
+#pragma omp parallel for firstprivate (vb, vc) private (va)
+  for (i = 0; i < 4; i++)
+    {
+      svint32_t tb, tc;
+      vec_compare (&vb, svindex_s32 (1, 0));
+      vec_compare (&vc, svindex_s32 (0, 1));
+      tb = svld1_s32 (svptrue_b32 (), b + i * 8);
+      tc = svld1_s32 (svptrue_b32 (), c + i * 8);
+      va = svadd_s32_z (svptrue_b32 (), vb, vc);
+      va = svadd_s32_z (svptrue_b32 (), va, tb);
+      va = svadd_s32_z (svptrue_b32 (), va, tc);
+      svst1_s32 (svptrue_b32 (), a + i * 8, va);
+    }
+
+  for (i = 0; i < 32; i++)
+    if (a[i] != b[i] + c[i] + vb[i % 8] + vc[i % 8])
+      __builtin_abort ();
+}
+
+void __attribute__ ((noipa))
+firstprivate_distribute ()
+{
+
+  int a[32], b[32], c[32];
+  svint32_t va, vb, vc;
+  int i;
+
+#pragma omp parallel for
+  for (i = 0; i < 32; i++)
+    {
+      b[i] = i;
+      c[i] = i + 1;
+    }
+
+  vb = svindex_s32 (1, 0);
+  vc = svindex_s32 (0, 1);
+
+#pragma omp teams
+#pragma omp distribute firstprivate (vb, vc) private (va)
+  for (i = 0; i < 4; i++)
+    {
+      svint32_t tb, tc;
+      vec_compare (&vb, svindex_s32 (1, 0));
+      vec_compare (&vc, svindex_s32 (0, 1));
+      tb = svld1_s32 (svptrue_b32 (), b + i * 8);
+      tc = svld1_s32 (svptrue_b32 (), c + i * 8);
+      va = svadd_s32_z (svptrue_b32 (), vb, vc);
+      va = svadd_s32_z (svptrue_b32 (), va, tb);
+      va = svadd_s32_z (svptrue_b32 (), va, tc);
+      svst1_s32 (svptrue_b32 (), a + i * 8, va);
+    }
+
+  for (i = 0; i < 32; i++)
+    if (a[i] != b[i] + c[i] + vb[i % 8] + vc[i % 8])
+      __builtin_abort ();
+}
+
+int
+main ()
+{
+  firstprivate_for ();
+  firstprivate_sections ();
+  firstprivate_distribute ();
+}
diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/lastprivate.c b/libgomp/testsuite/libgomp.c-target/aarch64/lastprivate.c
new file mode 100644
index 0000000..2f93d7b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-target/aarch64/lastprivate.c
@@ -0,0 +1,171 @@
+/* { dg-do run { target aarch64_sve256_hw } } */
+/* { dg-options "-msve-vector-bits=256 -fopenmp -O2" } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+#include <omp.h>
+
+static svint32_t __attribute__ ((noipa))
+foo (svint32_t *vb, svint32_t *vc, int tn)
+{
+  svint32_t temp = svindex_s32 (tn, 0);
+  temp = svadd_s32_z (svptrue_b32 (), temp, *vb);
+  return svadd_s32_z (svptrue_b32 (), temp, *vc);
+}
+
+void __attribute__ ((noipa))
+lastprivate_sections ()
+{
+  int a[8], b[8], c[8];
+  svint32_t va, vb, vc;
+  int i;
+
+#pragma omp parallel for
+  for (i = 0; i < 8; i++)
+    {
+      b[i] = i;
+      c[i] = i + 1;
+    }
+
+#pragma omp parallel sections lastprivate (vb, vc) num_threads (2)
+  {
+    #pragma omp section
+    vb = svld1_s32 (svptrue_b32 (), b);
+    #pragma omp section
+    vb = svld1_s32 (svptrue_b32 (), b);
+    vc = svld1_s32 (svptrue_b32 (), c);
+  }
+
+  va = svadd_s32_z (svptrue_b32 (), vb, vc);
+  svst1_s32 (svptrue_b32 (), a, va);
+
+  for (i = 0; i < 8; i++)
+    if (a[i] != b[i] + c[i])
+      __builtin_abort ();
+}
+
+void __attribute__ ((noipa))
+lastprivate_for ()
+{
+  int a[32], b[32], c[32];
+  int aa[8], bb[8], cc[8];
+  svint32_t va, vb, vc;
+  int i, tn;
+
+#pragma omp parallel for
+  for (i = 0; i < 32; i++)
+    {
+      b[i] = i;
+      c[i] = i + 1;
+    }
+
+#pragma omp parallel for lastprivate (va, vb, vc, tn)
+  for (i = 0; i < 4; i++)
+    {
+      vb = svld1_s32 (svptrue_b32 (), b + i * 8);
+      vc = svld1_s32 (svptrue_b32 (), c + i * 8);
+      tn = i;
+      va = foo (&vb, &vc, tn);
+      svst1_s32 (svptrue_b32 (), a + i * 8, va);
+    }
+
+  svst1_s32 (svptrue_b32 (), aa, va);
+  svst1_s32 (svptrue_b32 (), bb, vb);
+  svst1_s32 (svptrue_b32 (), cc, vc);
+
+  for (i = 0; i < 8; i++)
+    if (aa[i] != bb[i] + cc[i] + tn)
+      __builtin_abort ();
+
+  for (i = 0; i < 32; i++)
+    if (a[i] != b[i] + c[i] + i / 8)
+      __builtin_abort ();
+}
+
+void __attribute__ ((noipa))
+lastprivate_simd ()
+{
+
+  int a[64], b[64], c[64];
+  int aa[8], bb[8], cc[8];
+  svint32_t va, vb, vc;
+  int i;
+
+#pragma omp parallel for
+  for (i = 0; i < 64; i++)
+    {
+      b[i] = i;
+      c[i] = i + 1;
+    }
+
+#pragma omp simd lastprivate (va, vb, vc)
+  for (i = 0; i < 8; i++)
+    {
+      vb = svld1_s32 (svptrue_b32 (), b + i * 8);
+      vc = svld1_s32 (svptrue_b32 (), c + i * 8);
+      va = svadd_s32_z (svptrue_b32 (), vb, vc);
+      svst1_s32 (svptrue_b32 (), a + i * 8, va);
+    }
+
+  svst1_s32 (svptrue_b32 (), aa, va);
+  svst1_s32 (svptrue_b32 (), bb, vb);
+  svst1_s32 (svptrue_b32 (), cc, vc);
+
+  for (i = 0; i < 8; i++)
+    if (aa[i] != bb[i] + cc[i])
+      __builtin_abort ();
+
+  for (i = 0; i < 64; i++)
+    if (a[i] != b[i] + c[i])
+      __builtin_abort ();
+}
+
+void __attribute__ ((noipa))
+lastprivate_distribute ()
+{
+
+  int a[32], b[32], c[32];
+  int aa[8], bb[8], cc[8];
+  svint32_t va, vb, vc;
+  int i, tn;
+
+#pragma omp parallel for
+  for (i = 0; i < 32; i++)
+    {
+      b[i] = i;
+      c[i] = i + 1;
+    }
+
+#pragma omp teams
+#pragma omp distribute lastprivate (va, vb, vc, tn)
+  for (i = 0; i < 4; i++)
+    {
+      vb = svld1_s32 (svptrue_b32 (), b + i * 8);
+      vc = svld1_s32 (svptrue_b32 (), c + i * 8);
+      tn = i;
+      va = foo (&vb, &vc, tn);
+      svst1_s32 (svptrue_b32 (), a + i * 8, va);
+    }
+
+  svst1_s32 (svptrue_b32 (), aa, va);
+  svst1_s32 (svptrue_b32 (), bb, vb);
+  svst1_s32 (svptrue_b32 (), cc, vc);
+
+  for (i = 0; i < 8; i++)
+    if (aa[i] != bb[i] + cc[i] + tn)
+      __builtin_abort ();
+
+  for (i = 0; i < 32; i++)
+    if (a[i] != b[i] + c[i] + i / 8)
+      __builtin_abort ();
+}
+
+int
+main ()
+{
+  lastprivate_for ();
+  lastprivate_sections ();
+  lastprivate_simd ();
+  lastprivate_distribute ();
+}
diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/private.c b/libgomp/testsuite/libgomp.c-target/aarch64/private.c
new file mode 100644
index 0000000..fed5370
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-target/aarch64/private.c
@@ -0,0 +1,107 @@
+/* { dg-do run { target aarch64_sve256_hw } } */
+/* { dg-options "-msve-vector-bits=256 -fopenmp -O2" } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+#include <omp.h>
+
+static void __attribute__ ((noipa))
+compare_vec (svint32_t *x, svint32_t y)
+{
+  svbool_t p = svnot_b_z (svptrue_b32 (), svcmpeq_s32 (svptrue_b32 (), *x, y));
+
+  if (svptest_any (svptrue_b32 (), p))
+    __builtin_abort ();
+}
+
+void __attribute__ ((noipa))
+private ()
+{
+  svint32_t a;
+#pragma omp parallel private (a) num_threads (10)
+  {
+    a = svindex_s32 (omp_get_thread_num (), 0);
+
+#pragma omp barrier
+    compare_vec (&a, svindex_s32 (omp_get_thread_num (), 0));
+  }
+}
+
+void __attribute__ ((noipa))
+firstprivate ()
+{
+  svint32_t a = svindex_s32 (1,1);
+  svint32_t b;
+
+#pragma omp parallel private (b) firstprivate (a) num_threads (12)
+  {
+    compare_vec (&a, svindex_s32 (1, 1));
+    b = svindex_s32 (omp_get_thread_num (), 0);
+
+#pragma omp barrier
+    compare_vec (&a, svindex_s32 (1, 1));
+    compare_vec (&b, svindex_s32 (omp_get_thread_num (), 0));
+    if (omp_get_thread_num () == 5)
+      {
+	a = svindex_s32 (1, 2);
+	b = svindex_s32 (10, 0);
+      }
+
+#pragma omp barrier
+    if (omp_get_thread_num () == 5)
+      {
+	compare_vec (&a, svindex_s32 (1, 2));
+	compare_vec (&b, svindex_s32 (10, 0));
+      }
+    else
+      {
+	compare_vec (&a, svindex_s32 (1, 1));
+	compare_vec (&b, svindex_s32 (omp_get_thread_num (), 0));
+      }
+  }
+}
+
+void __attribute__ ((noipa))
+lastprivate ()
+{
+  svint32_t a = svindex_s32 (1,1);
+  svint32_t b;
+  int i;
+
+#pragma omp parallel for private (a) lastprivate (b)
+  for (i = 0; i < 16; i++)
+    {
+      b = svindex_s32 (i, 0);
+
+      compare_vec (&b, svindex_s32 (i, 0));
+      if (i == 5)
+	{
+	  a = svindex_s32 (1, 2);
+	  b = svindex_s32 (10, 0);
+	}
+      else
+	a = svindex_s32 (1, 1);
+
+      if (i == 5)
+	{
+	  compare_vec (&a, svindex_s32 (1, 2));
+	  compare_vec (&b, svindex_s32 (10, 0));
+	}
+      else
+	{
+	  compare_vec (&a, svindex_s32 (1, 1));
+	  compare_vec (&b, svindex_s32 (i, 0));
+	}
+    }
+
+  compare_vec (&b, svindex_s32 (15, 0));
+}
+
+int
+main ()
+{
+  private ();
+  firstprivate ();
+  lastprivate ();
+}
diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/shared.c b/libgomp/testsuite/libgomp.c-target/aarch64/shared.c
new file mode 100644
index 0000000..340a668
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-target/aarch64/shared.c
@@ -0,0 +1,266 @@
+/* { dg-do run { target aarch64_sve256_hw } } */
+/* { dg-options "-msve-vector-bits=256 -fopenmp -O2" } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+#include <stdlib.h>
+#include <omp.h>
+
+static void __attribute__ ((noipa))
+compare_vec (svint32_t x, svint32_t y)
+{
+  svbool_t p = svnot_b_z (svptrue_b32 (), svcmpeq_s32 (svptrue_b32 (), x, y));
+
+  if (svptest_any (svptrue_b32 (), p))
+    __builtin_abort ();
+}
+
+static void __attribute__ ((noipa))
+compare_vecb (svbool_t x, svbool_t y)
+{
+  svbool_t p = sveor_b_z (svptrue_b32 (), x, y);
+
+  if (svptest_any (svptrue_b32 (), p))
+    __builtin_abort ();
+}
+
+void __attribute__ ((noipa))
+implicit_shared_default (svint32_t a, svint32_t b, svbool_t p)
+{
+
+#pragma omp parallel default (shared) num_threads (10)
+  {
+    /* 'a', 'b' and 'p' are implicitly shared.  */
+    compare_vec (a, svindex_s32 (0, 1));
+    compare_vec (b, svindex_s32 (8, 1));
+    compare_vecb (p, svptrue_b32 ());
+
+#pragma omp barrier
+    if (omp_get_thread_num () == 2)
+      a = svadd_s32_z (p, a, b);
+
+#pragma omp barrier
+    if (omp_get_thread_num () == 0)
+      {
+	compare_vec (a, svindex_s32 (8, 2));
+	compare_vec (b, svindex_s32 (8, 1));
+	compare_vecb (p, svptrue_b32 ());
+	b = svadd_s32_z (p, a, b);
+      }
+
+#pragma omp barrier
+    compare_vec (a, svindex_s32 (8, 2));
+    compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1)));
+
+#pragma omp barrier
+    if (omp_get_thread_num () == 0 || omp_get_thread_num () == 2)
+      {
+	compare_vec (a, svindex_s32 (8, 2));
+	compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1)));
+      }
+  }
+}
+
+void __attribute__ ((noipa))
+explicit_shared (svint32_t a, svint32_t b, svbool_t p)
+{
+
+#pragma omp parallel shared (a, b, p) num_threads (12)
+  {
+    /* 'a', 'b' and 'p' are explicitly shared.  */
+    compare_vec (a, svindex_s32 (0, 1));
+    compare_vec (b, svindex_s32 (8, 1));
+    compare_vecb (p, svptrue_b32 ());
+
+#pragma omp barrier
+    if (omp_get_thread_num () == 2)
+      a = svadd_s32_z (p, a, b);
+
+#pragma omp barrier
+    if (omp_get_thread_num () == 0)
+      {
+	compare_vec (a, svindex_s32 (8, 2));
+	compare_vec (b, svindex_s32 (8, 1));
+	compare_vecb (p, svptrue_b32 ());
+	b = svadd_s32_z (p, a, b);
+      }
+
+#pragma omp barrier
+    compare_vec (a, svindex_s32 (8, 2));
+    compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1)));
+
+#pragma omp barrier
+    if (omp_get_thread_num () == 0 || omp_get_thread_num () == 2)
+      {
+	compare_vec (a, svindex_s32 (8, 2));
+	compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1)));
+      }
+  }
+}
+
+void __attribute__ ((noipa))
+implicit_shared_no_default (svint32_t a, svint32_t b, svbool_t p)
+{
+
+#pragma omp parallel num_threads (16)
+  {
+    /* 'a', 'b' and 'p' are implicitly shared without default clause.  */
+    compare_vec (a, svindex_s32 (0, 1));
+    compare_vec (b, svindex_s32 (8, 1));
+    compare_vecb (p, svptrue_b32 ());
+
+#pragma omp barrier
+    if (omp_get_thread_num () == 12)
+      a = svadd_s32_z (p, a, b);
+
+#pragma omp barrier
+    if (omp_get_thread_num () == 15)
+      {
+	compare_vec (a, svindex_s32 (8, 2));
+	compare_vec (b, svindex_s32 (8, 1));
+	compare_vecb (p, svptrue_b32 ());
+	b = svadd_s32_z (p, a, b);
+      }
+
+#pragma omp barrier
+    compare_vec (a, svindex_s32 (8, 2));
+    compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1)));
+
+#pragma omp barrier
+    if (omp_get_thread_num () == 12 || omp_get_thread_num () == 15)
+      {
+	compare_vec (a, svindex_s32 (8, 2));
+	compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1)));
+      }
+  }
+
+}
+
+void __attribute__ ((noipa))
+mix_shared (svint32_t b, svbool_t p)
+{
+
+  svint32_t a = svindex_s32 (0, 0);
+  int *m = (int *) malloc (8 * sizeof (int));
+  int i;
+
+#pragma omp parallel for
+  for (i = 0; i < 8; i++)
+    m[i] = i;
+
+#pragma omp parallel num_threads (16)
+  {
+    compare_vec (a, svindex_s32 (0, 0));
+    compare_vec (b, svindex_s32 (8, 1));
+
+#pragma omp barrier
+    /* 'm' is predetermined shared here.  'a' is implicitly shared here.  */
+    if (omp_get_thread_num () == 10)
+      a = svld1_s32 (svptrue_b32 (), m);
+
+#pragma omp barrier
+    /* 'a', 'b' and 'p' are implicitly shared without default clause.  */
+    compare_vec (a, svindex_s32 (0, 1));
+    compare_vec (b, svindex_s32 (8, 1));
+    compare_vecb (p, svptrue_b32 ());
+
+#pragma omp barrier
+    if (omp_get_thread_num () == 12)
+      a = svadd_s32_z (p, a, b);
+
+#pragma omp barrier
+    if (omp_get_thread_num () == 15)
+      {
+	compare_vec (a, svindex_s32 (8, 2));
+	compare_vec (b, svindex_s32 (8, 1));
+	compare_vecb (p, svptrue_b32 ());
+	b = svadd_s32_z (p, a, b);
+      }
+
+#pragma omp barrier
+    if (omp_get_thread_num () == 12 || omp_get_thread_num () == 15)
+      {
+	compare_vec (a, svindex_s32 (8, 2));
+	compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1)));
+      }
+
+#pragma omp barrier
+    compare_vec (a, svindex_s32 (8, 2));
+    compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1)));
+  }
+}
+
+#define N __ARM_FEATURE_SVE_BITS
+#define FIXED_ATTR __attribute__((arm_sve_vector_bits (N)))
+
+typedef svint32_t v8si FIXED_ATTR;
+
+void __attribute__ ((noipa))
+predetermined_shared_static (int n)
+{
+
+  int *m = (int *) malloc (8 * sizeof (int));
+  int i;
+
+#pragma omp parallel for
+  /* 'm' is predetermined shared here.  */
+  for (i = 0; i < 8; i++)
+    m[i] = i;
+
+  static v8si a = { 0, 1, 2, 3, 4, 5, 6, 7 };
+
+#pragma omp parallel num_threads (16)
+  {
+    /* 'a' is implicit shared here.  */
+    if (n == 0)
+      compare_vec (a, svindex_s32 (0, 1));
+
+    if (n == 1)
+      compare_vec (a, svindex_s32 (1, 1));
+
+#pragma omp barrier
+    if (omp_get_thread_num () == 12)
+      {
+	if (n == 0)
+	  compare_vec (a, svindex_s32 (0, 1));
+
+	if (n == 1)
+	  compare_vec (a, svindex_s32 (1, 1));
+
+	a = svadd_s32_z (svptrue_b32 (), a, svindex_s32 (1, 0));
+      }
+
+#pragma omp barrier
+    if (n == 0)
+      compare_vec (a, svindex_s32 (1, 1));
+
+    if (n == 1)
+      compare_vec (a, svindex_s32 (2, 1));
+  }
+}
+
+
+int
+main ()
+{
+  svint32_t x = svindex_s32 (0, 1);
+  svint32_t y = svindex_s32 (8, 1);
+  svbool_t p = svptrue_b32 ();
+
+  /* Implicit shared.  */
+  implicit_shared_default (x, y, p);
+
+  /* Explicit shared.  */
+  explicit_shared (x, y, p);
+
+  /* Implicit shared with no default clause.  */
+  implicit_shared_no_default (x, y, p);
+
+  /* Mix shared.  */
+  mix_shared (y, p);
+
+  /* Predetermined and static shared.  */
+  predetermined_shared_static (0);
+  predetermined_shared_static (1);
+}
diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/simd-aligned.c b/libgomp/testsuite/libgomp.c-target/aarch64/simd-aligned.c
new file mode 100644
index 0000000..14642c9
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-target/aarch64/simd-aligned.c
@@ -0,0 +1,51 @@
+/* { dg-do run { target aarch64_sve256_hw } } */
+/* { dg-options "-msve-vector-bits=256 -fopenmp -O2" } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+#include <stdint.h>
+
+#define N 256
+
+int a[N] __attribute__ ((aligned (64)));
+int b[N] __attribute__ ((aligned (64)));
+
+void __attribute__ ((noipa))
+foo (int *p, int *q, svint32_t *onesp)
+{
+   svint32_t va, vc;
+   int i;
+   uint64_t sz = svcntw ();
+
+#pragma omp simd aligned(p, q : 64) aligned (onesp : 128) \
+		 private (va, vc) nontemporal (va, vc)
+  for (i = 0; i < N; i++)
+    {
+      if (i % sz == 0)
+	{
+	  va = svld1_s32 (svptrue_b32 (), p);
+	  vc = svadd_s32_z (svptrue_b32 (), va, *onesp);
+	  svst1_s32 (svptrue_b32 (), q, vc);
+	  q += sz;
+	}
+    }
+}
+
+int
+main ()
+{
+  svint32_t ones __attribute__ ((aligned(128))) = svindex_s32 (1, 0);
+
+  for (int i = 0; i < N; i++)
+    {
+      a[i] = 1;
+      b[i] = 0;
+    }
+
+  foo (a, b, &ones);
+
+  for (int i = 0; i < N; i++)
+    if (b[i] != 2)
+      __builtin_abort ();
+}
diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/simd-nontemporal.c b/libgomp/testsuite/libgomp.c-target/aarch64/simd-nontemporal.c
new file mode 100644
index 0000000..6fe4616
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-target/aarch64/simd-nontemporal.c
@@ -0,0 +1,51 @@
+/* { dg-do run { target aarch64_sve256_hw } } */
+/* { dg-options "-msve-vector-bits=256 -fopenmp -O2" } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+#include <stdint.h>
+
+#define N 256
+
+int a[N] __attribute__ ((aligned (64)));
+int b[N] __attribute__ ((aligned (64)));
+
+void __attribute__ ((noipa))
+foo (int *p, int *q)
+{
+   svint32_t va, vb, vc;
+   int i;
+   uint64_t sz = svcntw ();
+
+#pragma omp simd aligned(p, q : 64) private (va, vb, vc) \
+		 nontemporal (va, vb, vc)
+  for (i = 0; i < N; i++)
+    {
+      if (i % sz == 0)
+	{
+	  va = svld1_s32 (svptrue_b32 (), p);
+	  vb = svindex_s32 (1, 0);
+	  vc = svadd_s32_z (svptrue_b32 (), va, vb);
+	  svst1_s32 (svptrue_b32 (), q, vc);
+	  q += sz;
+	}
+    }
+}
+
+int
+main ()
+{
+
+  for (int i = 0; i < N; i++)
+    {
+      a[i] = 1;
+      b[i] = 0;
+    }
+
+  foo (a, b);
+
+  for (int i = 0; i < N; i++)
+    if (b[i] != 2)
+      __builtin_abort ();
+}
diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/threadprivate.c b/libgomp/testsuite/libgomp.c-target/aarch64/threadprivate.c
new file mode 100644
index 0000000..aa7d2f9
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-target/aarch64/threadprivate.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target aarch64_sve256_hw } } */
+/* { dg-options "-msve-vector-bits=256 -fopenmp -O2" } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+#include <stdint.h>
+
+typedef __SVInt32_t v8si __attribute__ ((arm_sve_vector_bits(256)));
+
+v8si vec1;
+#pragma omp threadprivate (vec1)
+
+void __attribute__ ((noipa))
+foo ()
+{
+  int64_t res = 0;
+
+  vec1 = svindex_s32 (1, 0);
+
+#pragma omp parallel copyin (vec1) firstprivate (res) num_threads(10)
+  {
+    res = svaddv_s32 (svptrue_b32 (), vec1);
+
+#pragma omp barrier
+    if (res != 8LL)
+      __builtin_abort ();
+  }
+}
+
+int
+main ()
+{
+  int64_t res = 0;
+
+#pragma omp parallel firstprivate (res) num_threads(10)
+  {
+    vec1 = svindex_s32 (1, 0);
+    res = svaddv_s32 (svptrue_b32 (), vec1);
+
+#pragma omp barrier
+    if (res != 8LL)
+      __builtin_abort ();
+  }
+
+  foo ();
+}
diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/udr-sve.c b/libgomp/testsuite/libgomp.c-target/aarch64/udr-sve.c
new file mode 100644
index 0000000..03d93cc
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-target/aarch64/udr-sve.c
@@ -0,0 +1,98 @@
+/* { dg-do run { target aarch64_sve256_hw } } */
+/* { dg-options "-march=armv8-a+sve -msve-vector-bits=256 -fopenmp -O2" } */
+
+#include <arm_sve.h>
+
+#pragma omp declare reduction (+:svint32_t: omp_out = svadd_s32_z (svptrue_b32(), omp_in, omp_out)) \
+		    initializer (omp_priv = svindex_s32 (0, 0))
+
+void __attribute__ ((noipa))
+parallel_reduction ()
+{
+  int a[8] = {1 ,1, 1, 1, 1, 1, 1, 1};
+  int b[8] = {0 ,0, 0, 0, 0, 0, 0, 0};
+  svint32_t va = svld1_s32 (svptrue_b32 (), b);
+  int i = 0;
+  int64_t res;
+
+  #pragma omp parallel reduction (+:va, i)
+    {
+      va = svld1_s32 (svptrue_b32 (), a);
+      i++;
+    }
+
+  res = svaddv_s32 (svptrue_b32 (), va);
+
+  if (res != i * 8)
+    __builtin_abort ();
+}
+
+void __attribute__ ((noipa))
+for_reduction ()
+{
+  int a[8] = {1 ,1, 1, 1, 1, 1, 1, 1};
+  int b[8] = {0 ,0, 0, 0, 0, 0, 0, 0};
+  svint32_t va = svld1_s32 (svptrue_b32 (), b);
+  int j;
+  int64_t res;
+
+  #pragma omp parallel for reduction (+:va)
+  for (j = 0; j < 8; j++)
+    va += svld1_s32 (svptrue_b32 (), a);
+
+  res = svaddv_s32 (svptrue_b32 (), va);
+
+  if (res != 64)
+    __builtin_abort ();
+}
+
+void __attribute__ ((noipa))
+simd_reduction ()
+{
+  int a[8];
+  svint32_t va = svindex_s32 (0, 0);
+  int i = 0;
+  int j;
+  int64_t res = 0;
+
+  for (j = 0; j < 8; j++)
+    a[j] = 1;
+
+  #pragma omp simd reduction (+:va, i)
+  for (j = 0; j < 16; j++)
+    va = svld1_s32 (svptrue_b32 (), a);
+
+  res = svaddv_s32 (svptrue_b32 (), va);
+
+  if (res != 8)
+    __builtin_abort ();
+}
+
+void __attribute__ ((noipa))
+inscan_reduction_incl ()
+{
+  svint32_t va = svindex_s32 (0, 0);
+  int j;
+  int64_t res = 0;
+
+  #pragma omp parallel
+  #pragma omp for reduction (inscan,+:va) firstprivate (res) lastprivate (res)
+  for (j = 0; j < 8; j++)
+    {
+      va = svindex_s32 (1, 0);
+      #pragma omp scan inclusive (va)
+      res += svaddv_s32 (svptrue_b32 (), va);
+    }
+
+  if (res != 64)
+    __builtin_abort ();
+}
+
+int
+main ()
+{
+  parallel_reduction ();
+  for_reduction ();
+  simd_reduction ();
+  inscan_reduction_incl ();
+}
diff --git a/libgomp/testsuite/libgomp.c/append-args-fr-1.c b/libgomp/testsuite/libgomp.c/append-args-fr-1.c
new file mode 100644
index 0000000..2fd7eda
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/append-args-fr-1.c
@@ -0,0 +1,232 @@
+/* { dg-do run } */
+
+#include "append-args-fr.h"
+
+enum { host_device, nvptx_device, gcn_device } used_device_type, used_device_type2;
+static int used_device_num, used_device_num2;
+static omp_interop_fr_t expected_fr, expected_fr2;
+static _Bool is_targetsync, is_targetsync2;
+
+void
+check_interop (omp_interop_t obj)
+{
+  if (used_device_type == host_device)
+    check_host (obj);
+  else if (used_device_type == nvptx_device)
+    check_nvptx (obj, used_device_num, expected_fr, is_targetsync);
+  else if (used_device_type == gcn_device)
+    check_gcn (obj, used_device_num, expected_fr, is_targetsync);
+  else
+    __builtin_unreachable ();
+
+  #pragma omp interop use(obj)
+}
+
+void
+check_interop2 (omp_interop_t obj, omp_interop_t obj2)
+{
+  check_interop (obj);
+
+  #pragma omp interop use(obj2)
+
+  if (used_device_type2 == host_device)
+    check_host (obj2);
+  else if (used_device_type2 == nvptx_device)
+    check_nvptx (obj2, used_device_num2, expected_fr2, is_targetsync2);
+  else if (used_device_type2 == gcn_device)
+    check_gcn (obj2, used_device_num2, expected_fr2, is_targetsync2);
+  else
+    __builtin_unreachable ();
+}
+
+
+/* Check no args + one interop arg - and no prefer_type.  */
+
+int f0_1_tg_ (omp_interop_t obj) { check_interop (obj); return 4242; }
+#pragma omp declare variant(f0_1_tg_)     match(construct={dispatch}) append_args(interop(target))
+int f0_1_tg () { assert (false); return 42; }
+
+void f0_1_tgsy_ (omp_interop_t obj) { check_interop (obj); }
+#pragma omp declare variant(f0_1_tgsy_)   match(construct={dispatch}) append_args(interop(targetsync))
+void f0_1_tgsy () { assert (false); }
+
+int f0_1_tgtgsy_ (omp_interop_t obj) { check_interop (obj); return 3333; }
+#pragma omp declare variant(f0_1_tgtgsy_) match(construct={dispatch}) append_args(interop(targetsync,target))
+int f0_1_tgtgsy () { assert (false); return 33; }
+
+
+/* And with PREFER_TYPE.  */
+
+// nv: cuda, gcn: -, -, hip
+void f0_1_tgsy_c_cd_hi_hs_ (omp_interop_t obj) { check_interop (obj); }
+#pragma omp declare variant(f0_1_tgsy_c_cd_hi_hs_)   match(construct={dispatch}) \
+            append_args(interop(targetsync, prefer_type("cuda","cuda_driver", "hip", "hsa")))
+void f0_1_tgsy_c_cd_hi_hs () { assert (false); }
+
+// nv: -, cuda_driver, gcn: hsa
+void f0_1_tgsy_hs_cd_c_hi_ (omp_interop_t obj) { check_interop (obj); }
+#pragma omp declare variant(f0_1_tgsy_hs_cd_c_hi_)   match(construct={dispatch}) \
+            append_args(interop(targetsync, prefer_type({attr("ompx_foo")}, {fr("hsa")}, {attr("ompx_bar"), fr("cuda_driver"), attr("ompx_foobar")},{fr("cuda")}, {fr("hip")})))
+void f0_1_tgsy_hs_cd_c_hi () { assert (false); }
+
+// nv: -, hip, gcn: hsa
+void f0_1_tgsy_hs_hi_cd_c_ (omp_interop_t obj) { check_interop (obj); }
+#pragma omp declare variant(f0_1_tgsy_hs_hi_cd_c_)   match(construct={dispatch}) \
+            append_args(interop(targetsync, prefer_type("hsa", "hip", "cuda_driver", "cuda")))
+void f0_1_tgsy_hs_hi_cd_c () { assert (false); }
+
+
+void
+check_f0 ()
+{
+  if (used_device_type == nvptx_device)
+    expected_fr = omp_ifr_cuda;
+  else if (used_device_type == gcn_device)
+    expected_fr = omp_ifr_hip;
+  else  /* host; variable shall not be accessed  */
+    expected_fr = omp_ifr_level_zero;
+
+  int i;
+  if (used_device_num == DEFAULT_DEVICE)
+    {
+      is_targetsync = 0;
+      #pragma omp dispatch
+        i = f0_1_tg ();
+      assert (i == 4242);
+
+      is_targetsync = 1;
+      #pragma omp dispatch
+        f0_1_tgsy ();
+
+      #pragma omp dispatch
+        i = f0_1_tgtgsy ();
+      assert (i == 3333);
+
+
+      if (used_device_type == nvptx_device)
+	expected_fr = omp_ifr_cuda;
+      else if (used_device_type == gcn_device)
+	expected_fr = omp_ifr_hip;
+      #pragma omp dispatch
+	 f0_1_tgsy_c_cd_hi_hs ();
+
+      if (used_device_type == nvptx_device)
+	expected_fr = omp_ifr_cuda_driver;
+      else if (used_device_type == gcn_device)
+	expected_fr = omp_ifr_hsa;
+      #pragma omp dispatch
+	f0_1_tgsy_hs_cd_c_hi ();
+  
+      if (used_device_type == nvptx_device)
+	expected_fr = omp_ifr_hip;
+      else if (used_device_type == gcn_device)
+	expected_fr = omp_ifr_hsa;
+      #pragma omp dispatch
+	f0_1_tgsy_hs_hi_cd_c ();
+    }
+  else
+    {
+      is_targetsync = 0;
+      #pragma omp dispatch device(used_device_num)
+        i = f0_1_tg ();
+      assert (i == 4242);
+
+      is_targetsync = 1;
+      #pragma omp dispatch device(used_device_num)
+        f0_1_tgsy ();
+
+      #pragma omp dispatch device(used_device_num)
+        i = f0_1_tgtgsy ();
+      assert (i == 3333);
+
+
+      if (used_device_type == nvptx_device)
+	expected_fr = omp_ifr_cuda;
+      else if (used_device_type == gcn_device)
+	expected_fr = omp_ifr_hip;
+      #pragma omp dispatch device(used_device_num)
+	 f0_1_tgsy_c_cd_hi_hs ();
+
+      if (used_device_type == nvptx_device)
+	expected_fr = omp_ifr_cuda_driver;
+      else if (used_device_type == gcn_device)
+	expected_fr = omp_ifr_hsa;
+      #pragma omp dispatch device(used_device_num)
+	f0_1_tgsy_hs_cd_c_hi ();
+  
+      if (used_device_type == nvptx_device)
+	expected_fr = omp_ifr_hip;
+      else if (used_device_type == gcn_device)
+	expected_fr = omp_ifr_hsa;
+      #pragma omp dispatch device(used_device_num)
+	f0_1_tgsy_hs_hi_cd_c ();
+    }
+}
+
+
+
+void
+do_check (int dev)
+{
+  int num_dev = omp_get_num_devices ();
+  const char *dev_type;
+  if (dev != DEFAULT_DEVICE)
+    omp_set_default_device (dev);
+  int is_nvptx = on_device_arch_nvptx ();
+  int is_gcn = on_device_arch_gcn ();
+  int is_host;
+ 
+  if (dev != DEFAULT_DEVICE)
+    is_host = dev == -1 || dev == num_dev;
+  else
+    {
+      int def_dev = omp_get_default_device ();
+      is_host = def_dev == -1 || def_dev == num_dev;
+    }
+
+  assert (is_nvptx + is_gcn + is_host == 1);
+
+  if (num_dev > 0 && dev != DEFAULT_DEVICE)
+    {
+      if (is_host)
+	omp_set_default_device (0);
+      else
+	omp_set_default_device (-1);
+    }
+
+  used_device_num = dev;
+  if (is_host)
+    {
+      dev_type = "host";
+      used_device_type = host_device;
+    }
+  else if (is_nvptx)
+    {
+      dev_type = "nvptx";
+      used_device_type = nvptx_device;
+    }
+  else if (is_gcn)
+    {
+      dev_type = "gcn";
+      used_device_type = gcn_device;
+    }
+
+  printf ("Running on the %s device (%d)\n", dev_type, dev);
+  check_f0 ();
+}
+
+
+
+int
+main ()
+{
+  do_check (DEFAULT_DEVICE);
+  int ndev = omp_get_num_devices ();
+  for (int dev = -1; dev < ndev; dev++)
+    do_check (dev);
+  for (int dev = -1; dev < ndev; dev++)
+    {
+      omp_set_default_device (dev);
+      do_check (DEFAULT_DEVICE);
+    }
+}
diff --git a/libgomp/testsuite/libgomp.c/append-args-fr.h b/libgomp/testsuite/libgomp.c/append-args-fr.h
new file mode 100644
index 0000000..9f6ca04
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/append-args-fr.h
@@ -0,0 +1,305 @@
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <omp.h>
+#include "../libgomp.c-c++-common/on_device_arch.h"
+
+/* Provides:  */
+
+#define DEFAULT_DEVICE -99
+
+void check_host (omp_interop_t obj);
+void check_nvptx (omp_interop_t obj, int dev, omp_interop_fr_t expected_fr, _Bool is_targetsync);
+void check_gcn (omp_interop_t obj, int dev, omp_interop_fr_t expected_fr, _Bool is_targetsync);
+
+
+/* The following assumes that when a nvptx device is available,
+   cuda/cuda_driver/hip are supported.
+   And that likewise when a gcn device is available that the
+   plugin also can not only the HSA but also the HIP library
+   such that hsa/hip are supported.
+   For the host, omp_interop_none is expected.
+
+   Otherwise, it only does some basic tests without checking
+   that the returned result really makes sense.  */
+
+void check_type (omp_interop_t obj)
+{
+  const char *type;
+
+  type = omp_get_interop_type_desc (obj, omp_ipr_fr_id);
+  if (obj != omp_interop_none)
+    assert (strcmp (type, "omp_interop_t") == 0);
+  else
+    assert (type == NULL);
+
+  type = omp_get_interop_type_desc (obj, omp_ipr_fr_name);
+  if (obj != omp_interop_none)
+    assert (strcmp (type, "const char *") == 0);
+  else
+    assert (type == NULL);
+
+  type = omp_get_interop_type_desc (obj, omp_ipr_vendor);
+  if (obj != omp_interop_none)
+    assert (strcmp (type, "int") == 0);
+  else
+    assert (type == NULL);
+
+  type = omp_get_interop_type_desc (obj, omp_ipr_vendor_name);
+  if (obj != omp_interop_none)
+    assert (strcmp (type, "const char *") == 0);
+  else
+    assert (type == NULL);
+
+  type = omp_get_interop_type_desc (obj, omp_ipr_device_num);
+  if (obj != omp_interop_none)
+    assert (strcmp (type, "int") == 0);
+  else
+    assert (type == NULL);
+
+  if (obj != omp_interop_none)
+    return;
+  assert (omp_get_interop_type_desc (obj, omp_ipr_platform) == NULL);
+  assert (omp_get_interop_type_desc (obj, omp_ipr_device) == NULL);
+  assert (omp_get_interop_type_desc (obj, omp_ipr_device_context) == NULL);
+  assert (omp_get_interop_type_desc (obj, omp_ipr_targetsync) == NULL);
+}
+
+
+void
+check_host (omp_interop_t obj)
+{
+  assert (obj == omp_interop_none);
+  check_type (obj);
+}
+
+
+void
+check_nvptx (omp_interop_t obj, int dev, omp_interop_fr_t expected_fr, _Bool is_targetsync)
+{
+  assert (obj != omp_interop_none && obj != (omp_interop_t) -1L);
+
+  omp_interop_rc_t ret_code = omp_irc_no_value;
+  omp_interop_fr_t fr = (omp_interop_fr_t) omp_get_interop_int (obj, omp_ipr_fr_id, &ret_code);
+
+  assert (ret_code == omp_irc_success);
+  assert (fr == expected_fr);
+
+  ret_code = omp_irc_no_value;
+  const char *fr_name = omp_get_interop_str (obj, omp_ipr_fr_name, &ret_code);
+
+  assert (ret_code == omp_irc_success);
+  if (fr == omp_ifr_cuda)
+    assert (strcmp (fr_name, "cuda") == 0);
+  else if (fr == omp_ifr_cuda_driver)
+    assert (strcmp (fr_name, "cuda_driver") == 0);
+  else if (fr == omp_ifr_hip)
+    assert (strcmp (fr_name, "hip") == 0);
+  else
+    assert (0);
+
+  ret_code = omp_irc_no_value;
+  int vendor = (int) omp_get_interop_int (obj, omp_ipr_vendor, &ret_code);
+  assert (ret_code == omp_irc_success);
+  assert (vendor == 11);  /* Nvidia */
+
+  ret_code = omp_irc_no_value;
+  const char *vendor_name = omp_get_interop_str (obj, omp_ipr_vendor_name, &ret_code);
+  assert (ret_code == omp_irc_success);
+  assert (strcmp (vendor_name, "nvidia") == 0);
+
+  ret_code = omp_irc_no_value;
+  int dev_num = (int) omp_get_interop_int (obj, omp_ipr_device_num, &ret_code);
+  assert (ret_code == omp_irc_success);
+  if (dev == DEFAULT_DEVICE)
+    assert (dev_num == omp_get_default_device ());
+  else
+    assert (dev_num == dev);
+
+  /* Platform: N/A.  */
+  ret_code = omp_irc_success;
+  (void) omp_get_interop_int (obj, omp_ipr_platform, &ret_code);
+  assert (ret_code == omp_irc_no_value);
+  ret_code = omp_irc_success;
+  (void) omp_get_interop_ptr (obj, omp_ipr_platform, &ret_code);
+  assert (ret_code == omp_irc_no_value);
+  ret_code = omp_irc_success;
+  (void) omp_get_interop_str (obj, omp_ipr_platform, &ret_code);
+  assert (ret_code == omp_irc_no_value);
+
+  /* Device: int / CUdevice / hipDevice_t -- all internally an 'int'.  */
+  ret_code = omp_irc_no_value;
+  int fr_device = (int) omp_get_interop_int (obj, omp_ipr_device, &ret_code);
+
+  /* CUDA also starts from 0 and goes to < n with cudaGetDeviceCount(&cn).  */
+  assert (ret_code == omp_irc_success);
+  assert (fr_device >= 0 && fr_device < omp_get_num_devices ());
+
+  /* Device context: N/A / CUcontext / hipCtx_t -- a pointer.  */
+  ret_code = omp_irc_out_of_range;
+  void *ctx = omp_get_interop_ptr (obj, omp_ipr_device_context, &ret_code);
+
+  if (fr == omp_ifr_cuda)
+    {
+      assert (ret_code == omp_irc_no_value);
+      assert (ctx == NULL);
+    }
+  else
+    {
+      assert (ret_code == omp_irc_success);
+      assert (ctx != NULL);
+    }
+
+  /* Stream/targetsync: cudaStream_t / CUstream / hipStream_t -- a pointer.  */
+  ret_code = omp_irc_out_of_range;
+  void *stream = omp_get_interop_ptr (obj, omp_ipr_targetsync, &ret_code);
+
+  if (is_targetsync)  /* no targetsync */
+    {
+      assert (ret_code == omp_irc_success);
+      assert (stream != NULL);
+    }
+  else
+    {
+      assert (ret_code == omp_irc_no_value);
+      assert (stream == NULL);
+    }
+
+  check_type (obj);
+  if (fr == omp_ifr_cuda)
+    {
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0);
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "int") == 0);
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "N/A") == 0);
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "cudaStream_t") == 0);
+    }
+  else if (fr == omp_ifr_cuda_driver)
+    {
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0);
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "CUdevice") == 0);
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "CUcontext") == 0);
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "CUstream") == 0);
+    }
+  else
+    {
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0);
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "hipDevice_t") == 0);
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "hipCtx_t") == 0);
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "hipStream_t") == 0);
+    }
+}
+
+
+void
+check_gcn (omp_interop_t obj, int dev, omp_interop_fr_t expected_fr, _Bool is_targetsync)
+{
+  assert (obj != omp_interop_none && obj != (omp_interop_t) -1L);
+
+  omp_interop_rc_t ret_code = omp_irc_no_value;
+  omp_interop_fr_t fr = (omp_interop_fr_t) omp_get_interop_int (obj, omp_ipr_fr_id, &ret_code);
+
+  assert (ret_code == omp_irc_success);
+  assert (fr == expected_fr);
+
+  ret_code = omp_irc_no_value;
+  const char *fr_name = omp_get_interop_str (obj, omp_ipr_fr_name, &ret_code);
+
+   assert (ret_code == omp_irc_success);
+  if (fr == omp_ifr_hip)
+    assert (strcmp (fr_name, "hip") == 0);
+  else if (fr == omp_ifr_hsa)
+    assert (strcmp (fr_name, "hsa") == 0);
+  else
+    assert (0);
+
+  ret_code = omp_irc_no_value;
+  int vendor = (int) omp_get_interop_int (obj, omp_ipr_vendor, &ret_code);
+  assert (ret_code == omp_irc_success);
+  assert (vendor == 1);  /* Amd */
+
+  ret_code = omp_irc_no_value;
+  const char *vendor_name = omp_get_interop_str (obj, omp_ipr_vendor_name, &ret_code);
+  assert (ret_code == omp_irc_success);
+  assert (strcmp (vendor_name, "amd") == 0);
+
+  ret_code = omp_irc_no_value;
+  int dev_num = (int) omp_get_interop_int (obj, omp_ipr_device_num, &ret_code);
+  assert (ret_code == omp_irc_success);
+  if (dev == DEFAULT_DEVICE)
+    assert (dev_num == omp_get_default_device ());
+  else
+    assert (dev_num == dev);
+
+  /* Platform: N/A.  */
+  ret_code = omp_irc_success;
+  (void) omp_get_interop_int (obj, omp_ipr_platform, &ret_code);
+  assert (ret_code == omp_irc_no_value);
+  ret_code = omp_irc_success;
+  (void) omp_get_interop_ptr (obj, omp_ipr_platform, &ret_code);
+  assert (ret_code == omp_irc_no_value);
+  ret_code = omp_irc_success;
+  (void) omp_get_interop_str (obj, omp_ipr_platform, &ret_code);
+  assert (ret_code == omp_irc_no_value);
+
+  /* Device: hipDevice_t / hsa_agent_t* -- hip is internally an 'int'.  */
+  ret_code = omp_irc_no_value;
+  if (fr == omp_ifr_hip)
+    {
+      /* HIP also starts from 0 and goes to < n as with cudaGetDeviceCount(&cn).  */
+      int fr_device = (int) omp_get_interop_int (obj, omp_ipr_device, &ret_code);
+      assert (ret_code == omp_irc_success);
+      assert (fr_device >= 0 && fr_device < omp_get_num_devices ());
+    }
+  else
+    {
+      void *agent = omp_get_interop_ptr (obj, omp_ipr_device, &ret_code);
+      assert (ret_code == omp_irc_success);
+      assert (agent != NULL);
+    }
+
+  /* Device context: hipCtx_t / N/A -- a pointer.  */
+  ret_code = omp_irc_out_of_range;
+  void *ctx = omp_get_interop_ptr (obj, omp_ipr_device_context, &ret_code);
+  if (fr == omp_ifr_hip)
+    {
+      assert (ret_code == omp_irc_success);
+      assert (ctx != NULL);
+    }
+  else
+    {
+      assert (ret_code == omp_irc_no_value);
+      assert (ctx == NULL);
+    }
+
+  /* Stream/targetsync: cudaStream_t / CUstream / hipStream_t -- a pointer.  */
+  ret_code = omp_irc_out_of_range;
+  void *stream = omp_get_interop_ptr (obj, omp_ipr_targetsync, &ret_code);
+
+  if (is_targetsync)
+    {
+      assert (ret_code == omp_irc_success);
+      assert (stream != NULL);
+    }
+  else
+    {
+      assert (ret_code == omp_irc_no_value);
+      assert (stream == NULL);
+    }
+
+  check_type (obj);
+  if (fr == omp_ifr_hip)
+    {
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0);
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "hipDevice_t") == 0);
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "hipCtx_t") == 0);
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "hipStream_t") == 0);
+    }
+  else
+    {
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0);
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "hsa_agent_t *") == 0);
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "N/A") == 0);
+      assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "hsa_queue_t *") == 0);
+    }
+}