Diffstat (limited to 'libgomp')
206 files changed, 16346 insertions, 156 deletions
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index a60e51a..d3384b0 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,634 @@ +2025-06-24 Tobias Burnus <tburnus@baylibre.com> + + * libgomp.texi (acc_attach, acc_detach): Update for Fortran + version. + * openacc.f90 (acc_attach{,_async}, acc_detach{,_finalize}{,_async}): + Add. + * openacc_lib.h: Likewise. + * testsuite/libgomp.oacc-fortran/acc-attach-detach-1.f90: New test. + * testsuite/libgomp.oacc-fortran/acc-attach-detach-2.f90: New test. + +2025-06-19 Tobias Burnus <tburnus@baylibre.com> + + * target.c (GOMP_REQUIRES_NAME_BUF_LEN): Define. + (GOMP_offload_register_ver, gomp_target_init): Use it for the + char buffer size. + +2025-06-19 Tobias Burnus <tburnus@baylibre.com> + waffl3x <waffl3x@baylibre.com> + + * libgomp.texi (omp_init_allocator): Refer to 'Memory allocation' + for available memory spaces. + (OMP_ALLOCATOR): Move list of traits and predefined memspaces + and allocators to ... + (Memory allocation): ... here. Document omp(x)::allocator::*; + minor wording tweaks, be more explicit about memkind, pinned and + pool_size. + +2025-06-17 Tobias Burnus <tburnus@baylibre.com> + + * testsuite/libgomp.c++/declare_target-2.C: New test. + +2025-06-10 Tobias Burnus <tburnus@baylibre.com> + + * testsuite/libgomp.c/declare-variant-4.h (gfx942): New variant function. + * testsuite/libgomp.c/declare-variant-4-gfx942.c: New test. + +2025-06-06 Tobias Burnus <tburnus@baylibre.com> + Sandra Loosemore <sloosemore@baylibre.com> + + * libgomp.texi (omp_get_num_devices, omp_get_intrinsic_device): + Document builtin handling. + +2025-06-06 Tobias Burnus <tburnus@baylibre.com> + + PR target/120530 + * testsuite/libgomp.c/target-map-zero-sized-3.c (main): Add missing + map clause; remove unused variable. + +2025-06-04 Tobias Burnus <tburnus@baylibre.com> + Sandra Loosemore <sloosemore@baylibre.com> + + * libgomp.texi (omp_interop_{int,ptr,str,rc_desc}): Add note about + the 'ret_code' type change in OpenMP 6. + +2025-06-03 Jakub Jelinek <jakub@redhat.com> + + PR libgomp/120444 + * testsuite/libgomp.c-c++-common/omp_target_memset-3.c (test_it): + Change ptr argument type from void * to int8_t *. + (main): Change ptr variable type from void * to int8_t * and cast + omp_target_alloc result to the latter type. + +2025-06-02 Tobias Burnus <tburnus@baylibre.com> + + PR libgomp/120444 + * libgomp-plugin.h (GOMP_OFFLOAD_memset): Declare. + * libgomp.h (struct gomp_device_descr): Add memset_func. + * libgomp.map (GOMP_6.0.1): Add omp_target_memset{,_async}. + * libgomp.texi (Device Memory Routines): Document them. + * omp.h.in (omp_target_memset, omp_target_memset_async): Declare. + * omp_lib.f90.in (omp_target_memset, omp_target_memset_async): + Add interfaces. + * omp_lib.h.in (omp_target_memset, omp_target_memset_async): Likewise. + * plugin/cuda-lib.def: Add cuMemsetD8. + * plugin/plugin-gcn.c (struct hsa_runtime_fn_info): Add + hsa_amd_memory_fill_fn. + (init_hsa_runtime_functions): DLSYM_OPT_FN load it. + (GOMP_OFFLOAD_memset): New. + * plugin/plugin-nvptx.c (GOMP_OFFLOAD_memset): New. + * target.c (omp_target_memset_int, omp_target_memset, + omp_target_memset_async_helper, omp_target_memset_async): New. + (gomp_load_plugin_for_device): Add DLSYM (memset). + * testsuite/libgomp.c-c++-common/omp_target_memset.c: New test. + * testsuite/libgomp.c-c++-common/omp_target_memset-2.c: New test. + * testsuite/libgomp.c-c++-common/omp_target_memset-3.c: New test. + * testsuite/libgomp.fortran/omp_target_memset.f90: New test. 
+ * testsuite/libgomp.fortran/omp_target_memset-2.f90: New test. + +2025-05-30 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-std__valarray-1.C: New. + * testsuite/libgomp.c++/target-std__valarray-1.output: Likewise. + +2025-05-30 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-std__array-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__array-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__bitset-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__bitset-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__deque-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__deque-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__forward_list-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__forward_list-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__list-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__list-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__map-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__map-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__multimap-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__multimap-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__multiset-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__multiset-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__set-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__set-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__span-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__span-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__valarray-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__valarray-concurrent.C: Adjust. + * testsuite/libgomp.c++/target-std__vector-concurrent-usm.C: New. + * testsuite/libgomp.c++/target-std__vector-concurrent.C: Adjust. + +2025-05-30 Kwok Cheung Yeung <kcyeung@baylibre.com> + Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-std__array-concurrent.C: New. + * testsuite/libgomp.c++/target-std__bitset-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__deque-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__flat_map-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__flat_multimap-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__flat_multiset-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__flat_set-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__forward_list-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__list-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__map-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__multimap-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__multiset-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__set-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__span-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__unordered_map-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__unordered_multimap-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__unordered_multiset-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__unordered_set-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__valarray-concurrent.C: Likewise. + * testsuite/libgomp.c++/target-std__vector-concurrent.C: Likewise. 
+ +2025-05-30 Kwok Cheung Yeung <kcyeung@baylibre.com> + + * testsuite/libgomp.c++/target-std__cmath.C: New. + * testsuite/libgomp.c++/target-std__complex.C: Likewise. + * testsuite/libgomp.c++/target-std__numbers.C: Likewise. + +2025-05-30 Waffl3x <waffl3x@baylibre.com> + Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-flex-10.C: New test. + * testsuite/libgomp.c++/target-flex-100.C: New test. + * testsuite/libgomp.c++/target-flex-101.C: New test. + * testsuite/libgomp.c++/target-flex-11.C: New test. + * testsuite/libgomp.c++/target-flex-12.C: New test. + * testsuite/libgomp.c++/target-flex-2000.C: New test. + * testsuite/libgomp.c++/target-flex-2001.C: New test. + * testsuite/libgomp.c++/target-flex-2002.C: New test. + * testsuite/libgomp.c++/target-flex-2003.C: New test. + * testsuite/libgomp.c++/target-flex-30.C: New test. + * testsuite/libgomp.c++/target-flex-300.C: New test. + * testsuite/libgomp.c++/target-flex-31.C: New test. + * testsuite/libgomp.c++/target-flex-32.C: New test. + * testsuite/libgomp.c++/target-flex-33.C: New test. + * testsuite/libgomp.c++/target-flex-41.C: New test. + * testsuite/libgomp.c++/target-flex-60.C: New test. + * testsuite/libgomp.c++/target-flex-61.C: New test. + * testsuite/libgomp.c++/target-flex-62.C: New test. + * testsuite/libgomp.c++/target-flex-70.C: New test. + * testsuite/libgomp.c++/target-flex-80.C: New test. + * testsuite/libgomp.c++/target-flex-81.C: New test. + * testsuite/libgomp.c++/target-flex-90.C: New test. + * testsuite/libgomp.c++/target-flex-common.h: New test. + +2025-05-30 Thomas Schwinge <tschwinge@baylibre.com> + Richard Biener <rguenther@suse.de> + + PR middle-end/119835 + * testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c: + '#pragma GCC optimize "-fno-inline"'. + * testsuite/libgomp.c-c++-common/target-abi-struct-1.c: New. + * testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c: Adjust. + +2025-05-30 Julian Brown <julian@codesourcery.com> + + * testsuite/libgomp.c-c++-common/declare-mapper-9.c: Enable for C. + * testsuite/libgomp.c-c++-common/declare-mapper-10.c: Likewise. + * testsuite/libgomp.c-c++-common/declare-mapper-11.c: Likewise. + * testsuite/libgomp.c-c++-common/declare-mapper-12.c: Likewise. + * testsuite/libgomp.c-c++-common/declare-mapper-13.c: Likewise. + * testsuite/libgomp.c-c++-common/declare-mapper-14.c: Likewise. + +2025-05-30 Julian Brown <julian@codesourcery.com> + Tobias Burnus <tburnus@baylibre.com> + + * testsuite/libgomp.c++/declare-mapper-1.C: New test. + * testsuite/libgomp.c++/declare-mapper-2.C: New test. + * testsuite/libgomp.c++/declare-mapper-3.C: New test. + * testsuite/libgomp.c++/declare-mapper-4.C: New test. + * testsuite/libgomp.c++/declare-mapper-5.C: New test. + * testsuite/libgomp.c++/declare-mapper-6.C: New test. + * testsuite/libgomp.c++/declare-mapper-7.C: New test. + * testsuite/libgomp.c++/declare-mapper-8.C: New test. + * testsuite/libgomp.c-c++-common/declare-mapper-9.c: New test (only + enabled for C++ for now). + * testsuite/libgomp.c-c++-common/declare-mapper-10.c: Likewise. + * testsuite/libgomp.c-c++-common/declare-mapper-11.c: Likewise. + * testsuite/libgomp.c-c++-common/declare-mapper-12.c: Likewise. + * testsuite/libgomp.c-c++-common/declare-mapper-13.c: Likewise. + * testsuite/libgomp.c-c++-common/declare-mapper-14.c: Likewise. + +2025-05-29 Tobias Burnus <tburnus@baylibre.com> + + PR libgomp/93226 + * libgomp-plugin.h (GOMP_OFFLOAD_openacc_async_dev2dev): New + prototype. + * libgomp.h (struct acc_dispatch_t): Add dev2dev_func. 
+ (gomp_copy_dev2dev): New prototype. + * libgomp.map (OACC_2.6.1): New; add acc_memcpy_device{,_async}. + * libgomp.texi (acc_memcpy_device): New. + * oacc-mem.c (memcpy_tofrom_device): Change to take from/to + device boolean; use memcpy not memmove; add early return if + size == 0 or same device + same ptr. + (acc_memcpy_to_device, acc_memcpy_to_device_async, + acc_memcpy_from_device, acc_memcpy_from_device_async): Update. + (acc_memcpy_device, acc_memcpy_device_async): New. + * openacc.f90 (acc_memcpy_device, acc_memcpy_device_async): + Add interface. + * openacc_lib.h (acc_memcpy_device, acc_memcpy_device_async): + Likewise. + * openacc.h (acc_memcpy_device, acc_memcpy_device_async): Add + prototype. + * plugin/plugin-gcn.c (GOMP_OFFLOAD_openacc_async_host2dev): + Update comment. + (GOMP_OFFLOAD_openacc_async_dev2host): Update call. + (GOMP_OFFLOAD_openacc_async_dev2dev): New. + * plugin/plugin-nvptx.c (cuda_memcpy_dev_sanity_check): New. + (GOMP_OFFLOAD_dev2dev): Call it. + (GOMP_OFFLOAD_openacc_async_dev2dev): New. + * target.c (gomp_copy_dev2dev): New. + (gomp_load_plugin_for_device): Load dev2dev and async_dev2dev. + * testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c: New test. + * testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90: New test. + +2025-05-28 Tobias Burnus <tburnus@baylibre.com> + + PR middle-end/118694 + * testsuite/libgomp.fortran/metadirective-1.f90: xfail when + compiling (also) for nvptx offloading as an error is then expected. + +2025-05-23 Tobias Burnus <tburnus@baylibre.com> + + PR middle-end/118694 + * testsuite/libgomp.c-c++-common/metadirective-1.c: xfail when + compiling (also) for nvptx offloading as an error is then expected. + +2025-05-19 Thomas Schwinge <tschwinge@baylibre.com> + + PR lto/120308 + * testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c: Add empty + structure testing. + +2025-05-19 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c: New. + * testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c: Likewise. + +2025-05-19 Julian Brown <julian@codesourcery.com> + + * testsuite/libgomp.oacc-fortran/lib-13.f90: End data region after + wait API calls. + +2025-05-15 Tobias Burnus <tburnus@baylibre.com> + + * testsuite/libgomp.fortran/alloc-comp-4.f90: New test. + +2025-05-14 Tobias Burnus <tburnus@baylibre.com> + + * target.c (gomp_attach_pointer): Return bool; accept additional + bool to optionally silence the fatal pointee-not-found error. + (gomp_map_vars_internal): If the pointee could not be found, + check whether it was mapped as GOMP_MAP_ZERO_LEN_ARRAY_SECTION. + * libgomp.h (gomp_attach_pointer): Update prototype. + * oacc-mem.c (acc_attach_async, goacc_enter_data_internal): Update + calls. + * testsuite/libgomp.c/target-map-zero-sized.c: New test. + * testsuite/libgomp.c/target-map-zero-sized-2.c: New test. + * testsuite/libgomp.c/target-map-zero-sized-3.c: New test. + +2025-05-12 Thomas Schwinge <tschwinge@baylibre.com> + + PR target/119692 + * testsuite/libgomp.c++/pr119692-1-4.C: '{ dg-timeout 10 }'. + * testsuite/libgomp.c++/pr119692-1-5.C: Likewise. + * testsuite/libgomp.c++/target-exceptions-bad_cast-1.C: Likewise. + * testsuite/libgomp.c++/target-exceptions-bad_cast-2.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-bad_cast-1.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-bad_cast-2.C: Likewise. + +2025-05-12 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c/declare-variant-3-sm61.c: New. 
+ * testsuite/libgomp.c/declare-variant-3.h: Adjust. + +2025-05-09 Tobias Burnus <tburnus@baylibre.com> + + * testsuite/libgomp.c/interop-cuda-full.c: Use 'link' instead + of 'run' when the default device is "! offload_device_nvptx". + * testsuite/libgomp.c/interop-cuda-libonly.c: Likewise. + * testsuite/libgomp.c/interop-hip-nvidia-full.c: Likewise. + * testsuite/libgomp.c/interop-hip-nvidia-no-headers.c: Likewise. + * testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c: Likewise. + * testsuite/libgomp.fortran/interop-hip-nvidia-full.F90: Likewise. + * testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90: Likewise. + * testsuite/libgomp.c/interop-hip-amd-full.c: Use 'link' instead + of 'run' when the default device is "! offload_device_gcn". + * testsuite/libgomp.c/interop-hip-amd-no-hip-header.c: Likewise. + * testsuite/libgomp.fortran/interop-hip-amd-full.F90: Likewise. + * testsuite/libgomp.fortran/interop-hip-amd-no-module.F90: Likewise. + +2025-05-09 David Malcolm <dmalcolm@redhat.com> + + PR other/116792 + * testsuite/lib/libgomp.exp: Add load_lib of scanhtml.exp. + +2025-05-07 Tobias Burnus <tburnus@baylibre.com> + + * testsuite/libgomp.fortran/map-alloc-comp-9.f90: Process differently + when USE_USM_REQUIREMENT is set. + * testsuite/libgomp.fortran/map-alloc-comp-9-usm.f90: New test. + +2025-05-06 Tejas Belagod <tejas.belagod@arm.com> + + * testsuite/libgomp.c-target/aarch64/udr-sve.c: Fix test. + +2025-05-05 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c/interop-hsa.c: GCN offloading only. + +2025-05-01 Tobias Burnus <tobias@codesourcery.com> + + * testsuite/libgomp.fortran/allocate-8a.f90: New test. + +2025-04-25 Andrew Stubbs <ams@baylibre.com> + + * testsuite/libgomp.c/interop-hsa.c: New test. + +2025-04-25 Thomas Schwinge <tschwinge@baylibre.com> + + PR target/119853 + PR target/119854 + * target-cxa-dso-dtor.c: New. + * config/accel/target-cxa-dso-dtor.c: Likewise. + * Makefile.am (libgomp_la_SOURCES): Add it. + * Makefile.in: Regenerate. + * testsuite/libgomp.c++/target-cdtor-1.C: New. + * testsuite/libgomp.c++/target-cdtor-2.C: Likewise. + +2025-04-25 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c-c++-common/target-cdtor-1.c: New. + +2025-04-25 Andrew Pinski <quic_apinski@quicinc.com> + Thomas Schwinge <tschwinge@baylibre.com> + + PR target/119737 + * testsuite/libgomp.c++/target-exceptions-throw-1.C: Remove + PR119737 XFAILing. + * testsuite/libgomp.c++/target-exceptions-throw-2.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-throw-1.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-throw-2.C: Likewise. + +2025-04-25 Thomas Schwinge <tschwinge@baylibre.com> + + PR target/118794 + * testsuite/libgomp.c++/target-exceptions-pr118794-1.C: Adjust for + 'targetm.arm_eabi_unwinder'. + * testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C: + Likewise. + * testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C: + Likewise. + +2025-04-24 Tobias Burnus <tburnus@baylibre.com> + + * testsuite/lib/libgomp.exp + (check_effective_target_gomp_hip_header_nvidia): Compile with + "-Wno-deprecated-declarations". + * testsuite/libgomp.c/interop-hip-nvidia-full.c: Likewise. + * testsuite/libgomp.c/interop-hipblas-nvidia-full.c: Likewise. + * testsuite/libgomp.c/interop-hipblas.h: Add workarounds + when using the HIP headers with __HIP_PLATFORM_NVIDIA__. 
+ +2025-04-24 Tobias Burnus <tburnus@baylibre.com> + + * testsuite/lib/libgomp.exp (check_effective_target_openacc_cublas, + check_effective_target_openacc_cudart): Update description as + the check requires more. + (check_effective_target_openacc_libcuda, + check_effective_target_openacc_libcublas, + check_effective_target_openacc_libcudart, + check_effective_target_gomp_hip_header_amd, + check_effective_target_gomp_hip_header_nvidia, + check_effective_target_gomp_hipfort_module, + check_effective_target_gomp_libamdhip64, + check_effective_target_gomp_libhipblas): New. + * testsuite/libgomp.c-c++-common/interop-2.c: New test. + * testsuite/libgomp.c/interop-cublas-full.c: New test. + * testsuite/libgomp.c/interop-cublas-libonly.c: New test. + * testsuite/libgomp.c/interop-cuda-full.c: New test. + * testsuite/libgomp.c/interop-cuda-libonly.c: New test. + * testsuite/libgomp.c/interop-hip-amd-full.c: New test. + * testsuite/libgomp.c/interop-hip-amd-no-hip-header.c: New test. + * testsuite/libgomp.c/interop-hip-nvidia-full.c: New test. + * testsuite/libgomp.c/interop-hip-nvidia-no-headers.c: New test. + * testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c: New test. + * testsuite/libgomp.c/interop-hip.h: New test. + * testsuite/libgomp.c/interop-hipblas-amd-full.c: New test. + * testsuite/libgomp.c/interop-hipblas-amd-no-hip-header.c: New test. + * testsuite/libgomp.c/interop-hipblas-nvidia-full.c: New test. + * testsuite/libgomp.c/interop-hipblas-nvidia-no-headers.c: New test. + * testsuite/libgomp.c/interop-hipblas-nvidia-no-hip-header.c: New test. + * testsuite/libgomp.c/interop-hipblas.h: New test. + * testsuite/libgomp.fortran/interop-hip-amd-full.F90: New test. + * testsuite/libgomp.fortran/interop-hip-amd-no-module.F90: New test. + * testsuite/libgomp.fortran/interop-hip-nvidia-full.F90: New test. + * testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90: New test. + * testsuite/libgomp.fortran/interop-hip.h: New test. + +2025-04-23 Tobias Burnus <tburnus@baylibre.com> + + * testsuite/libgomp.fortran/target-enter-data-8.f90: New test. + +2025-04-17 Jakub Jelinek <jakub@redhat.com> + + PR libgomp/119849 + * testsuite/libgomp.c++/allocator-1.C (test_inequality, main): Guard + ompx::allocator::gnu_pinned_mem uses with #ifdef __gnu_linux__. + * testsuite/libgomp.c++/allocator-2.C (main): Likewise. + +2025-04-17 Tobias Burnus <tburnus@baylibre.com> + + * libgomp.texi (gcn interop, nvptx interop): For HIP with C/C++, add + a note about setting a preprocessor define. + +2025-04-16 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-exceptions-pr118794-1.C: Remove + 'ALWAYS_INLINE' workaround. + +2025-04-16 Thomas Schwinge <tschwinge@baylibre.com> + + PR target/106445 + * testsuite/libgomp.c++/pr106445-1.C: New. + * testsuite/libgomp.c++/pr106445-1-O0.C: Likewise. + +2025-04-16 Thomas Schwinge <tschwinge@baylibre.com> + + PR target/97106 + * testsuite/libgomp.c++/pr96390.C: Un-XFAIL nvptx offloading. + * testsuite/libgomp.c-c++-common/pr96390.c: Adjust. + +2025-04-15 Tobias Burnus <tburnus@baylibre.com> + + * libgomp.texi (gcn, nvptx): Mention self_maps clause + besides unified_shared_memory in the requirements item. + +2025-04-15 waffl3x <waffl3x@baylibre.com> + + * omp.h.in: Add omp::allocator::* and ompx::allocator::* allocators. + (__detail::__allocator_templ<T, omp_allocator_handle_t>): + New struct template. + (null_allocator<T>): New struct template. + (default_mem<T>): Likewise. + (large_cap_mem<T>): Likewise. + (const_mem<T>): Likewise. 
+ (high_bw_mem<T>): Likewise. + (low_lat_mem<T>): Likewise. + (cgroup_mem<T>): Likewise. + (pteam_mem<T>): Likewise. + (thread_mem<T>): Likewise. + (ompx::allocator::gnu_pinned_mem<T>): Likewise. + * testsuite/libgomp.c++/allocator-1.C: New test. + * testsuite/libgomp.c++/allocator-2.C: New test. + +2025-04-15 Tobias Burnus <tburnus@baylibre.com> + + * libgomp.texi (5.0 Impl. Status): Mark mapping alloc comps as 'Y'. + * testsuite/libgomp.fortran/allocatable-comp.f90: New test. + * testsuite/libgomp.fortran/map-alloc-comp-3.f90: New test. + * testsuite/libgomp.fortran/map-alloc-comp-4.f90: New test. + * testsuite/libgomp.fortran/map-alloc-comp-5.f90: New test. + * testsuite/libgomp.fortran/map-alloc-comp-6.f90: New test. + * testsuite/libgomp.fortran/map-alloc-comp-7.f90: New test. + * testsuite/libgomp.fortran/map-alloc-comp-8.f90: New test. + * testsuite/libgomp.fortran/map-alloc-comp-9.f90: New test. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + PR target/118794 + * testsuite/libgomp.c++/target-exceptions-bad_cast-2-offload-sorry-GCN.C: + Set '-foffload-options=-mno-fake-exceptions'. + * testsuite/libgomp.c++/target-exceptions-bad_cast-2-offload-sorry-nvptx.C: + Likewise. + * testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C: + Likewise. + * testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C: + Likewise. + * testsuite/libgomp.c++/target-exceptions-throw-2-offload-sorry-GCN.C: + Likewise. + * testsuite/libgomp.c++/target-exceptions-throw-2-offload-sorry-nvptx.C: + Likewise. + * testsuite/libgomp.oacc-c++/exceptions-bad_cast-2-offload-sorry-GCN.C: + Likewise. + * testsuite/libgomp.oacc-c++/exceptions-bad_cast-2-offload-sorry-nvptx.C: + Likewise. + * testsuite/libgomp.oacc-c++/exceptions-throw-2-offload-sorry-GCN.C: + Likewise. + * testsuite/libgomp.oacc-c++/exceptions-throw-2-offload-sorry-nvptx.C: + Likewise. + * testsuite/libgomp.c++/target-exceptions-bad_cast-2.C: Adjust. + * testsuite/libgomp.c++/target-exceptions-pr118794-1.C: Likewise. + * testsuite/libgomp.c++/target-exceptions-throw-2.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-bad_cast-2.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-throw-2.C: Likewise. + * testsuite/libgomp.c++/target-exceptions-throw-2-O0.C: New. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-exceptions-throw-3.C: New. + * testsuite/libgomp.oacc-c++/exceptions-throw-3.C: Likewise. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-exceptions-throw-2.C: New. + * testsuite/libgomp.c++/target-exceptions-throw-2-offload-sorry-GCN.C: Likewise. + * testsuite/libgomp.c++/target-exceptions-throw-2-offload-sorry-nvptx.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-throw-2.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-throw-2-offload-sorry-GCN.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-throw-2-offload-sorry-nvptx.C: Likewise. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-exceptions-throw-1.C: New. + * testsuite/libgomp.c++/target-exceptions-throw-1-O0.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-throw-1.C: Likewise. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-exceptions-bad_cast-3.C: New. + * testsuite/libgomp.oacc-c++/exceptions-bad_cast-3.C: Likewise. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-exceptions-bad_cast-2.C: New. 
+ * testsuite/libgomp.c++/target-exceptions-bad_cast-2-offload-sorry-GCN.C: Likewise. + * testsuite/libgomp.c++/target-exceptions-bad_cast-2-offload-sorry-nvptx.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-bad_cast-2.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-bad_cast-2-offload-sorry-GCN.C: Likewise. + * testsuite/libgomp.oacc-c++/exceptions-bad_cast-2-offload-sorry-nvptx.C: Likewise. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + * testsuite/libgomp.c++/target-exceptions-bad_cast-1.C: New. + * testsuite/libgomp.oacc-c++/exceptions-bad_cast-1.C: Likewise. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + PR target/118794 + * testsuite/libgomp.c++/target-exceptions-pr118794-1.C: New. + * testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C: + Likewise. + * testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C: + Likewise. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + PR c++/119692 + * testsuite/libgomp.c++/pr119692-1-1.C: New. + * testsuite/libgomp.c++/pr119692-1-2.C: Likewise. + * testsuite/libgomp.c++/pr119692-1-3.C: Likewise. + * testsuite/libgomp.c++/pr119692-1-4.C: Likewise. + * testsuite/libgomp.c++/pr119692-1-5.C: Likewise. + * testsuite/libgomp.oacc-c++/pr119692-1-1.C: Likewise. + * testsuite/libgomp.oacc-c++/pr119692-1-2.C: Likewise. + * testsuite/libgomp.oacc-c++/pr119692-1-3.C: Likewise. + +2025-04-10 Richard Sandiford <richard.sandiford@arm.com> + + * testsuite/libgomp.c-target/aarch64/firstprivate.c: Add +sve pragma. + * testsuite/libgomp.c-target/aarch64/lastprivate.c: Likewise. + * testsuite/libgomp.c-target/aarch64/private.c: Likewise. + * testsuite/libgomp.c-target/aarch64/shared.c: Likewise. + * testsuite/libgomp.c-target/aarch64/simd-aligned.c: Likewise. + * testsuite/libgomp.c-target/aarch64/simd-nontemporal.c: Likewise. + * testsuite/libgomp.c-target/aarch64/threadprivate.c: Likewise. + * testsuite/libgomp.c-target/aarch64/udr-sve.c: Add an -march option. + (for_reduction): Use "+=" in the reduction loop. + +2025-04-08 Tobias Burnus <tburnus@baylibre.com> + + PR middle-end/119662 + * testsuite/libgomp.c/append-args-fr-1.c: New test. + * testsuite/libgomp.c/append-args-fr.h: New test. + +2025-04-08 Tobias Burnus <tburnus@baylibre.com> + + * Makefile.am (%.mod): Add -Wno-c-binding-type. + * Makefile.in: Regenerate. + +2025-04-08 Tejas Belagod <tejas.belagod@arm.com> + + * testsuite/libgomp.c-target/aarch64/aarch64.exp: Test driver. + * testsuite/libgomp.c-target/aarch64/firstprivate.c: New test. + * testsuite/libgomp.c-target/aarch64/lastprivate.c: Likewise. + * testsuite/libgomp.c-target/aarch64/private.c: Likewise. + * testsuite/libgomp.c-target/aarch64/shared.c: Likewise. + * testsuite/libgomp.c-target/aarch64/simd-aligned.c: Likewise. + * testsuite/libgomp.c-target/aarch64/simd-nontemporal.c: Likewise. + * testsuite/libgomp.c-target/aarch64/threadprivate.c: Likewise. + * testsuite/libgomp.c-target/aarch64/udr-sve.c: Likewise. + +2025-04-07 Tobias Burnus <tburnus@baylibre.com> + + * libgomp.texi (omp_target_memcpy_rect_async, + omp_target_memcpy_rect): Add @ref to 'Offload-Target Specifics'. + (AMD Radeon (GCN)): Document how memcpy_rect is implemented. + (nvptx): Move item about memcpy_rect item down; use present tense. 
+ 2025-03-26 Thomas Schwinge <thomas@codesourcery.com> PR driver/101544 diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am index 855f0af..19479ae 100644 --- a/libgomp/Makefile.am +++ b/libgomp/Makefile.am @@ -70,7 +70,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \ target.c splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \ oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \ priority_queue.c affinity-fmt.c teams.c allocator.c oacc-profiling.c \ - oacc-target.c target-indirect.c + oacc-target.c target-indirect.c target-cxa-dso-dtor.c include $(top_srcdir)/plugin/Makefrag.am @@ -97,7 +97,7 @@ openacc_kinds.mod: openacc.mod openacc.mod: openacc.lo : %.mod: %.f90 - $(FC) $(FCFLAGS) -cpp -fopenmp -fsyntax-only $< + $(FC) $(FCFLAGS) -cpp -fopenmp -fsyntax-only -Wno-c-binding-type $< fortran.lo: libgomp_f.h fortran.o: libgomp_f.h env.lo: libgomp_f.h diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in index 25cb6fc..6d22b3d 100644 --- a/libgomp/Makefile.in +++ b/libgomp/Makefile.in @@ -219,7 +219,8 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \ oacc-parallel.lo oacc-host.lo oacc-init.lo oacc-mem.lo \ oacc-async.lo oacc-plugin.lo oacc-cuda.lo priority_queue.lo \ affinity-fmt.lo teams.lo allocator.lo oacc-profiling.lo \ - oacc-target.lo target-indirect.lo $(am__objects_1) + oacc-target.lo target-indirect.lo target-cxa-dso-dtor.lo \ + $(am__objects_1) libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) @@ -552,7 +553,8 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \ oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \ oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \ affinity-fmt.c teams.c allocator.c oacc-profiling.c \ - oacc-target.c target-indirect.c $(am__append_3) + oacc-target.c target-indirect.c target-cxa-dso-dtor.c \ + $(am__append_3) # Nvidia PTX OpenACC plugin. @PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION) @@ -780,6 +782,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/splay-tree.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target-cxa-dso-dtor.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target-indirect.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/task.Plo@am__quote@ @@ -1388,7 +1391,7 @@ openacc_kinds.mod: openacc.mod openacc.mod: openacc.lo : %.mod: %.f90 - $(FC) $(FCFLAGS) -cpp -fopenmp -fsyntax-only $< + $(FC) $(FCFLAGS) -cpp -fopenmp -fsyntax-only -Wno-c-binding-type $< fortran.lo: libgomp_f.h fortran.o: libgomp_f.h env.lo: libgomp_f.h diff --git a/libgomp/config/accel/target-cxa-dso-dtor.c b/libgomp/config/accel/target-cxa-dso-dtor.c new file mode 100644 index 0000000..e40a5f0 --- /dev/null +++ b/libgomp/config/accel/target-cxa-dso-dtor.c @@ -0,0 +1,62 @@ +/* Host/device compatibility: Itanium C++ ABI, DSO Object Destruction API + + Copyright (C) 2025 Free Software Foundation, Inc. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). 
+ + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "libgomp.h" + +extern void __cxa_finalize (void *); + +/* See <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#dso-dtor>. + + Even if the device is '!DEFAULT_USE_CXA_ATEXIT', we may see '__cxa_atexit' + calls, referencing '__dso_handle', via a 'DEFAULT_USE_CXA_ATEXIT' host. + '__cxa_atexit' is provided by newlib, but use of '__dso_handle' for nvptx + results in 'ld' error: + + unresolved symbol __dso_handle + collect2: error: ld returned 1 exit status + nvptx mkoffload: fatal error: [...]/x86_64-pc-linux-gnu-accel-nvptx-none-gcc returned 1 exit status + + ..., or for GCN get an implicit definition (running with + '--trace-symbol=__dso_handle'): + + ./a.xamdgcn-amdhsa.mkoffload.hsaco-a.xamdgcn-amdhsa.mkoffload.2.o: reference to __dso_handle + <internal>: definition of __dso_handle + + ..., which might be fine, but let's just make it explicit. */ + +/* There are no DSOs; this is the main program. */ +attribute_hidden void * const __dso_handle = 0; + +/* If this file gets linked in, that means that '__dso_handle' has been + referenced (for '__cxa_atexit'), and in that case, we also have to run + '__cxa_finalize'. Make that happen by overriding the weak libgcc dummy + function '__GCC_offload___cxa_finalize'. 
*/ + +void +__GCC_offload___cxa_finalize (void *dso_handle) +{ + __cxa_finalize (dso_handle); +} diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h index 924fc1f..191106b 100644 --- a/libgomp/libgomp-plugin.h +++ b/libgomp/libgomp-plugin.h @@ -177,6 +177,7 @@ extern int GOMP_OFFLOAD_memcpy3d (int, int, size_t, size_t, size_t, void *, size_t, size_t, size_t, size_t, size_t, const void *, size_t, size_t, size_t, size_t, size_t); +extern bool GOMP_OFFLOAD_memset (int, void *, int, size_t); extern bool GOMP_OFFLOAD_can_run (void *); extern void GOMP_OFFLOAD_run (int, void *, void *, void **); extern void GOMP_OFFLOAD_async_run (int, void *, void *, void **, void *); @@ -200,6 +201,8 @@ extern bool GOMP_OFFLOAD_openacc_async_dev2host (int, void *, const void *, size struct goacc_asyncqueue *); extern bool GOMP_OFFLOAD_openacc_async_host2dev (int, void *, const void *, size_t, struct goacc_asyncqueue *); +extern bool GOMP_OFFLOAD_openacc_async_dev2dev (int, void *, const void *, size_t, + struct goacc_asyncqueue *); extern void *GOMP_OFFLOAD_openacc_cuda_get_current_device (void); extern void *GOMP_OFFLOAD_openacc_cuda_get_current_context (void); extern void *GOMP_OFFLOAD_openacc_cuda_get_stream (struct goacc_asyncqueue *); diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index d97768f..a433983 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -1360,6 +1360,7 @@ typedef struct acc_dispatch_t __typeof (GOMP_OFFLOAD_openacc_async_exec) *exec_func; __typeof (GOMP_OFFLOAD_openacc_async_dev2host) *dev2host_func; __typeof (GOMP_OFFLOAD_openacc_async_host2dev) *host2dev_func; + __typeof (GOMP_OFFLOAD_openacc_async_dev2dev) *dev2dev_func; } async; __typeof (GOMP_OFFLOAD_openacc_get_property) *get_property_func; @@ -1420,9 +1421,10 @@ struct gomp_device_descr __typeof (GOMP_OFFLOAD_free) *free_func; __typeof (GOMP_OFFLOAD_dev2host) *dev2host_func; __typeof (GOMP_OFFLOAD_host2dev) *host2dev_func; + __typeof (GOMP_OFFLOAD_dev2dev) *dev2dev_func; __typeof (GOMP_OFFLOAD_memcpy2d) *memcpy2d_func; __typeof (GOMP_OFFLOAD_memcpy3d) *memcpy3d_func; - __typeof (GOMP_OFFLOAD_dev2dev) *dev2dev_func; + __typeof (GOMP_OFFLOAD_memset) *memset_func; __typeof (GOMP_OFFLOAD_can_run) *can_run_func; __typeof (GOMP_OFFLOAD_run) *run_func; __typeof (GOMP_OFFLOAD_async_run) *async_run_func; @@ -1467,11 +1469,14 @@ extern void gomp_copy_host2dev (struct gomp_device_descr *, extern void gomp_copy_dev2host (struct gomp_device_descr *, struct goacc_asyncqueue *, void *, const void *, size_t); +extern void gomp_copy_dev2dev (struct gomp_device_descr *, + struct goacc_asyncqueue *, void *, const void *, + size_t); extern uintptr_t gomp_map_val (struct target_mem_desc *, void **, size_t); -extern void gomp_attach_pointer (struct gomp_device_descr *, +extern bool gomp_attach_pointer (struct gomp_device_descr *, struct goacc_asyncqueue *, splay_tree, splay_tree_key, uintptr_t, size_t, - struct gomp_coalesce_buf *, bool); + struct gomp_coalesce_buf *, bool, bool); extern void gomp_detach_pointer (struct gomp_device_descr *, struct goacc_asyncqueue *, splay_tree_key, uintptr_t, bool, struct gomp_coalesce_buf *); diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map index eae2f53..f6aee7c 100644 --- a/libgomp/libgomp.map +++ b/libgomp/libgomp.map @@ -453,6 +453,12 @@ GOMP_6.0 { omp_get_uid_from_device_8_; } GOMP_5.1.3; +GOMP_6.0.1 { + global: + omp_target_memset; + omp_target_memset_async; +} GOMP_6.0; + OACC_2.0 { global: acc_get_num_devices; @@ -609,6 +615,12 @@ OACC_2.6 { acc_get_property_string_h_; } 
OACC_2.5.1; +OACC_2.6.1 { + global: + acc_memcpy_device; + acc_memcpy_device_async; +} OACC_2.6; + GOACC_2.0 { global: GOACC_data_end; diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi index 4217c29..5518033 100644 --- a/libgomp/libgomp.texi +++ b/libgomp/libgomp.texi @@ -258,7 +258,7 @@ The OpenMP 4.5 specification is fully supported. device memory mapped by an array section @tab P @tab @item Mapping of Fortran pointer and allocatable variables, including pointer and allocatable components of variables - @tab P @tab Mapping of vars with allocatable components unsupported + @tab Y @tab @item @code{defaultmap} extensions @tab Y @tab @item @code{declare mapper} directive @tab N @tab @item @code{omp_get_supported_active_levels} routine @tab Y @tab @@ -603,7 +603,7 @@ to address of matching mapped list item per 5.1, Sect. 2.21.7.2 @tab N @tab @code{omp_get_device_teams_thread_limit}, and @code{omp_set_device_teams_thread_limit} routines @tab N @tab @item @code{omp_target_memset} and @code{omp_target_memset_async} routines - @tab N @tab + @tab Y @tab @item Fortran version of the interop runtime routines @tab Y @tab @item Routines for obtaining memory spaces/allocators for shared/device memory @tab N @tab @@ -1802,6 +1802,11 @@ Returns the number of available non-host devices. The effect of running this routine in a @code{target} region is unspecified. +Note that in GCC the function is marked pure, i.e. as returning always the +same number. When GCC was not configured to support offloading, it is replaced +by zero; compile with @option{-fno-builtin-omp_get_num_devices} if a run-time +function is desired. + @item @emph{C/C++}: @multitable @columnfractions .20 .80 @item @emph{Prototype}: @tab @code{int omp_get_num_devices(void);} @@ -1812,6 +1817,9 @@ The effect of running this routine in a @code{target} region is unspecified. @item @emph{Interface}: @tab @code{integer function omp_get_num_devices()} @end multitable +@item @emph{See also}: +@ref{omp_get_initial_device} + @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v4.5}, Section 3.2.31. @end table @@ -1950,6 +1958,12 @@ the value of @code{omp_initial_device}. The effect of running this routine in a @code{target} region is unspecified. +Note that GCC inlines this function unless you compile with +@option{-fno-builtin-omp_get_initial_device}. If GCC was not configured to +support offloading, it expands to constant zero; in non-host code it expands +to @code{omp_initial_device}; and otherwise it is replaced with a call to +@code{omp_get_num_devices}. + @item @emph{C/C++} @multitable @columnfractions .20 .80 @item @emph{Prototype}: @tab @code{int omp_get_initial_device(void);} @@ -1984,8 +1998,8 @@ pointers on devices. They have C linkage and do not throw exceptions. * omp_target_memcpy_async:: Copy data between devices asynchronously * omp_target_memcpy_rect:: Copy a subvolume of data between devices * omp_target_memcpy_rect_async:: Copy a subvolume of data between devices asynchronously -@c * omp_target_memset:: <fixme>/TR12 -@c * omp_target_memset_async:: <fixme>/TR12 +* omp_target_memset:: Set bytes in device memory +* omp_target_memset_async:: Set bytes in device memory asynchronously * omp_target_associate_ptr:: Associate a device pointer with a host pointer * omp_target_disassociate_ptr:: Remove device--host pointer association * omp_get_mapped_ptr:: Return device pointer to a host pointer @@ -2316,7 +2330,7 @@ the initial device. 
@end multitable @item @emph{See also}: -@ref{omp_target_memcpy_rect_async}, @ref{omp_target_memcpy} +@ref{omp_target_memcpy_rect_async}, @ref{omp_target_memcpy}, @ref{Offload-Target Specifics} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v5.1}, Section 3.8.6 @end table @@ -2391,13 +2405,105 @@ the initial device. @end multitable @item @emph{See also}: -@ref{omp_target_memcpy_rect}, @ref{omp_target_memcpy_async} +@ref{omp_target_memcpy_rect}, @ref{omp_target_memcpy_async}, @ref{Offload-Target Specifics} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v5.1}, Section 3.8.8 @end table +@node omp_target_memset +@subsection @code{omp_target_memset} -- Set bytes in device memory +@table @asis +@item @emph{Description}: +This routine fills memory on the device identified by device number +@var{device_num}. Starting from the device address @var{ptr}, the first +@var{count} bytes are set to the value @var{val}, converted to +@code{unsigned char}. If @var{count} is zero, the routine has no effect; +if @var{ptr} is @code{NULL}, the behavior is unspecified. The function +returns @var{ptr}. + +The @var{device_num} must be a conforming device number and @var{ptr} must be +a valid device pointer for that device. Running this routine in a +@code{target} region except on the initial device is not supported. + +@item @emph{C/C++} +@multitable @columnfractions .20 .80 +@item @emph{Prototype}: @tab @code{void *omp_target_memset(void *ptr,} +@item @tab @code{ int val,} +@item @tab @code{ size_t count,} +@item @tab @code{ int device_num)} +@end multitable + +@item @emph{Fortran}: +@multitable @columnfractions .20 .80 +@item @emph{Interface}: @tab @code{type(c_ptr) function omp_target_memset( &} +@item @tab @code{ ptr, val, count, device_num) bind(C)} +@item @tab @code{use, intrinsic :: iso_c_binding, only: c_ptr, c_size_t, c_int} +@item @tab @code{type(c_ptr), value :: ptr} +@item @tab @code{integer(c_size_t), value :: count} +@item @tab @code{integer(c_int), value :: val, device_num} +@end multitable + +@item @emph{See also}: +@ref{omp_target_memset_async} + +@item @emph{Reference}: +@uref{https://www.openmp.org, OpenMP specification v6.0}, Section 25.8.1 +@end table + + + +@node omp_target_memset_async +@subsection @code{omp_target_memset_async} -- Set bytes in device memory asynchronously +@table @asis +@item @emph{Description}: +This routine fills memory on the device identified by device number +@var{device_num}. Starting from the device address @var{ptr}, the first +@var{count} bytes are set to the value @var{val}, converted to +@code{unsigned char}. If @var{count} is zero, the routine has no effect; +if @var{ptr} is @code{NULL}, the behavior is unspecified. Task dependence +is expressed by passing an array of depend objects to @var{depobj_list}, where +the number of array elements is passed as @var{depobj_count}; if the count is +zero, the @var{depobj_list} argument is ignored. In C++ and Fortran, the +@var{depobj_list} argument can also be omitted in that case. The function +returns @var{ptr}. + +The @var{device_num} must be a conforming device number and @var{ptr} must be +a valid device pointer for that device. Running this routine in a +@code{target} region except on the initial device is not supported.
+ +@item @emph{C/C++} +@multitable @columnfractions .20 .80 +@item @emph{Prototype}: @tab @code{void *omp_target_memset_async(void *ptr,} +@item @tab @code{ int val,} +@item @tab @code{ size_t count,} +@item @tab @code{ int device_num,} +@item @tab @code{ int depobj_count,} +@item @tab @code{ omp_depend_t *depobj_list)} +@end multitable + +@item @emph{Fortran}: +@multitable @columnfractions .20 .80 +@item @emph{Interface}: @tab @code{type(c_ptr) function omp_target_memset_async( &} +@item @tab @code{ ptr, val, count, device_num, &} +@item @tab @code{ depobj_count, depobj_list) bind(C)} +@item @tab @code{use, intrinsic :: iso_c_binding, only: c_ptr, c_size_t, c_int} +@item @tab @code{type(c_ptr), value :: ptr} +@item @tab @code{integer(c_size_t), value :: count} +@item @tab @code{integer(c_int), value :: val, device_num, depobj_count} +@item @tab @code{integer(omp_depend_kind), optional :: depobj_list(*)} +@end multitable + + +@item @emph{See also}: +@ref{omp_target_memset} + +@item @emph{Reference}: +@uref{https://www.openmp.org, OpenMP specification v6.0}, Section 25.8.2 +@end table + + @node omp_target_associate_ptr @subsection @code{omp_target_associate_ptr} -- Associate a device pointer with a host pointer @@ -3038,6 +3144,11 @@ and Fortran or used with @code{NULL} as argument in C and C++. If successful, In GCC, the effect of running this routine in a @code{target} region that is not the initial device is unspecified. +GCC implements the OpenMP 6.0 version of this function for C and C++, which is not +compatible with its type signature in previous versions of the OpenMP specification. +In older versions, the type @code{int*} was used for the @var{ret_code} argument +in place of a pointer to the enumerated type @code{omp_interop_rc_t}. + @c Implementation remark: In GCC, the Fortran interface differs from the one shown @c below: the function has C binding and @var{interop} and @var{property_id} are @c passed by value, which permits use of the same ABI as the C function. This does @@ -3084,6 +3195,11 @@ and Fortran or used with @code{NULL} as argument in C and C++. If successful, In GCC, the effect of running this routine in a @code{target} region that is not the initial device is unspecified. +GCC implements the OpenMP 6.0 version of this function for C and C++, which is not +compatible with its type signature in previous versions of the OpenMP specification. +In older versions, the type @code{int*} was used for the @var{ret_code} argument +in place of a pointer to the enumerated type @code{omp_interop_rc_t}. + @c Implementation remark: In GCC, the Fortran interface differs from the one shown @c below: the function has C binding and @var{interop} and @var{property_id} are @c passed by value, which permits use of the same ABI as the C function. This does @@ -3130,6 +3246,11 @@ and Fortran or used with @code{NULL} as argument in C and C++. If successful, In GCC, the effect of running this routine in a @code{target} region that is not the initial device is unspecified. +GCC implements the OpenMP 6.0 version of this function for C and C++, which is not +compatible with its type signature in previous versions of the OpenMP specification. +In older versions, the type @code{int*} was used for the @var{ret_code} argument +in place of a pointer to the enumerated type @code{omp_interop_rc_t}. + @c Implementation remark: In GCC, the Fortran interface differs from the one shown @c below: @var{interop} and @var{property_id} are passed by value.
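[Editorial illustration, not part of the patch: a minimal sketch of the omp_target_memset routine documented above, filling a device buffer and copying it back for checking. The device-number handling, buffer size, fill value, and variable names are assumptions made for this example; the _async variant additionally takes the depobj_count/depobj_list pair.]

/* Illustrative only: fill a device buffer via omp_target_memset and copy
   it back for checking; sizes, the fill value, and names are made up.  */
#include <omp.h>
#include <stdlib.h>

int
main (void)
{
  int dev = omp_get_default_device ();
  int host_dev = omp_get_initial_device ();
  size_t n = 1024;

  unsigned char *devbuf = (unsigned char *) omp_target_alloc (n, dev);
  if (devbuf == NULL)
    return 0;   /* No device memory available; nothing to do.  */

  /* Set all N bytes on device DEV to 0xab; the routine returns PTR.  */
  omp_target_memset (devbuf, 0xab, n, dev);

  unsigned char *host = (unsigned char *) malloc (n);
  /* Copy the device buffer back to the host for checking.  */
  omp_target_memcpy (host, devbuf, n, 0, 0, host_dev, dev);
  for (size_t i = 0; i < n; i++)
    if (host[i] != 0xab)
      abort ();

  free (host);
  omp_target_free (devbuf, dev);
  return 0;
}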
This does not @c affect the usage of the function when GCC's @code{omp_lib} module or @@ -3256,6 +3377,11 @@ the @var{ret_code} in human-readable form. The behavior is unspecified if value of @var{ret_code} was not set by an interoperability routine invoked for @var{interop}. +GCC implements the OpenMP 6.0 version of this function for C and C++, which is not +compatible with its type signature in previous versions of the OpenMP specification. +In older versions, the type @code{int} was used for the @var{ret_code} argument +in place of the enumerated type @code{omp_interop_rc_t}. + @item @emph{C/C++}: @multitable @columnfractions .20 .80 @item @emph{Prototype}: @tab @code{const char *omp_get_interop_rc_desc(const omp_interop_t interop, @@ -3327,7 +3453,7 @@ traits; if an allocator that fulfills the requirements cannot be created, @code{omp_null_allocator} is returned. The predefined memory spaces and available traits can be found at -@ref{OMP_ALLOCATOR}, where the trait names have to be prefixed by +@ref{Memory allocation}, where the trait names have to be prefixed by @code{omp_atk_} (e.g. @code{omp_atk_pinned}) and the named trait values by @code{omp_atv_} (e.g. @code{omp_atv_true}); additionally, @code{omp_atv_default} may be used as trait value to specify that the default value should be used. @@ -3350,7 +3476,7 @@ may be used as trait value to specify that the default value should be used. @end multitable @item @emph{See also}: -@ref{OMP_ALLOCATOR}, @ref{Memory allocation}, @ref{omp_destroy_allocator} +@ref{Memory allocation}, @ref{OMP_ALLOCATOR}, @ref{omp_destroy_allocator} @item @emph{Reference}: @uref{https://www.openmp.org, OpenMP specification v5.0}, Section 3.7.2 @@ -3931,63 +4057,15 @@ The value can either be a predefined allocator or a predefined memory space or a predefined memory space followed by a colon and a comma-separated list of memory trait and value pairs, separated by @code{=}. +See @ref{Memory allocation} for a list of supported prefedined allocators, +memory spaces, and traits. + Note: The corresponding device environment variables are currently not supported. Therefore, the non-host @var{def-allocator-var} ICVs are always initialized to @code{omp_default_mem_alloc}. However, on all devices, the @code{omp_set_default_allocator} API routine can be used to change value. -@multitable @columnfractions .45 .45 -@headitem Predefined allocators @tab Associated predefined memory spaces -@item omp_default_mem_alloc @tab omp_default_mem_space -@item omp_large_cap_mem_alloc @tab omp_large_cap_mem_space -@item omp_const_mem_alloc @tab omp_const_mem_space -@item omp_high_bw_mem_alloc @tab omp_high_bw_mem_space -@item omp_low_lat_mem_alloc @tab omp_low_lat_mem_space -@item omp_cgroup_mem_alloc @tab omp_low_lat_mem_space (implementation defined) -@item omp_pteam_mem_alloc @tab omp_low_lat_mem_space (implementation defined) -@item omp_thread_mem_alloc @tab omp_low_lat_mem_space (implementation defined) -@item ompx_gnu_pinned_mem_alloc @tab omp_default_mem_space (GNU extension) -@end multitable - -The predefined allocators use the default values for the traits, -as listed below. Except that the last three allocators have the -@code{access} trait set to @code{cgroup}, @code{pteam}, and -@code{thread}, respectively. 
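[Editorial illustration, not part of the patch: a minimal sketch of the omp_init_allocator usage described above, using the omp_atk_/omp_atv_ trait naming (pinned memory, 64-byte alignment). The chosen memory space, traits, and variable names are arbitrary assumptions.]

/* Illustrative only: build an allocator with the pinned and alignment
   traits, then allocate and release through it.  */
#include <omp.h>

int
main (void)
{
  omp_alloctrait_t traits[] = {
    { omp_atk_pinned,    omp_atv_true },
    { omp_atk_alignment, 64 }
  };
  omp_allocator_handle_t al
    = omp_init_allocator (omp_default_mem_space, 2, traits);
  if (al == omp_null_allocator)
    return 1;   /* The requested traits could not be fulfilled.  */

  double *p = (double *) omp_alloc (1024 * sizeof (double), al);
  if (p)
    {
      p[0] = 42.0;
      omp_free (p, al);
    }
  omp_destroy_allocator (al);
  return 0;
}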
- -@multitable @columnfractions .25 .40 .25 -@headitem Trait @tab Allowed values @tab Default value -@item @code{sync_hint} @tab @code{contended}, @code{uncontended}, - @code{serialized}, @code{private} - @tab @code{contended} -@item @code{alignment} @tab Positive integer being a power of two - @tab 1 byte -@item @code{access} @tab @code{all}, @code{cgroup}, - @code{pteam}, @code{thread} - @tab @code{all} -@item @code{pool_size} @tab Positive integer - @tab See @ref{Memory allocation} -@item @code{fallback} @tab @code{default_mem_fb}, @code{null_fb}, - @code{abort_fb}, @code{allocator_fb} - @tab See below -@item @code{fb_data} @tab @emph{unsupported as it needs an allocator handle} - @tab (none) -@item @code{pinned} @tab @code{true}, @code{false} - @tab See below -@item @code{partition} @tab @code{environment}, @code{nearest}, - @code{blocked}, @code{interleaved} - @tab @code{environment} -@end multitable - -For the @code{fallback} trait, the default value is @code{null_fb} for the -@code{omp_default_mem_alloc} allocator and any allocator that is associated -with device memory; for all other allocators, it is @code{default_mem_fb} -by default. - -For the @code{pinned} trait, the default value is @code{true} for -predefined allocator @code{ompx_gnu_pinned_mem_alloc} (a GNU extension), and -@code{false} for all others. - Examples: @smallexample OMP_ALLOCATOR=omp_high_bw_mem_alloc @@ -4763,6 +4841,7 @@ acceleration device. present on device. * acc_memcpy_to_device:: Copy host memory to device memory. * acc_memcpy_from_device:: Copy device memory to host memory. +* acc_memcpy_device:: Copy memory within a device. * acc_attach:: Let device pointer point to device-pointer target. * acc_detach:: Let device pointer point to host-pointer target. @@ -5837,6 +5916,44 @@ This function copies device memory specified by device address of +@node acc_memcpy_device +@section @code{acc_memcpy_device} -- Copy memory within a device. +@table @asis +@item @emph{Description} +This function copies device memory from one memory location to another +on the current device. It copies @var{bytes} bytes of data from the device +address, specified by @var{data_dev_src}, to the device address +@var{data_dev_dest}. The @code{_async} version performs the transfer +asynchronously using the queue associated with @var{async_arg}. + +@item @emph{C/C++}: +@multitable @columnfractions .20 .80 +@item @emph{Prototype}: @tab @code{void acc_memcpy_device(d_void* data_dev_dest,} +@item @tab @code{d_void* data_dev_src, size_t bytes);} +@item @emph{Prototype}: @tab @code{void acc_memcpy_device_async(d_void* data_dev_dest,} +@item @tab @code{d_void* data_dev_src, size_t bytes, int async_arg);} +@end multitable + +@item @emph{Fortran}: +@multitable @columnfractions .20 .80 +@item @emph{Interface}: @tab @code{subroutine acc_memcpy_device(data_dev_dest, &} +@item @tab @code{data_dev_src, bytes)} +@item @emph{Interface}: @tab @code{subroutine acc_memcpy_device_async(data_dev_dest, &} +@item @tab @code{data_dev_src, bytes, async_arg)} +@item @tab @code{type(c_ptr), value :: data_dev_dest} +@item @tab @code{type(c_ptr), value :: data_dev_src} +@item @tab @code{integer(c_size_t), value :: bytes} +@item @tab @code{integer(acc_handle_kind), value :: async_arg} +@end multitable + +@item @emph{Reference}: +@uref{https://www.openacc.org, OpenACC specification v2.6}, section +3.2.33. @uref{https://www.openacc.org, OpenACC specification v3.3}, section +3.2.28. 
+@end table + + + @node acc_attach @section @code{acc_attach} -- Let device pointer point to device-pointer target. @table @asis @@ -5850,19 +5967,19 @@ address to pointing to the corresponding device data. @item @emph{Prototype}: @tab @code{void acc_attach_async(h_void **ptr_addr, int async);} @end multitable -@c @item @emph{Fortran}: -@c @multitable @columnfractions .20 .80 -@c @item @emph{Interface}: @tab @code{subroutine acc_attach(ptr_addr)} -@c @item @emph{Interface}: @tab @code{subroutine acc_attach_async(ptr_addr, async_arg)} -@c @item @tab @code{type(*), dimension(..) :: ptr_addr} -@c @item @tab @code{integer(acc_handle_kind), value :: async_arg} -@c @end multitable +@item @emph{Fortran}: +@multitable @columnfractions .20 .80 +@item @emph{Interface}: @tab @code{subroutine acc_attach(ptr_addr)} +@item @emph{Interface}: @tab @code{subroutine acc_attach_async(ptr_addr, async_arg)} +@item @tab @code{type(*), dimension(..) :: ptr_addr} +@item @tab @code{integer(acc_handle_kind), value :: async_arg} +@end multitable @item @emph{Reference}: @uref{https://www.openacc.org, OpenACC specification v2.6}, section 3.2.34. -@c @uref{https://www.openacc.org, OpenACC specification v3.3}, section -@c 3.2.29. + @uref{https://www.openacc.org, OpenACC specification v3.3}, section +3.2.29. @end table @@ -5882,21 +5999,21 @@ address to pointing to the corresponding host data. @item @emph{Prototype}: @tab @code{void acc_detach_finalize_async(h_void **ptr_addr, int async);} @end multitable -@c @item @emph{Fortran}: -@c @multitable @columnfractions .20 .80 -@c @item @emph{Interface}: @tab @code{subroutine acc_detach(ptr_addr)} -@c @item @emph{Interface}: @tab @code{subroutine acc_detach_async(ptr_addr, async_arg)} -@c @item @emph{Interface}: @tab @code{subroutine acc_detach_finalize(ptr_addr)} -@c @item @emph{Interface}: @tab @code{subroutine acc_detach_finalize_async(ptr_addr, async_arg)} -@c @item @tab @code{type(*), dimension(..) :: ptr_addr} -@c @item @tab @code{integer(acc_handle_kind), value :: async_arg} -@c @end multitable +@item @emph{Fortran}: +@multitable @columnfractions .20 .80 +@item @emph{Interface}: @tab @code{subroutine acc_detach(ptr_addr)} +@item @emph{Interface}: @tab @code{subroutine acc_detach_async(ptr_addr, async_arg)} +@item @emph{Interface}: @tab @code{subroutine acc_detach_finalize(ptr_addr)} +@item @emph{Interface}: @tab @code{subroutine acc_detach_finalize_async(ptr_addr, async_arg)} +@item @tab @code{type(*), dimension(..) :: ptr_addr} +@item @tab @code{integer(acc_handle_kind), value :: async_arg} +@end multitable @item @emph{Reference}: @uref{https://www.openacc.org, OpenACC specification v2.6}, section 3.2.35. -@c @uref{https://www.openacc.org, OpenACC specification v3.3}, section -@c 3.2.29. +@uref{https://www.openacc.org, OpenACC specification v3.3}, section +3.2.29. @end table @@ -6718,6 +6835,7 @@ on more architectures, GCC currently does not match any @code{arch} or @tab See @code{-march=} in ``Nvidia PTX Options'' @end multitable + @node Memory allocation @section Memory allocation @@ -6752,11 +6870,94 @@ The description below applies to: @code{_Alignof} and C++'s @code{alignof}. @end itemize -For the available predefined allocators and, as applicable, their associated -predefined memory spaces and for the available traits and their default values, -see @ref{OMP_ALLOCATOR}. Predefined allocators without an associated memory -space use the @code{omp_default_mem_space} memory space. See additionally -@ref{Offload-Target Specifics}. 
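[Editorial illustration, not part of the patch: a minimal sketch of the acc_memcpy_device routine documented earlier in this section, duplicating a buffer on the current device without a host round trip. Buffer size and variable names are assumptions made for this example.]

/* Illustrative only: device-to-device copy with acc_memcpy_device.  */
#include <openacc.h>
#include <stdlib.h>
#include <string.h>

int
main (void)
{
  size_t n = 256 * sizeof (float);
  float *host = (float *) malloc (n);
  for (int i = 0; i < 256; i++)
    host[i] = i;

  void *dev_a = acc_malloc (n);
  void *dev_b = acc_malloc (n);
  acc_memcpy_to_device (dev_a, host, n);
  /* Copy within the current device; no transfer through the host.  */
  acc_memcpy_device (dev_b, dev_a, n);

  float *check = (float *) calloc (256, sizeof (float));
  acc_memcpy_from_device (check, dev_b, n);
  if (memcmp (host, check, n) != 0)
    abort ();

  acc_free (dev_a);
  acc_free (dev_b);
  free (host);
  free (check);
  return 0;
}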
+GCC supports the following predefined allocators and predefined memory spaces: + +@multitable @columnfractions .45 .45 +@headitem Predefined allocators @tab Associated predefined memory spaces +@item omp_default_mem_alloc @tab omp_default_mem_space +@item omp_large_cap_mem_alloc @tab omp_large_cap_mem_space +@item omp_const_mem_alloc @tab omp_const_mem_space +@item omp_high_bw_mem_alloc @tab omp_high_bw_mem_space +@item omp_low_lat_mem_alloc @tab omp_low_lat_mem_space +@item omp_cgroup_mem_alloc @tab omp_low_lat_mem_space (implementation defined) +@item omp_pteam_mem_alloc @tab omp_low_lat_mem_space (implementation defined) +@item omp_thread_mem_alloc @tab omp_low_lat_mem_space (implementation defined) +@item ompx_gnu_pinned_mem_alloc @tab omp_default_mem_space (GNU extension) +@end multitable + +Each predefined allocator, including @code{omp_null_allocator}, has a corresponding +allocator class template that meets the C++ allocator completeness requirements. +These are located in the @code{omp::allocator} namespace, and in the +@code{ompx::allocator} namespace for GNU extensions. This allows +allocator-aware C++ standard library containers to use OpenMP allocation routines; +for instance: + +@smallexample +std::vector<int, omp::allocator::cgroup_mem<int>> vec; +@end smallexample + +The following allocator templates are supported: + +@multitable @columnfractions .45 .45 +@headitem Predefined allocators @tab Associated allocator template +@item omp_null_allocator @tab omp::allocator::null_allocator +@item omp_default_mem_alloc @tab omp::allocator::default_mem +@item omp_large_cap_mem_alloc @tab omp::allocator::large_cap_mem +@item omp_const_mem_alloc @tab omp::allocator::const_mem +@item omp_high_bw_mem_alloc @tab omp::allocator::high_bw_mem +@item omp_low_lat_mem_alloc @tab omp::allocator::low_lat_mem +@item omp_cgroup_mem_alloc @tab omp::allocator::cgroup_mem +@item omp_pteam_mem_alloc @tab omp::allocator::pteam_mem +@item omp_thread_mem_alloc @tab omp::allocator::thread_mem +@item ompx_gnu_pinned_mem_alloc @tab ompx::allocator::gnu_pinned_mem +@end multitable + +The following traits are available when constructing a new allocator; +if a trait is not specified or is given the value @code{default}, the +default value listed below is used for that trait. The predefined +allocators use the default values of each trait, except that the +@code{omp_cgroup_mem_alloc}, @code{omp_pteam_mem_alloc}, and +@code{omp_thread_mem_alloc} allocators have the @code{access} trait +set to @code{cgroup}, @code{pteam}, and @code{thread}, respectively. +For each trait, a named constant prefixed by @code{omp_atk_} exists; +for each non-numeric value, a named constant prefixed by @code{omp_atv_} +exists. + +@multitable @columnfractions .25 .40 .25 +@headitem Trait @tab Allowed values @tab Default value +@item @code{sync_hint} @tab @code{contended}, @code{uncontended}, + @code{serialized}, @code{private} + @tab @code{contended} +@item @code{alignment} @tab Positive integer being a power of two + @tab 1 byte +@item @code{access} @tab @code{all}, @code{cgroup}, + @code{pteam}, @code{thread} + @tab @code{all} +@item @code{pool_size} @tab Positive integer (bytes) + @tab See below.
+@item @code{fallback} @tab @code{default_mem_fb}, @code{null_fb}, + @code{abort_fb}, @code{allocator_fb} + @tab See below +@item @code{fb_data} @tab @emph{allocator handle} + @tab (none) +@item @code{pinned} @tab @code{true}, @code{false} + @tab See below +@item @code{partition} @tab @code{environment}, @code{nearest}, + @code{blocked}, @code{interleaved} + @tab @code{environment} +@end multitable + +For the @code{fallback} trait, the default value is @code{null_fb} for the +@code{omp_default_mem_alloc} allocator and any allocator that is associated +with device memory; for all other allocators, it is @code{default_mem_fb} +by default. + +For the @code{pinned} trait, the default value is @code{true} for +predefined allocator @code{ompx_gnu_pinned_mem_alloc} (a GNU extension), and +@code{false} for all others. + +The following description applies to the initial device (the host) and largely +also to non-host devices; for the latter, also see @ref{Offload-Target Specifics}. For the memory spaces, the following applies: @itemize @end itemize On Linux systems, where the @uref{https://github.com/memkind/memkind, memkind -library} (@code{libmemkind.so.0}) is available at runtime, it is used when -creating memory allocators requesting +library} (@code{libmemkind.so.0}) is available at runtime and the respective +memkind kind is supported, it is used when creating memory allocators requesting @itemize -@item the memory space @code{omp_high_bw_mem_space} -@item the memory space @code{omp_large_cap_mem_space} -@item the @code{partition} trait @code{interleaved}; note that for - @code{omp_large_cap_mem_space} the allocation will not be interleaved +@item the @code{partition} trait @code{interleaved} except when the memory space + is @code{omp_large_cap_mem_space} (uses @code{MEMKIND_HBW_INTERLEAVE}) +@item the memory space @code{omp_high_bw_mem_space} (uses + @code{MEMKIND_HBW_PREFERRED}) +@item the memory space @code{omp_large_cap_mem_space} (uses + @code{MEMKIND_DAX_KMEM_ALL} or, if not available, @code{MEMKIND_DAX_KMEM}) @end itemize On Linux systems, where the @uref{https://github.com/numactl/numactl, numa @@ -6804,10 +7007,15 @@ a @code{nearest} allocation. Additional notes regarding the traits: @itemize @item The @code{pinned} trait is supported on Linux hosts, but is subject to - the OS @code{ulimit}/@code{rlimit} locked memory settings. + the OS @code{ulimit}/@code{rlimit} locked memory settings. It currently + uses @code{mmap} and is therefore optimized for few allocations, including + large data. If the conditions for numa or memkind allocations are + fulfilled, those allocators are used instead. @item The default for the @code{pool_size} trait is no pool and for every (re)allocation the associated library routine is called, which might - internally use a memory pool. + internally use a memory pool. Currently, the same applies when a + @code{pool_size} has been specified, except that once allocations exceed + the pool size, the action of the @code{fallback} trait applies. @item For the @code{partition} trait, the partition part size will be the same as the requested size (i.e. @code{interleaved} or @code{blocked} has no effect), except for @code{interleaved} when the memkind library is @@ -6816,13 +7024,15 @@ Additional notes regarding the traits: that allocated the memory; on Linux, this is in particular the case when the memory placement policy is set to preferred.
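The traits in the table above are combined at run time via @code{omp_init_allocator}; the following hedged sketch (not part of this patch; the trait values are arbitrary examples) requests 64-byte alignment and the @code{null_fb} fallback for an allocator in @code{omp_default_mem_space}:

/* Hedged illustration only: build an allocator with explicit traits.  */
#include <omp.h>
#include <stdio.h>

int
main (void)
{
  omp_alloctrait_t traits[2];
  traits[0].key = omp_atk_alignment;
  traits[0].value = 64;                 /* Power of two, in bytes.  */
  traits[1].key = omp_atk_fallback;
  traits[1].value = omp_atv_null_fb;    /* Return NULL rather than aborting.  */

  omp_allocator_handle_t al
    = omp_init_allocator (omp_default_mem_space, 2, traits);
  if (al == omp_null_allocator)
    return 1;

  double *v = (double *) omp_alloc (1024 * sizeof (double), al);
  if (v == NULL)                        /* Possible because of null_fb.  */
    return 1;
  v[0] = 42.0;
  printf ("%g\n", v[0]);

  omp_free (v, al);
  omp_destroy_allocator (al);
  return 0;
}

With @code{null_fb}, a failing allocation yields a null pointer instead of falling back to @code{omp_default_mem_alloc} or aborting.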
@item The @code{access} trait has no effect such that memory is always - accessible by all threads. + accessible by all threads. (Except on supported no-host devices.) @item The @code{sync_hint} trait has no effect. @end itemize See also: @ref{Offload-Target Specifics} + + @c --------------------------------------------------------------------- @c Offload-Target Specifics @c --------------------------------------------------------------------- @@ -6888,7 +7098,7 @@ The implementation remark: @code{device(ancestor:1)}) are processed serially per @code{target} region such that the next reverse offload region is only executed after the previous one returned. -@item OpenMP code that has a @code{requires} directive with +@item OpenMP code that has a @code{requires} directive with @code{self_maps} or @code{unified_shared_memory} is only supported if all AMD GPUs have the @code{HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT} property; for discrete GPUs, this may require setting the @code{HSA_XNACK} environment @@ -6911,6 +7121,11 @@ The implementation remark: @code{omp_thread_mem_alloc}, all use low-latency memory as first preference, and fall back to main graphics memory when the low-latency pool is exhausted. +@item The OpenMP routines @code{omp_target_memcpy_rect} and + @code{omp_target_memcpy_rect_async} and the @code{target update} + directive for non-contiguous list items use the 3D memory-copy function + of the HSA library. Higher dimensions call this functions in a loop and + are therefore supported. @item The unique identifier (UID), used with OpenMP's API UID routines, is the value returned by the HSA runtime library for @code{HSA_AMD_AGENT_INFO_UUID}. For GPUs, it is currently @samp{GPU-} followed by 16 lower-case hex digits, @@ -6940,6 +7155,9 @@ or string (str) data type, call @code{omp_get_interop_int}, Note that @code{device_num} is the OpenMP device number while @code{device} is the HIP device number or HSA device handle. +When using HIP with C and C++, the @code{__HIP_PLATFORM_AMD__} preprocessor +macro must be defined before including the HIP header files. + For the API routine call, add the prefix @code{omp_ipr_} to the property name; for instance: @smallexample @@ -7040,7 +7258,7 @@ The implementation remark: Per device, reverse offload regions are processed serially such that the next reverse offload region is only executed after the previous one returned. -@item OpenMP code that has a @code{requires} directive with +@item OpenMP code that has a @code{requires} directive with @code{self_maps} or @code{unified_shared_memory} runs on nvptx devices if and only if all of those support the @code{pageableMemoryAccess} property;@footnote{ @uref{https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#um-requirements}} @@ -7048,11 +7266,6 @@ The implementation remark: devices (``host fallback''). @item The default per-warp stack size is 128 kiB; see also @code{-msoft-stack} in the GCC manual. -@item The OpenMP routines @code{omp_target_memcpy_rect} and - @code{omp_target_memcpy_rect_async} and the @code{target update} - directive for non-contiguous list items will use the 2D and 3D - memory-copy functions of the CUDA library. Higher dimensions will - call those functions in a loop and are therefore supported. 
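For reference, the @code{omp_target_memcpy_rect} routine mentioned in the remarks above performs strided, rectangular copies; here is a hedged C sketch (not part of this patch; the matrix shape, offsets, and device handling are illustrative assumptions):

/* Hedged illustration only: copy a 2x3 sub-rectangle of a 4x5 host matrix
   to a device allocation with a single 2-D omp_target_memcpy_rect call.  */
#include <omp.h>
#include <stdlib.h>

int
main (void)
{
  enum { ROWS = 4, COLS = 5 };
  int host[ROWS][COLS];
  for (int i = 0; i < ROWS; ++i)
    for (int j = 0; j < COLS; ++j)
      host[i][j] = i * COLS + j;

  int dev = omp_get_default_device ();
  int host_dev = omp_get_initial_device ();
  void *devbuf = omp_target_alloc (ROWS * COLS * sizeof (int), dev);
  if (devbuf == NULL)
    return 0;  /* Allocation failed; nothing to demonstrate.  */

  size_t volume[2]  = { 2, 3 };        /* 2 rows x 3 columns.  */
  size_t offsets[2] = { 1, 1 };        /* Start at element [1][1].  */
  size_t dims[2]    = { ROWS, COLS };  /* Full matrix extents.  */

  int rc = omp_target_memcpy_rect (devbuf, host, sizeof (int), 2, volume,
                                   offsets, offsets, dims, dims,
                                   dev, host_dev);
  omp_target_free (devbuf, dev);
  return rc == 0 ? 0 : 1;
}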
@item Low-latency memory (@code{omp_low_lat_mem_space}) is supported when the the @code{access} trait is set to @code{cgroup}, and libgomp has been built for PTX ISA version 4.1 or higher (such as in GCC's @@ -7070,6 +7283,11 @@ The implementation remark: @code{omp_thread_mem_alloc}, all use low-latency memory as first preference, and fall back to main graphics memory when the low-latency pool is exhausted. +@item The OpenMP routines @code{omp_target_memcpy_rect} and + @code{omp_target_memcpy_rect_async} and the @code{target update} + directive for non-contiguous list items use the 2D and 3D memory-copy + functions of the CUDA library. Higher dimensions call those functions + in a loop and are therefore supported. @item The unique identifier (UID), used with OpenMP's API UID routines, consists of the @samp{GPU-} prefix followed by the 16-bytes UUID as returned by the CUDA runtime library. This UUID is output in grouped lower-case @@ -7102,6 +7320,9 @@ or string (str) data type, call @code{omp_get_interop_int}, Note that @code{device_num} is the OpenMP device number while @code{device} is the CUDA, CUDA Driver, or HIP device number. +When using HIP with C and C++, the @code{__HIP_PLATFORM_NVIDIA__} preprocessor +macro must be defined before including the HIP header files. + For the API routine call, add the prefix @code{omp_ipr_} to the property name; for instance: @smallexample diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index 718252b..5b8ba7e 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -171,21 +171,22 @@ acc_free (void *d) } static void -memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async, - const char *libfnname) +memcpy_tofrom_device (bool dev_to, bool dev_from, void *dst, void *src, + size_t s, int async, const char *libfnname) { /* No need to call lazy open here, as the device pointer must have been obtained from a routine that did that. */ struct goacc_thread *thr = goacc_thread (); assert (thr && thr->dev); + if (s == 0) + return; if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) { - if (from) - memmove (h, d, s); - else - memmove (d, h, s); + if (src == dst) + return; + memcpy (dst, src, s); return; } @@ -199,10 +200,15 @@ memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async, } goacc_aq aq = get_goacc_asyncqueue (async); - if (from) - gomp_copy_dev2host (thr->dev, aq, h, d, s); + if (dev_to && dev_from) + { + if (dst != src) + gomp_copy_dev2dev (thr->dev, aq, dst, src, s); + } + else if (dev_from) + gomp_copy_dev2host (thr->dev, aq, dst, src, s); else - gomp_copy_host2dev (thr->dev, aq, d, h, s, false, /* TODO: cbuf? */ NULL); + gomp_copy_host2dev (thr->dev, aq, dst, src, s, false, /* TODO: cbuf? 
*/ NULL); if (profiling_p) { @@ -214,25 +220,37 @@ memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async, void acc_memcpy_to_device (void *d, void *h, size_t s) { - memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__); + memcpy_tofrom_device (true, false, d, h, s, acc_async_sync, __FUNCTION__); } void acc_memcpy_to_device_async (void *d, void *h, size_t s, int async) { - memcpy_tofrom_device (false, d, h, s, async, __FUNCTION__); + memcpy_tofrom_device (true, false, d, h, s, async, __FUNCTION__); } void acc_memcpy_from_device (void *h, void *d, size_t s) { - memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__); + memcpy_tofrom_device (false, true, h, d, s, acc_async_sync, __FUNCTION__); } void acc_memcpy_from_device_async (void *h, void *d, size_t s, int async) { - memcpy_tofrom_device (true, d, h, s, async, __FUNCTION__); + memcpy_tofrom_device (false, true, h, d, s, async, __FUNCTION__); +} + +void +acc_memcpy_device (void *dst, void *src, size_t s) +{ + memcpy_tofrom_device (true, true, dst, src, s, acc_async_sync, __FUNCTION__); +} + +void +acc_memcpy_device_async (void *dst, void *src, size_t s, int async) +{ + memcpy_tofrom_device (true, true, dst, src, s, async, __FUNCTION__); } /* Return the device pointer that corresponds to host data H. Or NULL @@ -951,7 +969,7 @@ acc_attach_async (void **hostaddr, int async) } gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, (uintptr_t) hostaddr, - 0, NULL, false); + 0, NULL, false, true); gomp_mutex_unlock (&acc_dev->lock); } @@ -1158,7 +1176,7 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, if ((kinds[i] & 0xff) == GOMP_MAP_ATTACH) { gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, - (uintptr_t) h, s, NULL, false); + (uintptr_t) h, s, NULL, false, true); /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference counts ('n->refcount', 'n->dynamic_refcount'). */ } @@ -1176,7 +1194,7 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, = lookup_host (acc_dev, hostaddrs[j], sizeof (void *)); gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, m, (uintptr_t) hostaddrs[j], sizes[j], NULL, - false); + false, true); } bool processed = false; diff --git a/libgomp/omp.h.in b/libgomp/omp.h.in index d5e8be4..4f2bc46 100644 --- a/libgomp/omp.h.in +++ b/libgomp/omp.h.in @@ -347,6 +347,10 @@ extern int omp_target_memcpy_rect_async (void *, const void *, __SIZE_TYPE__, const __SIZE_TYPE__ *, int, int, int, omp_depend_t * __GOMP_DEFAULT_NULL) __GOMP_NOTHROW; +extern void *omp_target_memset (void *, int, __SIZE_TYPE__, int) __GOMP_NOTHROW; +extern void *omp_target_memset_async (void *, int, __SIZE_TYPE__, int, + int, omp_depend_t * __GOMP_DEFAULT_NULL) + __GOMP_NOTHROW; extern int omp_target_associate_ptr (const void *, const void *, __SIZE_TYPE__, __SIZE_TYPE__, int) __GOMP_NOTHROW; extern int omp_target_disassociate_ptr (const void *, int) __GOMP_NOTHROW; @@ -432,4 +436,136 @@ extern const char *omp_get_uid_from_device (int) __GOMP_NOTHROW; } #endif +#if __cplusplus >= 201103L + +/* std::__throw_bad_alloc and std::__throw_bad_array_new_length. 
*/ +#include <bits/functexcept.h> + +namespace omp +{ +namespace allocator +{ + +namespace __detail +{ + +template<typename __T, omp_allocator_handle_t __Handle> +struct __allocator_templ +{ + using value_type = __T; + using pointer = __T*; + using const_pointer = const __T*; + using size_type = __SIZE_TYPE__; + using difference_type = __PTRDIFF_TYPE__; + + __T* + allocate (size_type __n) + { + if (__SIZE_MAX__ / sizeof(__T) < __n) + std::__throw_bad_array_new_length (); + void *__p = omp_aligned_alloc (alignof(__T), __n * sizeof(__T), __Handle); + if (!__p) + std::__throw_bad_alloc (); + return static_cast<__T*>(__p); + } + + void + deallocate (__T *__p, size_type) __GOMP_NOTHROW + { + omp_free (static_cast<void*>(__p), __Handle); + } +}; + +template<typename __T, typename __U, omp_allocator_handle_t __Handle> +constexpr bool +operator== (const __allocator_templ<__T, __Handle>&, + const __allocator_templ<__U, __Handle>&) __GOMP_NOTHROW +{ + return true; +} + +template<typename __T, omp_allocator_handle_t __Handle, + typename __U, omp_allocator_handle_t __UHandle> +constexpr bool +operator== (const __allocator_templ<__T, __Handle>&, + const __allocator_templ<__U, __UHandle>&) __GOMP_NOTHROW +{ + return false; +} + +template<typename __T, typename __U, omp_allocator_handle_t __Handle> +constexpr bool +operator!= (const __allocator_templ<__T, __Handle>&, + const __allocator_templ<__U, __Handle>&) __GOMP_NOTHROW +{ + return false; +} + +template<typename __T, omp_allocator_handle_t __Handle, + typename __U, omp_allocator_handle_t __UHandle> +constexpr bool +operator!= (const __allocator_templ<__T, __Handle>&, + const __allocator_templ<__U, __UHandle>&) __GOMP_NOTHROW +{ + return true; +} + +} /* namespace __detail */ + +template<typename __T> +struct null_allocator + : __detail::__allocator_templ<__T, omp_null_allocator> {}; + +template<typename __T> +struct default_mem + : __detail::__allocator_templ<__T, omp_default_mem_alloc> {}; + +template<typename __T> +struct large_cap_mem + : __detail::__allocator_templ<__T, omp_large_cap_mem_alloc> {}; + +template<typename __T> +struct const_mem + : __detail::__allocator_templ<__T, omp_const_mem_alloc> {}; + +template<typename __T> +struct high_bw_mem + : __detail::__allocator_templ<__T, omp_high_bw_mem_alloc> {}; + +template<typename __T> +struct low_lat_mem + : __detail::__allocator_templ<__T, omp_low_lat_mem_alloc> {}; + +template<typename __T> +struct cgroup_mem + : __detail::__allocator_templ<__T, omp_cgroup_mem_alloc> {}; + +template<typename __T> +struct pteam_mem + : __detail::__allocator_templ<__T, omp_pteam_mem_alloc> {}; + +template<typename __T> +struct thread_mem + : __detail::__allocator_templ<__T, omp_thread_mem_alloc> {}; + +} /* namespace allocator */ + +} /* namespace omp */ + +namespace ompx +{ + +namespace allocator +{ + +template<typename __T> +struct gnu_pinned_mem + : omp::allocator::__detail::__allocator_templ<__T, ompx_gnu_pinned_mem_alloc> {}; + +} /* namespace allocator */ + +} /* namespace ompx */ + +#endif /* __cplusplus */ + #endif /* _OMP_H */ diff --git a/libgomp/omp_lib.f90.in b/libgomp/omp_lib.f90.in index cb6b95f..ce866c0 100644 --- a/libgomp/omp_lib.f90.in +++ b/libgomp/omp_lib.f90.in @@ -904,6 +904,29 @@ end interface interface + function omp_target_memset (ptr, val, count, device_num) bind(c) + use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_size_t + type(c_ptr) :: omp_target_memset + type(c_ptr), value :: ptr + integer(c_size_t), value :: count + integer(c_int), value :: val, device_num + end function 
omp_target_memset + end interface + + interface + function omp_target_memset_async (ptr, val, count, device_num, & + depobj_count, depobj_list) bind(c) + use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_size_t + import :: omp_depend_kind + type(c_ptr) :: omp_target_memset_async + type(c_ptr), value :: ptr + integer(c_size_t), value :: count + integer(c_int), value :: val, device_num, depobj_count + integer(omp_depend_kind), optional :: depobj_list(*) + end function omp_target_memset_async + end interface + + interface function omp_target_associate_ptr (host_ptr, device_ptr, size, & device_offset, device_num) bind(c) use, intrinsic :: iso_c_binding, only : c_ptr, c_size_t, c_int diff --git a/libgomp/omp_lib.h.in b/libgomp/omp_lib.h.in index f7af5ff..9047095 100644 --- a/libgomp/omp_lib.h.in +++ b/libgomp/omp_lib.h.in @@ -505,6 +505,31 @@ end interface interface + function omp_target_memset (ptr, val, count, device_num) bind(c) + use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_size_t + type(c_ptr) omp_target_memset + type(c_ptr), value :: ptr + integer(c_size_t), value :: count + integer(c_int), value :: val, device_num + end function omp_target_memset + end interface + + interface + function omp_target_memset_async (ptr, val, count, device_num, & + & depobj_count, depobj_list) & + & bind(c) + use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_size_t + import :: omp_depend_kind + type(c_ptr) :: omp_target_memset_async + type(c_ptr), value :: ptr + integer(c_size_t), value :: count + integer(c_int), value :: val, device_num, depobj_count + integer(omp_depend_kind), optional :: depobj_list(*) + end function omp_target_memset_async + end interface + + + interface function omp_target_associate_ptr (host_ptr, device_ptr, size, & & device_offset, device_num) & & bind(c) diff --git a/libgomp/openacc.f90 b/libgomp/openacc.f90 index 8ef107e..3f2db45 100644 --- a/libgomp/openacc.f90 +++ b/libgomp/openacc.f90 @@ -797,6 +797,9 @@ module openacc public :: acc_copyout_finalize, acc_delete_finalize public :: acc_memcpy_to_device, acc_memcpy_to_device_async public :: acc_memcpy_from_device, acc_memcpy_from_device_async + public :: acc_memcpy_device, acc_memcpy_device_async + public :: acc_attach, acc_attach_async, acc_detach, acc_detach_async + public :: acc_detach_finalize, acc_detach_finalize_async integer, parameter :: openacc_version = 201711 @@ -1046,6 +1049,69 @@ module openacc end subroutine end interface + interface + subroutine acc_memcpy_device (data_dev_dest, data_dev_src, bytes) bind(C) + use iso_c_binding, only: c_ptr, c_size_t + type(c_ptr), value :: data_dev_dest + type(c_ptr), value :: data_dev_src + integer(c_size_t), value :: bytes + end subroutine + end interface + + interface + subroutine acc_memcpy_device_async (data_dev_dest, data_dev_src, & + bytes, async_arg) bind(C) + use iso_c_binding, only: c_ptr, c_size_t + import :: acc_handle_kind + type(c_ptr), value :: data_dev_dest + type(c_ptr), value :: data_dev_src + integer(c_size_t), value :: bytes + integer(acc_handle_kind), value :: async_arg + end subroutine + end interface + + interface + subroutine acc_attach (ptr_addr) bind(C) + type(*), dimension(..) :: ptr_addr + end subroutine + end interface + + interface + subroutine acc_attach_async (ptr_addr, async_arg) bind(C) + import :: acc_handle_kind + type(*), dimension(..) :: ptr_addr + integer(acc_handle_kind), value :: async_arg + end subroutine + end interface + + interface + subroutine acc_detach (ptr_addr) bind(C) + type(*), dimension(..) 
:: ptr_addr + end subroutine + end interface + + interface + subroutine acc_detach_async (ptr_addr, async_arg) bind(C) + import :: acc_handle_kind + type(*), dimension(..) :: ptr_addr + integer(acc_handle_kind), value :: async_arg + end subroutine + end interface + + interface + subroutine acc_detach_finalize (ptr_addr) bind(C) + type(*), dimension(..) :: ptr_addr + end subroutine + end interface + + interface + subroutine acc_detach_finalize_async (ptr_addr, async_arg) bind(C) + import :: acc_handle_kind + type(*), dimension(..) :: ptr_addr + integer(acc_handle_kind), value :: async_arg + end subroutine + end interface + interface acc_copyin_async procedure :: acc_copyin_async_32_h procedure :: acc_copyin_async_64_h diff --git a/libgomp/openacc.h b/libgomp/openacc.h index a520bbe..3085b00 100644 --- a/libgomp/openacc.h +++ b/libgomp/openacc.h @@ -123,6 +123,7 @@ void *acc_hostptr (void *) __GOACC_NOTHROW; int acc_is_present (void *, size_t) __GOACC_NOTHROW; void acc_memcpy_to_device (void *, void *, size_t) __GOACC_NOTHROW; void acc_memcpy_from_device (void *, void *, size_t) __GOACC_NOTHROW; +void acc_memcpy_device (void *, void *, size_t) __GOACC_NOTHROW; void acc_attach (void **) __GOACC_NOTHROW; void acc_attach_async (void **, int) __GOACC_NOTHROW; void acc_detach (void **) __GOACC_NOTHROW; @@ -136,7 +137,7 @@ void acc_delete_finalize_async (void *, size_t, int) __GOACC_NOTHROW; void acc_detach_finalize (void **) __GOACC_NOTHROW; void acc_detach_finalize_async (void **, int) __GOACC_NOTHROW; -/* Async functions, specified in OpenACC 2.5. */ +/* Async functions, specified in OpenACC 2.5, acc_memcpy_device in 2.6. */ void acc_copyin_async (void *, size_t, int) __GOACC_NOTHROW; void acc_create_async (void *, size_t, int) __GOACC_NOTHROW; void acc_copyout_async (void *, size_t, int) __GOACC_NOTHROW; @@ -145,6 +146,7 @@ void acc_update_device_async (void *, size_t, int) __GOACC_NOTHROW; void acc_update_self_async (void *, size_t, int) __GOACC_NOTHROW; void acc_memcpy_to_device_async (void *, void *, size_t, int) __GOACC_NOTHROW; void acc_memcpy_from_device_async (void *, void *, size_t, int) __GOACC_NOTHROW; +void acc_memcpy_device_async (void *, void *, size_t, int) __GOACC_NOTHROW; /* CUDA-specific routines. */ void *acc_get_current_cuda_device (void) __GOACC_NOTHROW; diff --git a/libgomp/openacc_lib.h b/libgomp/openacc_lib.h index b0d287e..dbdc4d7 100644 --- a/libgomp/openacc_lib.h +++ b/libgomp/openacc_lib.h @@ -528,6 +528,30 @@ end subroutine end interface + interface + subroutine acc_memcpy_device(data_dev_dest, data_dev_src, & + & bytes) bind(C) + use iso_c_binding, only: c_ptr, c_size_t + type(c_ptr), value :: data_dev_dest + type(c_ptr), value :: data_dev_src + integer(c_size_t), value :: bytes + end subroutine + end interface + + interface + subroutine acc_memcpy_device_async(data_dev_dest, & + & data_dev_src, bytes, & + & async_arg) bind(C) + use iso_c_binding, only: c_ptr, c_size_t + import :: acc_handle_kind + type(c_ptr), value :: data_dev_dest + type(c_ptr), value :: data_dev_src + integer(c_size_t), value :: bytes + integer(acc_handle_kind), value :: async_arg + end subroutine + end interface + + interface acc_copyin_async subroutine acc_copyin_async_32_h (a, len, async) use iso_c_binding, only: c_int32_t @@ -683,3 +707,45 @@ integer (acc_handle_kind) async_ end subroutine end interface + + interface + subroutine acc_attach (ptr_addr) bind(C) + type(*), dimension(..) 
:: ptr_addr + end subroutine + end interface + + interface + subroutine acc_attach_async (ptr_addr, async_arg) bind(C) + import :: acc_handle_kind + type(*), dimension(..) :: ptr_addr + integer(acc_handle_kind), value :: async_arg + end subroutine + end interface + + interface + subroutine acc_detach (ptr_addr) bind(C) + type(*), dimension(..) :: ptr_addr + end subroutine + end interface + + interface + subroutine acc_detach_async (ptr_addr, async_arg) bind(C) + import :: acc_handle_kind + type(*), dimension(..) :: ptr_addr + integer(acc_handle_kind), value :: async_arg + end subroutine + end interface + + interface + subroutine acc_detach_finalize (ptr_addr) bind(C) + type(*), dimension(..) :: ptr_addr + end subroutine + end interface + + interface + subroutine acc_detach_finalize_async(ptr_addr, async_arg)bind(C) + import :: acc_handle_kind + type(*), dimension(..) :: ptr_addr + integer(acc_handle_kind), value :: async_arg + end subroutine + end interface diff --git a/libgomp/plugin/cuda-lib.def b/libgomp/plugin/cuda-lib.def index eb562ac..7f4ddcc 100644 --- a/libgomp/plugin/cuda-lib.def +++ b/libgomp/plugin/cuda-lib.def @@ -42,6 +42,7 @@ CUDA_ONE_CALL (cuMemcpyHtoDAsync) CUDA_ONE_CALL (cuMemcpy2D) CUDA_ONE_CALL (cuMemcpy2DUnaligned) CUDA_ONE_CALL (cuMemcpy3D) +CUDA_ONE_CALL (cuMemsetD8) CUDA_ONE_CALL (cuMemFree) CUDA_ONE_CALL (cuMemFreeHost) CUDA_ONE_CALL (cuMemGetAddressRange) diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c index 4b42a59..498b549 100644 --- a/libgomp/plugin/plugin-gcn.c +++ b/libgomp/plugin/plugin-gcn.c @@ -208,6 +208,8 @@ struct hsa_runtime_fn_info hsa_status_t (*hsa_code_object_deserialize_fn) (void *serialized_code_object, size_t serialized_code_object_size, const char *options, hsa_code_object_t *code_object); + hsa_status_t (*hsa_amd_memory_fill_fn)(void *ptr, uint32_t value, + size_t count); hsa_status_t (*hsa_amd_memory_lock_fn) (void *host_ptr, size_t size, hsa_agent_t *agents, int num_agent, void **agent_ptr); @@ -1456,6 +1458,7 @@ init_hsa_runtime_functions (void) DLSYM_FN (hsa_signal_load_acquire) DLSYM_FN (hsa_queue_destroy) DLSYM_FN (hsa_code_object_deserialize) + DLSYM_OPT_FN (hsa_amd_memory_fill) DLSYM_OPT_FN (hsa_amd_memory_lock) DLSYM_OPT_FN (hsa_amd_memory_unlock) DLSYM_OPT_FN (hsa_amd_memory_async_copy_rect) @@ -4435,6 +4438,83 @@ init_hip_runtime_functions (void) return true; } +bool +GOMP_OFFLOAD_memset (int ord, void *ptr, int val, size_t count) +{ + hsa_status_t status = HSA_STATUS_SUCCESS; + + /* A memset feature is only provided via hsa_amd_memory_fill; while it + is fast, it is an HSA extension and it has two requirements: The memory + must be aligned to multiples of 4 bytes - and, by construction, only + multiples of 4 bytes can be filled (uint32_t value argument). + + This means: Either not using that function or up to three function calls: + - copy 1 to 3 bytes to get alignment (hsa_memory_copy), if unaligned + - call hsa_amd_memory_fill + - copy remaining 1 to 3 bytes (hsa_memory_copy), if after alignment + count is not a multiple of 4 bytes. + + Having more than one function call is only profitable if there is + enough data to process; see below for the used heuristic values. */ + + uint8_t v8 = (uint8_t) val; + size_t before = (4 - (uintptr_t) ptr % 4) % 4; /* 0 to 3 bytes. */ + size_t tail = (count - before) % 4; /* 0 to 3 bytes. */ + + /* Heuristic */ + enum { + /* Prefer alloca to malloc up to ... */ + alloca_size = 256, /* bytes */ + /* Call hsa_amd_memory_fill also when two copy calls are required. 
*/ + always_use_fill = 256*1024, /* bytes */ + /* Call hsa_amd_memory_fill also when on copy call is required. */ + use_fill_one_copy = (128+64)*1024 /* bytes */ + }; + + /* Do not call hsa_amd_memory_fill when any of the following conditions + is true. Note that it is always preferred if available and + before == tail == 0. */ + if (__builtin_expect (!hsa_fns.hsa_amd_memory_fill_fn, 0) + || (before && tail && count < always_use_fill) + || ((before || tail) && count < use_fill_one_copy)) + before = count; + + /* Copy call for alignment - or all data, if condition above is true. */ + if (before) + { + void *data; + if (before > alloca_size) + data = malloc (before * sizeof (uint8_t)); + else + data = alloca (before * sizeof (uint8_t)); + memset (data, val, before); + status = hsa_fns.hsa_memory_copy_fn (ptr, data, before); + if (before > alloca_size) + free (data); + if (data == 0 || status != HSA_STATUS_SUCCESS) + goto fail; + count -= before; + } + + if (count == 0) + return true; + + ptr += before; + + uint32_t values = v8 | (v8 << 8) | (v8 << 16) | (v8 << 24); + status = hsa_fns.hsa_amd_memory_fill_fn (ptr, values, count / 4); + if (tail && status == HSA_STATUS_SUCCESS) + { + ptr += count - tail; + status = hsa_fns.hsa_memory_copy_fn (ptr, &values, tail); + } + if (status == HSA_STATUS_SUCCESS) + return true; + +fail: + GOMP_PLUGIN_error ("memory set failed"); + return false; +} void GOMP_OFFLOAD_interop (struct interop_obj_t *obj, int ord, @@ -5079,7 +5159,8 @@ GOMP_OFFLOAD_openacc_async_queue_callback (struct goacc_asyncqueue *aq, queue_push_callback (aq, fn, data); } -/* Queue up an asynchronous data copy from host to DEVICE. */ +/* Queue up an asynchronous data copy from host to DEVICE. + (Also handles dev2host and dev2dev.) */ bool GOMP_OFFLOAD_openacc_async_host2dev (int device, void *dst, const void *src, @@ -5097,10 +5178,16 @@ bool GOMP_OFFLOAD_openacc_async_dev2host (int device, void *dst, const void *src, size_t n, struct goacc_asyncqueue *aq) { - struct agent_info *agent = get_agent_info (device); - assert (agent == aq->agent); - queue_push_copy (aq, dst, src, n); - return true; + return GOMP_OFFLOAD_openacc_async_host2dev (device, dst, src, n, aq); +} + +/* Queue up an asynchronous data copy from DEVICE to DEVICE. 
*/ + +bool +GOMP_OFFLOAD_openacc_async_dev2dev (int device, void *dst, const void *src, + size_t n, struct goacc_asyncqueue *aq) +{ + return GOMP_OFFLOAD_openacc_async_host2dev (device, dst, src, n, aq); } union goacc_property_value diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index a5cf859..0ba445e 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -2019,6 +2019,34 @@ GOMP_OFFLOAD_openacc_async_queue_callback (struct goacc_asyncqueue *aq, } static bool +cuda_memcpy_dev_sanity_check (const void *d1, const void *d2, size_t s) +{ + CUdeviceptr pb1, pb2; + size_t ps1, ps2; + if (!s) + return true; + if (!d1 || !d2) + { + GOMP_PLUGIN_error ("invalid device address"); + return false; + } + CUDA_CALL (cuMemGetAddressRange, &pb1, &ps1, (CUdeviceptr) d1); + CUDA_CALL (cuMemGetAddressRange, &pb2, &ps2, (CUdeviceptr) d2); + if (!pb1 || !pb2) + { + GOMP_PLUGIN_error ("invalid device address"); + return false; + } + if ((void *)(d1 + s) > (void *)(pb1 + ps1) + || (void *)(d2 + s) > (void *)(pb2 + ps2)) + { + GOMP_PLUGIN_error ("invalid size"); + return false; + } + return true; +} + +static bool cuda_memcpy_sanity_check (const void *h, const void *d, size_t s) { CUdeviceptr pb; @@ -2077,6 +2105,9 @@ GOMP_OFFLOAD_dev2host (int ord, void *dst, const void *src, size_t n) bool GOMP_OFFLOAD_dev2dev (int ord, void *dst, const void *src, size_t n) { + if (!nvptx_attach_host_thread_to_device (ord) + || !cuda_memcpy_dev_sanity_check (dst, src, n)) + return false; CUDA_CALL (cuMemcpyDtoDAsync, (CUdeviceptr) dst, (CUdeviceptr) src, n, NULL); return true; } @@ -2267,6 +2298,15 @@ GOMP_OFFLOAD_memcpy3d (int dst_ord, int src_ord, size_t dim2_size, } bool +GOMP_OFFLOAD_memset (int ord, void *ptr, int val, size_t count) +{ + if (!nvptx_attach_host_thread_to_device (ord)) + return false; + CUDA_CALL (cuMemsetD8, (CUdeviceptr) ptr, (unsigned char) val, count); + return true; +} + +bool GOMP_OFFLOAD_openacc_async_host2dev (int ord, void *dst, const void *src, size_t n, struct goacc_asyncqueue *aq) { @@ -2288,6 +2328,18 @@ GOMP_OFFLOAD_openacc_async_dev2host (int ord, void *dst, const void *src, return true; } +bool +GOMP_OFFLOAD_openacc_async_dev2dev (int ord, void *dst, const void *src, + size_t n, struct goacc_asyncqueue *aq) +{ + if (!nvptx_attach_host_thread_to_device (ord) + || !cuda_memcpy_dev_sanity_check (dst, src, n)) + return false; + CUDA_CALL (cuMemcpyDtoDAsync, (CUdeviceptr) dst, (CUdeviceptr) src, n, + aq->cuda_stream); + return true; +} + union goacc_property_value GOMP_OFFLOAD_openacc_get_property (int n, enum goacc_property prop) { diff --git a/libgomp/target-cxa-dso-dtor.c b/libgomp/target-cxa-dso-dtor.c new file mode 100644 index 0000000..d1a898d --- /dev/null +++ b/libgomp/target-cxa-dso-dtor.c @@ -0,0 +1,3 @@ +/* Host/device compatibility: Itanium C++ ABI, DSO Object Destruction API */ + +/* Nothing needed here. 
*/ diff --git a/libgomp/target.c b/libgomp/target.c index a64ee96..cda092b 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -461,6 +461,19 @@ gomp_copy_dev2host (struct gomp_device_descr *devicep, gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz); } +attribute_hidden void +gomp_copy_dev2dev (struct gomp_device_descr *devicep, + struct goacc_asyncqueue *aq, + void *dst, const void *src, size_t sz) +{ + if (__builtin_expect (aq != NULL, 0)) + goacc_device_copy_async (devicep, devicep->openacc.async.dev2dev_func, + "dev", dst, "dev", src, NULL, sz, aq); + else + gomp_device_copy (devicep, devicep->dev2dev_func, "dev", dst, + "dev", src, sz); +} + static void gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr) { @@ -800,12 +813,22 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, (void *) cur_node.host_end); } -attribute_hidden void +/* Update the devptr by setting it to the device address of the host pointee + 'attach_to'; devptr is obtained from the splay_tree_key n. + When the pointer is already attached or the host pointee is either + NULL or in memory map, this function returns true. + Otherwise, the device pointer is set to point to the host pointee and: + - If allow_zero_length_array_sections is set, true is returned. + - Else, if fail_if_not_found is set, a fatal error is issued. + - Otherwise, false is returned. */ + +attribute_hidden bool gomp_attach_pointer (struct gomp_device_descr *devicep, struct goacc_asyncqueue *aq, splay_tree mem_map, splay_tree_key n, uintptr_t attach_to, size_t bias, struct gomp_coalesce_buf *cbufp, - bool allow_zero_length_array_sections) + bool allow_zero_length_array_sections, + bool fail_if_not_found) { struct splay_tree_key_s s; size_t size, idx; @@ -860,7 +883,7 @@ gomp_attach_pointer (struct gomp_device_descr *devicep, gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data, sizeof (void *), true, cbufp); - return; + return true; } s.host_start = target + bias; @@ -869,15 +892,16 @@ gomp_attach_pointer (struct gomp_device_descr *devicep, if (!tn) { - if (allow_zero_length_array_sections) - /* When allowing attachment to zero-length array sections, we - copy the host pointer when the target region is not mapped. */ - data = target; - else + /* We copy the host pointer when the target region is not mapped; + for allow_zero_length_array_sections, that's permitted. + Otherwise, it depends on the context. Return false in that + case, unless fail_if_not_found. 
*/ + if (!allow_zero_length_array_sections && fail_if_not_found) { gomp_mutex_unlock (&devicep->lock); gomp_fatal ("pointer target not mapped for attach"); } + data = target; } else data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start; @@ -889,10 +913,13 @@ gomp_attach_pointer (struct gomp_device_descr *devicep, gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data, sizeof (void *), true, cbufp); + if (!tn && !allow_zero_length_array_sections) + return false; } else gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__, (void *) attach_to, (int) n->aux->attach_count[idx]); + return true; } attribute_hidden void @@ -1587,9 +1614,37 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, bool zlas = ((kind & typemask) == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION); - gomp_attach_pointer (devicep, aq, mem_map, n, - (uintptr_t) hostaddrs[i], sizes[i], - cbufp, zlas); + /* For 'target enter data', the map clauses are split; + however, for more complex code with struct and + pointer members, the mapping and the attach can end up + in different sets; or the wrong mapping with the + attach. As there is no way to know whether a size + zero like 'var->ptr[i][:0]' happend in the same + directive or not, the not-attached check is now + fully silenced for 'enter data'. */ + if (openmp_p && (pragma_kind & GOMP_MAP_VARS_ENTER_DATA)) + zlas = true; + if (!gomp_attach_pointer (devicep, aq, mem_map, n, + (uintptr_t) hostaddrs[i], sizes[i], + cbufp, zlas, !openmp_p)) + { + /* Pointee not found; that's an error except for + map(var[:n]) with n == 0; the compiler adds a + runtime condition such that for those the kind is + always GOMP_MAP_ZERO_LEN_ARRAY_SECTION. */ + for (j = i; j > 0; j--) + if (*(void**) hostaddrs[i] == hostaddrs[j-1] - sizes[i] + && sizes[j-1] == 0 + && (GOMP_MAP_ZERO_LEN_ARRAY_SECTION + == (get_kind (short_mapkind, kinds, j-1) + & typemask))) + break; + if (j == 0) + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("pointer target not mapped for attach"); + } + } } else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0) { @@ -2586,6 +2641,10 @@ gomp_unload_image_from_device (struct gomp_device_descr *devicep, } } +#define GOMP_REQUIRES_NAME_BUF_LEN \ + sizeof ("unified_address, unified_shared_memory, " \ + "self_maps, reverse_offload") + static void gomp_requires_to_name (char *buf, size_t size, int requires_mask) { @@ -2634,10 +2693,8 @@ GOMP_offload_register_ver (unsigned version, const void *host_table, if (omp_req && omp_requires_mask && omp_requires_mask != omp_req) { - char buf1[sizeof ("unified_address, unified_shared_memory, " - "self_maps, reverse_offload")]; - char buf2[sizeof ("unified_address, unified_shared_memory, " - "self_maps, reverse_offload")]; + char buf1[GOMP_REQUIRES_NAME_BUF_LEN]; + char buf2[GOMP_REQUIRES_NAME_BUF_LEN]; gomp_requires_to_name (buf2, sizeof (buf2), omp_req != GOMP_REQUIRES_TARGET_USED ? 
omp_req : omp_requires_mask); @@ -4948,6 +5005,88 @@ omp_target_memcpy_rect_async (void *dst, const void *src, size_t element_size, return 0; } +static void +omp_target_memset_int (void *ptr, int val, size_t count, + struct gomp_device_descr *devicep) +{ + if (__builtin_expect (count == 0, 0)) + return; + if (devicep == NULL) + { + memset (ptr, val, count); + return; + } + + gomp_mutex_lock (&devicep->lock); + int ret = devicep->memset_func (devicep->target_id, ptr, val, count); + gomp_mutex_unlock (&devicep->lock); + if (!ret) + gomp_fatal ("omp_target_memset failed"); +} + +void* +omp_target_memset (void *ptr, int val, size_t count, int device_num) +{ + struct gomp_device_descr *devicep; + if (device_num == omp_initial_device + || device_num == gomp_get_num_devices () + || (devicep = resolve_device (device_num, false)) == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + devicep = NULL; + + omp_target_memset_int (ptr, val, count, devicep); + return ptr; +} + +typedef struct +{ + void *ptr; + size_t count; + struct gomp_device_descr *devicep; + int val; +} omp_target_memset_data; + +static void +omp_target_memset_async_helper (void *args) +{ + omp_target_memset_data *a = args; + omp_target_memset_int (a->ptr, a->val, a->count, a->devicep); +} + +void* +omp_target_memset_async (void *ptr, int val, size_t count, int device_num, + int depobj_count, omp_depend_t *depobj_list) +{ + void *depend[depobj_count + 5]; + struct gomp_device_descr *devicep; + unsigned flags = 0; + int i; + + if (device_num == omp_initial_device + || device_num == gomp_get_num_devices () + || (devicep = resolve_device (device_num, false)) == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + devicep = NULL; + + omp_target_memset_data s = {.ptr = ptr, .val = val, .count = count, + .devicep = devicep}; + if (depobj_count > 0 && depobj_list != NULL) + { + flags |= GOMP_TASK_FLAG_DEPEND; + depend[0] = 0; + depend[1] = (void *) (uintptr_t) depobj_count; + depend[2] = depend[3] = depend[4] = 0; + for (i = 0; i < depobj_count; ++i) + depend[i + 5] = &depobj_list[i]; + } + + GOMP_task (omp_target_memset_async_helper, &s, NULL, sizeof (s), + __alignof__ (s), true, flags, depend, 0, NULL); + return ptr; +} + int omp_target_associate_ptr (const void *host_ptr, const void *device_ptr, size_t size, size_t device_offset, int device_num) @@ -5513,6 +5652,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device, DLSYM_OPT (async_run, async_run); DLSYM_OPT (can_run, can_run); DLSYM (dev2dev); + DLSYM (memset); } if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200) { @@ -5531,6 +5671,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device, || !DLSYM_OPT (openacc.async.exec, openacc_async_exec) || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host) || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev) + || !DLSYM_OPT (openacc.async.dev2dev, openacc_async_dev2dev) || !DLSYM_OPT (openacc.get_property, openacc_get_property)) { /* Require all the OpenACC handlers if we have @@ -5647,8 +5788,7 @@ gomp_target_init (void) found = true; if (found) { - char buf[sizeof ("unified_address, unified_shared_memory, " - "reverse_offload")]; + char buf[GOMP_REQUIRES_NAME_BUF_LEN]; gomp_requires_to_name (buf, sizeof (buf), omp_req); char *name = (char *) malloc (cur_len + 1); memcpy (name, cur, cur_len); diff --git a/libgomp/testsuite/lib/libgomp.exp 
b/libgomp/testsuite/lib/libgomp.exp index bc38e3c..fd475ac 100644 --- a/libgomp/testsuite/lib/libgomp.exp +++ b/libgomp/testsuite/lib/libgomp.exp @@ -30,6 +30,7 @@ load_gcc_lib scandump.exp load_gcc_lib scanlang.exp load_gcc_lib scanrtl.exp load_gcc_lib scansarif.exp +load_gcc_lib scanhtml.exp load_gcc_lib scantree.exp load_gcc_lib scanltrans.exp load_gcc_lib scanoffload.exp @@ -553,7 +554,23 @@ int main() { } } "-lcuda" ] } -# Return 1 if cublas_v2.h and -lcublas are available. +# Return 1 if -lcuda is available (header not required). + +proc check_effective_target_openacc_libcuda { } { + return [check_no_compiler_messages openacc_libcuda executable { +typedef enum { CUDA_SUCCESS } CUresult; +typedef int CUdevice; +CUresult cuDeviceGet (CUdevice *, int); +int main() { + CUdevice dev; + CUresult r = cuDeviceGet (&dev, 0); + if (r != CUDA_SUCCESS) + return 1; + return 0; +} } "-lcuda" ] +} + +# Return 1 if cublas_v2.h, cuda.h, -lcublas and -lcuda are available. proc check_effective_target_openacc_cublas { } { return [check_no_compiler_messages openacc_cublas executable { @@ -573,7 +590,25 @@ int main() { } } "-lcuda -lcublas" ] } -# Return 1 if cuda_runtime_api.h and -lcudart are available. +# Return 1 if -lcublas is available header not required). + +proc check_effective_target_openacc_libcublas { } { + return [check_no_compiler_messages openacc_libcublas executable { +typedef enum { CUBLAS_STATUS_SUCCESS } cublasStatus_t; +typedef struct cublasContext* cublasHandle_t; +#define cublasCreate cublasCreate_v2 +cublasStatus_t cublasCreate_v2 (cublasHandle_t *); +int main() { + cublasStatus_t s; + cublasHandle_t h; + s = cublasCreate (&h); + if (s != CUBLAS_STATUS_SUCCESS) + return 1; + return 0; +} } "-lcublas" ] +} + +# Return 1 if cuda_runtime_api.h, cuda.h, -lcuda and -lcudart are available. proc check_effective_target_openacc_cudart { } { return [check_no_compiler_messages openacc_cudart executable { @@ -592,3 +627,98 @@ int main() { return 0; } } "-lcuda -lcudart" ] } + +# Return 1 if -lcudart is available (no header required). + +proc check_effective_target_openacc_libcudart { } { + return [check_no_compiler_messages openacc_libcudart executable { +typedef int cudaError_t; +cudaError_t cudaGetDevice(int *); +enum { cudaSuccess }; +int main() { + cudaError_t e; + int devn; + e = cudaGetDevice (&devn); + if (e != cudaSuccess) + return 1; + return 0; +} } "-lcudart" ] +} + +# Return 1 if hip.h is available (no link check; AMD platform). + +proc check_effective_target_gomp_hip_header_amd { } { + return [check_no_compiler_messages gomp_hip_header_amd assembly { +#define __HIP_PLATFORM_AMD__ +#include <hip/hip_runtime_api.h> +int main() { + hipDevice_t dev; + hipError_t r = hipDeviceGet (&dev, 0); + if (r != hipSuccess) + return 1; + return 0; +} }] +} + +# Return 1 if hip.h is available (no link check; Nvidia/CUDA platform). + +proc check_effective_target_gomp_hip_header_nvidia { } { + return [check_no_compiler_messages gomp_hip_header_nvidia assembly { +#define __HIP_PLATFORM_NVIDIA__ +#include <hip/hip_runtime_api.h> +int main() { + hipDevice_t dev; + hipError_t r = hipDeviceGet (&dev, 0); + if (r != hipSuccess) + return 1; + return 0; +} } "-Wno-deprecated-declarations"] +} + +# Return 1 if the Fortran hipfort module is available (no link check) + +proc check_effective_target_gomp_hipfort_module { } { + return [check_no_compiler_messages gomp_hipfort_module assembly { +! 
Fortran +use hipfort +implicit none +integer(kind(hipSuccess)) :: r +integer(c_int) :: dev +r = hipDeviceGet (dev, 0) +if (r /= hipSuccess) error stop +end +}] +} + +# Return 1 if AMD HIP's -lamdhip64 is available (no header required). + +proc check_effective_target_gomp_libamdhip64 { } { + return [check_no_compiler_messages gomp_libamdhip64 executable { +typedef int hipError_t; +typedef int hipDevice_t; +enum { hipSuccess = 0 }; +hipError_t hipDeviceGet(hipDevice_t*, int); +int main() { + hipDevice_t dev; + hipError_t r = hipDeviceGet (&dev, 0); + if (r != hipSuccess) + return 1; + return 0; +} } "-lamdhip64" ] +} + +# Return 1 if AMD HIP's -lamdhip64 is available (no header required). + +proc check_effective_target_gomp_libhipblas { } { + return [check_no_compiler_messages gomp_libhipblas executable { +typedef enum { HIPBLAS_STATUS_SUCCESS = 0 } hipblasStatus_t; +typedef void* hipblasHandle_t; +hipblasStatus_t hipblasCreate (hipblasHandle_t*); +int main() { + hipblasHandle_t handle; + hipblasStatus_t stat = hipblasCreate (&handle); + if (stat != HIPBLAS_STATUS_SUCCESS) + return 1; + return 0; +} } "-lhipblas" ] +} diff --git a/libgomp/testsuite/libgomp.c++/allocator-1.C b/libgomp/testsuite/libgomp.c++/allocator-1.C new file mode 100644 index 0000000..49425386 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/allocator-1.C @@ -0,0 +1,171 @@ +// { dg-do run } + +#include <omp.h> +#include <memory> +#include <limits> + +template<typename T, template<typename> class Alloc> +void test (T const initial_value = T()) +{ + using Allocator = Alloc<T>; + Allocator a; + using Traits = std::allocator_traits<Allocator>; + static_assert (__is_same(typename Traits::allocator_type, Allocator )); + static_assert (__is_same(typename Traits::value_type, T )); + static_assert (__is_same(typename Traits::pointer, T* )); + static_assert (__is_same(typename Traits::const_pointer, T const* )); + static_assert (__is_same(typename Traits::void_pointer, void* )); + static_assert (__is_same(typename Traits::const_void_pointer, void const* )); + static_assert (__is_same(typename Traits::difference_type, __PTRDIFF_TYPE__)); + static_assert (__is_same(typename Traits::size_type, __SIZE_TYPE__ )); + static_assert (Traits::propagate_on_container_copy_assignment::value == false); + static_assert (Traits::propagate_on_container_move_assignment::value == false); + static_assert (Traits::propagate_on_container_swap::value == false); + static_assert (Traits::is_always_equal::value == true); + + static constexpr __SIZE_TYPE__ correct_max_size + = std::numeric_limits<__SIZE_TYPE__>::max () / sizeof (T); + if (Traits::max_size (a) != correct_max_size) + __builtin_abort (); + + static constexpr __SIZE_TYPE__ alloc_count = 1; + T *p = Traits::allocate (a, alloc_count); + if (p == nullptr) + __builtin_abort (); + Traits::construct (a, p, initial_value); + if (*p != initial_value) + __builtin_abort (); + Traits::destroy (a, p); + Traits::deallocate (a, p, alloc_count); + /* Not interesting but might as well test it. */ + static_cast<void>(Traits::select_on_container_copy_construction (a)); + + if (!(a == Allocator())) + __builtin_abort (); + if (a != Allocator()) + __builtin_abort (); + if (!(a == Alloc<void>())) + __builtin_abort (); + if (a != Alloc<void>()) + __builtin_abort (); +} + +#define CHECK_INEQUALITY(other_alloc_templ, type) \ +do { \ + /* Skip tests for itself, those are equal. Intantiate each */ \ + /* one with void so we can easily tell if they are the same. 
*/ \ + if (!__is_same (AllocTempl<void>, other_alloc_templ<void>)) \ + { \ + other_alloc_templ<type> other; \ + if (a == other) \ + __builtin_abort (); \ + if (!(a != other)) \ + __builtin_abort (); \ + } \ +} while (false) + +template<typename T, template<typename> class AllocTempl> +void test_inequality () +{ + using Allocator = AllocTempl<T>; + Allocator a; + CHECK_INEQUALITY (omp::allocator::null_allocator, void); + CHECK_INEQUALITY (omp::allocator::default_mem, void); + CHECK_INEQUALITY (omp::allocator::large_cap_mem, void); + CHECK_INEQUALITY (omp::allocator::const_mem, void); + CHECK_INEQUALITY (omp::allocator::high_bw_mem, void); + CHECK_INEQUALITY (omp::allocator::low_lat_mem, void); + CHECK_INEQUALITY (omp::allocator::cgroup_mem, void); + CHECK_INEQUALITY (omp::allocator::pteam_mem, void); + CHECK_INEQUALITY (omp::allocator::thread_mem, void); +#ifdef __gnu_linux__ + /* Pinning not implemented on other targets. */ + CHECK_INEQUALITY (ompx::allocator::gnu_pinned_mem, void); +#endif + /* And again with the same type passed to the allocator. */ + CHECK_INEQUALITY (omp::allocator::null_allocator, T); + CHECK_INEQUALITY (omp::allocator::default_mem, T); + CHECK_INEQUALITY (omp::allocator::large_cap_mem, T); + CHECK_INEQUALITY (omp::allocator::const_mem, T); + CHECK_INEQUALITY (omp::allocator::high_bw_mem, T); + CHECK_INEQUALITY (omp::allocator::low_lat_mem, T); + CHECK_INEQUALITY (omp::allocator::cgroup_mem, T); + CHECK_INEQUALITY (omp::allocator::pteam_mem, T); + CHECK_INEQUALITY (omp::allocator::thread_mem, T); +#ifdef __gnu_linux__ + CHECK_INEQUALITY (ompx::allocator::gnu_pinned_mem, T); +#endif +} + +#undef CHECK_INEQUALITY + +struct S +{ + int _v0; + bool _v1; + float _v2; + + bool operator== (S const& other) const noexcept { + return _v0 == other._v0 + && _v1 == other._v1 + && _v2 == other._v2; + } + bool operator!= (S const& other) const noexcept { + return !this->operator==(other); + } +}; + +int main () +{ + test<int, omp::allocator::null_allocator>(42); + test<int, omp::allocator::default_mem>(42); + test<int, omp::allocator::large_cap_mem>(42); + test<int, omp::allocator::const_mem>(42); + test<int, omp::allocator::high_bw_mem>(42); + test<int, omp::allocator::low_lat_mem>(42); + test<int, omp::allocator::cgroup_mem>(42); + test<int, omp::allocator::pteam_mem>(42); + test<int, omp::allocator::thread_mem>(42); +#ifdef __gnu_linux__ + test<int, ompx::allocator::gnu_pinned_mem>(42); +#endif + + test<long long, omp::allocator::null_allocator>(42); + test<long long, omp::allocator::default_mem>(42); + test<long long, omp::allocator::large_cap_mem>(42); + test<long long, omp::allocator::const_mem>(42); + test<long long, omp::allocator::high_bw_mem>(42); + test<long long, omp::allocator::low_lat_mem>(42); + test<long long, omp::allocator::cgroup_mem>(42); + test<long long, omp::allocator::pteam_mem>(42); + test<long long, omp::allocator::thread_mem>(42); +#ifdef __gnu_linux__ + test<long long, ompx::allocator::gnu_pinned_mem>(42); +#endif + + test<S, omp::allocator::null_allocator>( S{42, true, 128.f}); + test<S, omp::allocator::default_mem>( S{42, true, 128.f}); + test<S, omp::allocator::large_cap_mem>( S{42, true, 128.f}); + test<S, omp::allocator::const_mem>( S{42, true, 128.f}); + test<S, omp::allocator::high_bw_mem>( S{42, true, 128.f}); + test<S, omp::allocator::low_lat_mem>( S{42, true, 128.f}); + test<S, omp::allocator::cgroup_mem>( S{42, true, 128.f}); + test<S, omp::allocator::pteam_mem>( S{42, true, 128.f}); + test<S, omp::allocator::thread_mem>( S{42, true, 128.f}); 
+#ifdef __gnu_linux__ + test<S, ompx::allocator::gnu_pinned_mem>(S{42, true, 128.f}); +#endif + + test_inequality<int, omp::allocator::null_allocator>(); + test_inequality<int, omp::allocator::default_mem>(); + test_inequality<int, omp::allocator::large_cap_mem>(); + test_inequality<int, omp::allocator::const_mem>(); + test_inequality<int, omp::allocator::high_bw_mem>(); + test_inequality<int, omp::allocator::low_lat_mem>(); + test_inequality<int, omp::allocator::cgroup_mem>(); + test_inequality<int, omp::allocator::pteam_mem>(); + test_inequality<int, omp::allocator::thread_mem>(); +#ifdef __gnu_linux__ + test_inequality<int, ompx::allocator::gnu_pinned_mem>(); +#endif +} diff --git a/libgomp/testsuite/libgomp.c++/allocator-2.C b/libgomp/testsuite/libgomp.c++/allocator-2.C new file mode 100644 index 0000000..ca94fc7 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/allocator-2.C @@ -0,0 +1,141 @@ +// { dg-do run } +// { dg-additional-options "-Wno-psabi" } + +#include <omp.h> +#include <vector> + +template<typename T> +bool ptr_is_aligned(T *ptr, std::size_t alignment) +{ + /* ALIGNMENT must be a power of 2. */ + if ((alignment & (alignment - 1)) != 0) + __builtin_abort (); + __UINTPTR_TYPE__ ptr_value + = reinterpret_cast<__UINTPTR_TYPE__>(static_cast<void*>(ptr)); + return (ptr_value % alignment) == 0; +} + +template<typename T, template<typename> class Alloc> +void f (T v0, T v1, T v2, T v3) +{ + std::vector<T, Alloc<T>> vec; + vec.push_back (v0); + vec.push_back (v1); + vec.push_back (v2); + vec.push_back (v3); + if (vec.at (0) != v0) + __builtin_abort (); + if (vec.at (1) != v1) + __builtin_abort (); + if (vec.at (2) != v2) + __builtin_abort (); + if (vec.at (3) != v3) + __builtin_abort (); + if (!ptr_is_aligned (&vec.at (0), alignof (T))) + __builtin_abort (); + if (!ptr_is_aligned (&vec.at (1), alignof (T))) + __builtin_abort (); + if (!ptr_is_aligned (&vec.at (2), alignof (T))) + __builtin_abort (); + if (!ptr_is_aligned (&vec.at (3), alignof (T))) + __builtin_abort (); +} + +struct S0 +{ + int _v0; + bool _v1; + float _v2; + + bool operator== (S0 const& other) const noexcept { + return _v0 == other._v0 + && _v1 == other._v1 + && _v2 == other._v2; + } + bool operator!= (S0 const& other) const noexcept { + return !this->operator==(other); + } +}; + +struct alignas(128) S1 +{ + int _v0; + bool _v1; + float _v2; + + bool operator== (S1 const& other) const noexcept { + return _v0 == other._v0 + && _v1 == other._v1 + && _v2 == other._v2; + } + bool operator!= (S1 const& other) const noexcept { + return !this->operator==(other); + } +}; + +/* Note: the test for const_mem should be disabled in the future. */ + +int main () +{ + f<int, omp::allocator::null_allocator >(0, 1, 2, 3); + f<int, omp::allocator::default_mem >(0, 1, 2, 3); + f<int, omp::allocator::large_cap_mem >(0, 1, 2, 3); + f<int, omp::allocator::const_mem >(0, 1, 2, 3); + f<int, omp::allocator::high_bw_mem >(0, 1, 2, 3); + f<int, omp::allocator::low_lat_mem >(0, 1, 2, 3); + f<int, omp::allocator::cgroup_mem >(0, 1, 2, 3); + f<int, omp::allocator::pteam_mem >(0, 1, 2, 3); + f<int, omp::allocator::thread_mem >(0, 1, 2, 3); +#ifdef __gnu_linux__ + /* Pinning not implemented on other targets. 
*/ + f<int, ompx::allocator::gnu_pinned_mem>(0, 1, 2, 3); +#endif + + f<long long, omp::allocator::null_allocator >(0, 1, 2, 3); + f<long long, omp::allocator::default_mem >(0, 1, 2, 3); + f<long long, omp::allocator::large_cap_mem >(0, 1, 2, 3); + f<long long, omp::allocator::const_mem >(0, 1, 2, 3); + f<long long, omp::allocator::high_bw_mem >(0, 1, 2, 3); + f<long long, omp::allocator::low_lat_mem >(0, 1, 2, 3); + f<long long, omp::allocator::cgroup_mem >(0, 1, 2, 3); + f<long long, omp::allocator::pteam_mem >(0, 1, 2, 3); + f<long long, omp::allocator::thread_mem >(0, 1, 2, 3); +#ifdef __gnu_linux__ + f<long long, ompx::allocator::gnu_pinned_mem>(0, 1, 2, 3); +#endif + + S0 s0_0{ 42, true, 111128.f}; + S0 s0_1{ 142, false, 11128.f}; + S0 s0_2{ 1142, true, 1128.f}; + S0 s0_3{11142, false, 128.f}; + f<S0, omp::allocator::null_allocator >(s0_0, s0_1, s0_2, s0_3); + f<S0, omp::allocator::default_mem >(s0_0, s0_1, s0_2, s0_3); + f<S0, omp::allocator::large_cap_mem >(s0_0, s0_1, s0_2, s0_3); + f<S0, omp::allocator::const_mem >(s0_0, s0_1, s0_2, s0_3); + f<S0, omp::allocator::high_bw_mem >(s0_0, s0_1, s0_2, s0_3); + f<S0, omp::allocator::low_lat_mem >(s0_0, s0_1, s0_2, s0_3); + f<S0, omp::allocator::cgroup_mem >(s0_0, s0_1, s0_2, s0_3); + f<S0, omp::allocator::pteam_mem >(s0_0, s0_1, s0_2, s0_3); + f<S0, omp::allocator::thread_mem >(s0_0, s0_1, s0_2, s0_3); +#ifdef __gnu_linux__ + f<S0, ompx::allocator::gnu_pinned_mem>(s0_0, s0_1, s0_2, s0_3); +#endif + + S1 s1_0{ 42, true, 111128.f}; + S1 s1_1{ 142, false, 11128.f}; + S1 s1_2{ 1142, true, 1128.f}; + S1 s1_3{11142, false, 128.f}; + + f<S1, omp::allocator::null_allocator >(s1_0, s1_1, s1_2, s1_3); + f<S1, omp::allocator::default_mem >(s1_0, s1_1, s1_2, s1_3); + f<S1, omp::allocator::large_cap_mem >(s1_0, s1_1, s1_2, s1_3); + f<S1, omp::allocator::const_mem >(s1_0, s1_1, s1_2, s1_3); + f<S1, omp::allocator::high_bw_mem >(s1_0, s1_1, s1_2, s1_3); + f<S1, omp::allocator::low_lat_mem >(s1_0, s1_1, s1_2, s1_3); + f<S1, omp::allocator::cgroup_mem >(s1_0, s1_1, s1_2, s1_3); + f<S1, omp::allocator::pteam_mem >(s1_0, s1_1, s1_2, s1_3); + f<S1, omp::allocator::thread_mem >(s1_0, s1_1, s1_2, s1_3); +#ifdef __gnu_linux__ + f<S1, ompx::allocator::gnu_pinned_mem>(s1_0, s1_1, s1_2, s1_3); +#endif +} diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-1.C b/libgomp/testsuite/libgomp.c++/declare-mapper-1.C new file mode 100644 index 0000000..aba4f42 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/declare-mapper-1.C @@ -0,0 +1,87 @@ +// { dg-do run } + +#include <cstdlib> +#include <cassert> + +#define N 64 + +struct points +{ + double *x; + double *y; + double *z; + size_t len; +}; + +#pragma omp declare mapper(points p) map(to:p.x, p.y, p.z) \ + map(p.x[0:p.len]) \ + map(p.y[0:p.len]) \ + map(p.z[0:p.len]) + +struct shape +{ + points tmp; + points *pts; + int metadata[128]; +}; + +#pragma omp declare mapper(shape s) map(tofrom:s.pts, *s.pts) map(alloc:s.tmp) + +void +alloc_points (points *pts, size_t sz) +{ + pts->x = new double[sz]; + pts->y = new double[sz]; + pts->z = new double[sz]; + pts->len = sz; + for (int i = 0; i < sz; i++) + pts->x[i] = pts->y[i] = pts->z[i] = 0; +} + +int main (int argc, char *argv[]) +{ + shape myshape; + points mypts; + + myshape.pts = &mypts; + + alloc_points (&myshape.tmp, N); + myshape.pts = new points; + alloc_points (myshape.pts, N); + + #pragma omp target map(myshape) + { + for (int i = 0; i < N; i++) + { + myshape.pts->x[i]++; + myshape.pts->y[i]++; + myshape.pts->z[i]++; + } + } + + for (int i = 0; i < N; i++) + 
{ + assert (myshape.pts->x[i] == 1); + assert (myshape.pts->y[i] == 1); + assert (myshape.pts->z[i] == 1); + } + + #pragma omp target + { + for (int i = 0; i < N; i++) + { + myshape.pts->x[i]++; + myshape.pts->y[i]++; + myshape.pts->z[i]++; + } + } + + for (int i = 0; i < N; i++) + { + assert (myshape.pts->x[i] == 2); + assert (myshape.pts->y[i] == 2); + assert (myshape.pts->z[i] == 2); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-2.C b/libgomp/testsuite/libgomp.c++/declare-mapper-2.C new file mode 100644 index 0000000..d848fdb --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/declare-mapper-2.C @@ -0,0 +1,55 @@ +// { dg-do run } + +#include <cassert> + +#define N 256 + +struct doublebuf +{ + int buf_a[N][N]; + int buf_b[N][N]; +}; + +#pragma omp declare mapper(lo:doublebuf b) map(b.buf_a[0:N/2][0:N]) \ + map(b.buf_b[0:N/2][0:N]) + +#pragma omp declare mapper(hi:doublebuf b) map(b.buf_a[N/2:N/2][0:N]) \ + map(b.buf_b[N/2:N/2][0:N]) + +int main (int argc, char *argv[]) +{ + doublebuf db; + + for (int i = 0; i < N; i++) + for (int j = 0; j < N; j++) + db.buf_a[i][j] = db.buf_b[i][j] = 0; + + #pragma omp target map(mapper(lo), tofrom:db) + { + for (int i = 0; i < N / 2; i++) + for (int j = 0; j < N; j++) + { + db.buf_a[i][j]++; + db.buf_b[i][j]++; + } + } + + #pragma omp target map(mapper(hi), tofrom:db) + { + for (int i = N / 2; i < N; i++) + for (int j = 0; j < N; j++) + { + db.buf_a[i][j]++; + db.buf_b[i][j]++; + } + } + + for (int i = 0; i < N; i++) + for (int j = 0; j < N; j++) + { + assert (db.buf_a[i][j] == 1); + assert (db.buf_b[i][j] == 1); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-3.C b/libgomp/testsuite/libgomp.c++/declare-mapper-3.C new file mode 100644 index 0000000..ea9b7de --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/declare-mapper-3.C @@ -0,0 +1,63 @@ +// { dg-do run } + +#include <cstdlib> +#include <cassert> + +struct S { + int *myarr; +}; + +#pragma omp declare mapper (S s) map(to:s.myarr) map (tofrom: s.myarr[0:20]) + +namespace A { +#pragma omp declare mapper (S s) map(to:s.myarr) map (tofrom: s.myarr[0:100]) +} + +namespace B { +#pragma omp declare mapper (S s) map(to:s.myarr) map (tofrom: s.myarr[100:100]) +} + +namespace A +{ + void incr_a (S my_s) + { +#pragma omp target + { + for (int i = 0; i < 100; i++) + my_s.myarr[i]++; + } + } +} + +namespace B +{ + void incr_b (S my_s) + { +#pragma omp target + { + for (int i = 100; i < 200; i++) + my_s.myarr[i]++; + } + } +} + +int main (int argc, char *argv[]) +{ + S my_s; + + my_s.myarr = (int *) calloc (200, sizeof (int)); + +#pragma omp target + { + for (int i = 0; i < 20; i++) + my_s.myarr[i]++; + } + + A::incr_a (my_s); + B::incr_b (my_s); + + for (int i = 0; i < 200; i++) + assert (my_s.myarr[i] == ((i < 20) ? 2 : 1));
+ + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-4.C b/libgomp/testsuite/libgomp.c++/declare-mapper-4.C new file mode 100644 index 0000000..f194e63 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/declare-mapper-4.C @@ -0,0 +1,63 @@ +// { dg-do run } + +#include <cstdlib> +#include <cassert> + +struct S { + int *myarr; +}; + +#pragma omp declare mapper (S s) map(to:s.myarr) map (tofrom: s.myarr[0:20]) + +namespace A { +#pragma omp declare mapper (S s) map(to:s.myarr) map (tofrom: s.myarr[0:100]) +} + +namespace B { +#pragma omp declare mapper (S s) map(to:s.myarr) map (tofrom: s.myarr[100:100]) +} + +namespace A +{ + void incr_a (S &my_s) + { +#pragma omp target + { + for (int i = 0; i < 100; i++) + my_s.myarr[i]++; + } + } +} + +namespace B +{ + void incr_b (S &my_s) + { +#pragma omp target + { + for (int i = 100; i < 200; i++) + my_s.myarr[i]++; + } + } +} + +int main (int argc, char *argv[]) +{ + S my_s; + + my_s.myarr = (int *) calloc (200, sizeof (int)); + +#pragma omp target + { + for (int i = 0; i < 20; i++) + my_s.myarr[i]++; + } + + A::incr_a (my_s); + B::incr_b (my_s); + + for (int i = 0; i < 200; i++) + assert (my_s.myarr[i] == ((i < 20) ? 2 : 1)); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-5.C b/libgomp/testsuite/libgomp.c++/declare-mapper-5.C new file mode 100644 index 0000000..0030de8 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/declare-mapper-5.C @@ -0,0 +1,52 @@ +// { dg-do run } + +#include <cassert> + +struct S +{ + int *myarr; + int len; +}; + +class C +{ + S smemb; +#pragma omp declare mapper (custom:S s) map(to:s.myarr) \ + map(tofrom:s.myarr[0:s.len]) + +public: + C(int l) + { + smemb.myarr = new int[l]; + smemb.len = l; + for (int i = 0; i < l; i++) + smemb.myarr[i] = 0; + } + void bump(); + void check(); +}; + +void +C::bump () +{ +#pragma omp target map(mapper(custom), tofrom: smemb) + { + for (int i = 0; i < smemb.len; i++) + smemb.myarr[i]++; + } +} + +void +C::check () +{ + for (int i = 0; i < smemb.len; i++) + assert (smemb.myarr[i] == 1); +} + +int main (int argc, char *argv[]) +{ + C test (100); + test.bump (); + test.check (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-6.C b/libgomp/testsuite/libgomp.c++/declare-mapper-6.C new file mode 100644 index 0000000..14ed10d --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/declare-mapper-6.C @@ -0,0 +1,37 @@ +// { dg-do run } + +#include <cassert> + +template <typename T> +void adjust (T param) +{ +#pragma omp declare mapper (T x) map(to:x.len, x.base) \ + map(tofrom:x.base[0:x.len]) + +#pragma omp target + for (int i = 0; i < param.len; i++) + param.base[i]++; +} + +struct S { + int len; + int *base; +}; + +int main (int argc, char *argv[]) +{ + S a; + + a.len = 100; + a.base = new int[a.len]; + + for (int i = 0; i < a.len; i++) + a.base[i] = 0; + + adjust (a); + + for (int i = 0; i < a.len; i++) + assert (a.base[i] == 1); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-7.C b/libgomp/testsuite/libgomp.c++/declare-mapper-7.C new file mode 100644 index 0000000..ba4792a --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/declare-mapper-7.C @@ -0,0 +1,59 @@ +// { dg-do run } + +#include <cassert> + +struct S +{ + int *myarr; +}; + +struct T +{ + S *s; +}; + +#pragma omp declare mapper (s100: S x) map(to: x.myarr) \ + map(tofrom: x.myarr[0:100]) +// Define this because ...
+#pragma omp declare mapper (default: S x) map(to: x.myarr) \ + map(tofrom: x.myarr[0:100]) + + +void +bump (T t) +{ + /* Here we have an implicit/default mapper invoking a named mapper. We + need to make sure that can be located properly at gimplification + time. */ + +// ... the following is invalid in OpenMP - albeit supported by GCC +// (after disabling: error: in ‘declare mapper’ directives, parameter to ‘mapper’ modifier must be ‘default’) + +// #pragma omp declare mapper (T t) map(to:t.s) map(mapper(s100), tofrom: t.s[0]) + +// ... thus, we now use ... +#pragma omp declare mapper (T t) map(to:t.s) map(mapper(default), tofrom: t.s[0]) + +#pragma omp target + for (int i = 0; i < 100; i++) + t.s->myarr[i]++; +} + +int main (int argc, char *argv[]) +{ + S my_s; + T my_t; + + my_s.myarr = new int[100]; + my_t.s = &my_s; + + for (int i = 0; i < 100; i++) + my_s.myarr[i] = 0; + + bump (my_t); + + for (int i = 0; i < 100; i++) + assert (my_s.myarr[i] == 1); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/declare-mapper-8.C b/libgomp/testsuite/libgomp.c++/declare-mapper-8.C new file mode 100644 index 0000000..3818e52 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/declare-mapper-8.C @@ -0,0 +1,61 @@ +// { dg-do run } + +#include <cassert> + +struct S +{ + int *myarr; + int len; +}; + +template<typename T> +class C +{ + T memb; +#pragma omp declare mapper (T t) map(to:t.len, t.myarr) \ + map(tofrom:t.myarr[0:t.len]) + +public: + C(int sz); + ~C(); + void bump(); + void check(); +}; + +template<typename T> +C<T>::C(int sz) +{ + memb.myarr = new int[sz]; + for (int i = 0; i < sz; i++) + memb.myarr[i] = 0; + memb.len = sz; +} + +template<typename T> +C<T>::~C() +{ + delete[] memb.myarr; +} + +template<typename T> +void C<T>::bump() +{ +#pragma omp target map(memb) + for (int i = 0; i < memb.len; i++) + memb.myarr[i]++; +} + +template<typename T> +void C<T>::check() +{ + for (int i = 0; i < memb.len; i++) + assert (memb.myarr[i] == 1); +} + +int main(int argc, char *argv[]) +{ + C<S> c_int(100); + c_int.bump(); + c_int.check(); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/declare_target-2.C b/libgomp/testsuite/libgomp.c++/declare_target-2.C new file mode 100644 index 0000000..ab94a55 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/declare_target-2.C @@ -0,0 +1,25 @@ +// { dg-do link } + +// Actually not needed: -fipa-cp is default with -O2: +// { dg-additional-options "-O2 -fipa-cp" } + +// The code failed because 'std::endl' becomes implicitly 'declare target' +// but not the 'widen' function it calls.  While the linker had no issues +// (endl is never called, either because it is inlined or optimized away), +// the IPA-CP (enabled by -O2 and higher) failed as the definition for +// 'widen' did not exist on the offload side.
+ +#include <iostream> + +void func (int m) +{ + if (m < 0) + std::cout << "should not happen" << std::endl; +} + + +int main() +{ + #pragma omp target + func (1); +} diff --git a/libgomp/testsuite/libgomp.c++/pr106445-1-O0.C b/libgomp/testsuite/libgomp.c++/pr106445-1-O0.C new file mode 100644 index 0000000..bcd499c --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/pr106445-1-O0.C @@ -0,0 +1,3 @@ +// { dg-additional-options -O0 } + +#include "pr106445-1.C" diff --git a/libgomp/testsuite/libgomp.c++/pr106445-1.C b/libgomp/testsuite/libgomp.c++/pr106445-1.C new file mode 100644 index 0000000..329ce62 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/pr106445-1.C @@ -0,0 +1,18 @@ +#include <vector> + +int main() +{ +#pragma omp target + { + { + std::vector<int> v; + if (!v.empty()) + __builtin_abort(); + } + { + std::vector<int> v(100); + if (v.capacity() < 100) + __builtin_abort(); + } + } +} diff --git a/libgomp/testsuite/libgomp.c++/pr119692-1-1.C b/libgomp/testsuite/libgomp.c++/pr119692-1-1.C new file mode 100644 index 0000000..1f59b15 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/pr119692-1-1.C @@ -0,0 +1,10 @@ +/* PR119692 "C++ 'typeinfo', 'vtable' vs. OpenACC, OpenMP 'target' offloading" */ + +/* { dg-additional-options -UDEFAULT } + Wrong code for offloading execution. + { dg-additional-options -foffload=disable } */ +/* { dg-additional-options -fdump-tree-gimple } */ + +#include "../libgomp.oacc-c++/pr119692-1-1.C" + +/* { dg-final { scan-tree-dump-not {(?n)#pragma omp target .* map\(tofrom:_ZTI2C2 \[len: [0-9]+\] \[runtime_implicit\]\) map\(tofrom:_ZTI2C1 \[len: [0-9]+\] \[runtime_implicit\]\) map\(tofrom:_ZTV2C1 \[len: [0-9]+\] \[runtime_implicit\]\)$} gimple { xfail *-*-* } } } */ diff --git a/libgomp/testsuite/libgomp.c++/pr119692-1-2.C b/libgomp/testsuite/libgomp.c++/pr119692-1-2.C new file mode 100644 index 0000000..e7ac818 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/pr119692-1-2.C @@ -0,0 +1,11 @@ +/* PR119692 "C++ 'typeinfo', 'vtable' vs. OpenACC, OpenMP 'target' offloading" */ + +/* { dg-additional-options -DDEFAULT=defaultmap(none) } + Fails to compile. + { dg-do compile } */ + +#include "pr119692-1-1.C" + +/* { dg-bogus {error: '_ZTV2C1' not specified in enclosing 'target'} PR119692 { xfail *-*-* } 0 } + { dg-bogus {error: '_ZTI2C2' not specified in enclosing 'target'} PR119692 { xfail *-*-* } 0 } + { dg-bogus {error: '_ZTI2C1' not specified in enclosing 'target'} PR119692 { xfail *-*-* } 0 } */ diff --git a/libgomp/testsuite/libgomp.c++/pr119692-1-3.C b/libgomp/testsuite/libgomp.c++/pr119692-1-3.C new file mode 100644 index 0000000..733feb8 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/pr119692-1-3.C @@ -0,0 +1,10 @@ +/* PR119692 "C++ 'typeinfo', 'vtable' vs. OpenACC, OpenMP 'target' offloading" */ + +/* { dg-additional-options -DDEFAULT=defaultmap(present) } + Wrong code for offloading execution. 
+ { dg-xfail-run-if PR119692 { offload_device } } */ +/* { dg-additional-options -fdump-tree-gimple } */ + +#include "pr119692-1-1.C" + +/* { dg-final { scan-tree-dump-not {(?n)#pragma omp target .* defaultmap\(present\) map\(force_present:_ZTI2C2 \[len: [0-9]+\] \[runtime_implicit\]\) map\(force_present:_ZTI2C1 \[len: [0-9]+\] \[runtime_implicit\]\) map\(force_present:_ZTV2C1 \[len: [0-9]+\] \[runtime_implicit\]\)$} gimple { xfail *-*-* } } } */ diff --git a/libgomp/testsuite/libgomp.c++/pr119692-1-4.C b/libgomp/testsuite/libgomp.c++/pr119692-1-4.C new file mode 100644 index 0000000..af9fe1c --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/pr119692-1-4.C @@ -0,0 +1,13 @@ +/* PR119692 "C++ 'typeinfo', 'vtable' vs. OpenACC, OpenMP 'target' offloading" */ + +/* { dg-additional-options -DDEFAULT=defaultmap(firstprivate) } + Wrong code for offloading execution. + { dg-xfail-run-if PR119692 { offload_device } } */ +/* There are configurations where we 'WARNING: program timed out.' while in + 'dynamic_cast', see <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119692#c6>. + { dg-timeout 10 } ... to make sure that happens quickly. */ +/* { dg-additional-options -fdump-tree-gimple } */ + +#include "pr119692-1-1.C" + +/* { dg-final { scan-tree-dump-not {(?n)#pragma omp target .* defaultmap\(firstprivate\) firstprivate\(_ZTI2C2\) firstprivate\(_ZTI2C1\) firstprivate\(_ZTV2C1\)$} gimple { xfail *-*-* } } } */ diff --git a/libgomp/testsuite/libgomp.c++/pr119692-1-5.C b/libgomp/testsuite/libgomp.c++/pr119692-1-5.C new file mode 100644 index 0000000..e5c6e07 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/pr119692-1-5.C @@ -0,0 +1,13 @@ +/* PR119692 "C++ 'typeinfo', 'vtable' vs. OpenACC, OpenMP 'target' offloading" */ + +/* { dg-additional-options -DDEFAULT=defaultmap(to) } + Wrong code for offloading execution. + { dg-xfail-run-if PR119692 { offload_device } } */ +/* There are configurations where we 'WARNING: program timed out.' while in + 'dynamic_cast', see <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119692#c6>. + { dg-timeout 10 } ... to make sure that happens quickly. */ +/* { dg-additional-options -fdump-tree-gimple } */ + +#include "pr119692-1-1.C" + +/* { dg-final { scan-tree-dump-not {(?n)#pragma omp target .* defaultmap\(to\) map\(to:_ZTI2C2 \[len: [0-9]+\] \[runtime_implicit\]\) map\(to:_ZTI2C1 \[len: [0-9]+\] \[runtime_implicit\]\) map\(to:_ZTV2C1 \[len: [0-9]+\] \[runtime_implicit\]\)$} gimple { xfail *-*-* } } } */ diff --git a/libgomp/testsuite/libgomp.c++/pr96390.C b/libgomp/testsuite/libgomp.c++/pr96390.C index 1f3c3e0..be19601 100644 --- a/libgomp/testsuite/libgomp.c++/pr96390.C +++ b/libgomp/testsuite/libgomp.c++/pr96390.C @@ -1,6 +1,4 @@ /* { dg-additional-options "-O0 -fdump-tree-omplower" } */ -/* { dg-additional-options "-foffload=-Wa,--verify" { target offload_target_nvptx } } */ -/* { dg-xfail-if "PR 97106/PR 97102 - .alias not (yet) supported for nvptx" { offload_target_nvptx } } */ #include <cstdlib> #include <type_traits> diff --git a/libgomp/testsuite/libgomp.c++/target-cdtor-1.C b/libgomp/testsuite/libgomp.c++/target-cdtor-1.C new file mode 100644 index 0000000..ecb029e --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-cdtor-1.C @@ -0,0 +1,104 @@ +/* Offloaded C++ objects construction and destruction. 
*/ + +/* { dg-additional-options -fdump-tree-optimized-raw-asmname } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw-asmname } */ + +#include <omp.h> +#include <vector> + +#pragma omp declare target + +struct S +{ + int x; + + S() + : x(-1) + { + __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device()); + } + S(int x) + : x(x) + { + __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device()); + } + ~S() + { + __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device()); + } +}; + +#pragma omp end declare target + +S sH1(7); + +#pragma omp declare target + +S sHD1(5); + +std::vector<S> svHD1(2); + +#pragma omp end declare target + +S sH2(3); + +int main() +{ + int c = 0; + + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + +#pragma omp target map(c) + { + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + } + +#pragma omp target map(c) + { + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + } + + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + + return 0; +} + +/* Verify '__cxa_atexit' calls. + + For the host, there are four expected calls: + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, } 4 optimized { target cxa_atexit } } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH2, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + + For the device, there are two expected calls: + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, } 2 optimized { target cxa_atexit } } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } +*/ + +/* C++ objects are constructed in order of appearance (..., and destructed in reverse order). + { dg-output {S, 7, 1[\r\n]+} } + { dg-output {S, 5, 1[\r\n]+} } + { dg-output {S, -1, 1[\r\n]+} } + { dg-output {S, -1, 1[\r\n]+} } + { dg-output {S, 3, 1[\r\n]+} } + { dg-output {main:1, 1[\r\n]+} } + { dg-output {S, 5, 0[\r\n]+} { target offload_device } } + { dg-output {S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {main:2, 1[\r\n]+} { target { ! offload_device } } } + { dg-output {main:2, 0[\r\n]+} { target offload_device } } + { dg-output {main:3, 1[\r\n]+} { target { ! 
offload_device } } } + { dg-output {main:3, 0[\r\n]+} { target offload_device } } + { dg-output {main:4, 1[\r\n]+} } + { dg-output {~S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {~S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {~S, 5, 0[\r\n]+} { target offload_device } } + { dg-output {~S, 3, 1[\r\n]+} } + { dg-output {~S, -1, 1[\r\n]+} } + { dg-output {~S, -1, 1[\r\n]+} } + { dg-output {~S, 5, 1[\r\n]+} } + { dg-output {~S, 7, 1[\r\n]+} } +*/ diff --git a/libgomp/testsuite/libgomp.c++/target-cdtor-2.C b/libgomp/testsuite/libgomp.c++/target-cdtor-2.C new file mode 100644 index 0000000..75e48ca --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-cdtor-2.C @@ -0,0 +1,140 @@ +/* Offloaded 'constructor' and 'destructor' functions, and C++ objects construction and destruction. */ + +/* { dg-require-effective-target init_priority } */ + +/* { dg-additional-options -fdump-tree-optimized-raw-asmname } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw-asmname } */ + +#include <omp.h> +#include <vector> + +#pragma omp declare target + +struct S +{ + int x; + + S() + : x(-1) + { + __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device()); + } + S(int x) + : x(x) + { + __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device()); + } + ~S() + { + __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device()); + } +}; + +#pragma omp end declare target + +S sH1 __attribute__((init_priority(1500))) (7); + +#pragma omp declare target + +S sHD1 __attribute__((init_priority(2000))) (5); + +std::vector<S> svHD1 __attribute__((init_priority(1000))) (2); + +static void +__attribute__((constructor(20000))) +initDH1() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +static void +__attribute__((destructor(20000))) +finiDH1() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +#pragma omp end declare target + +S sH2 __attribute__((init_priority(500))) (3); + +static void +__attribute__((constructor(10000))) +initH1() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +static void +__attribute__((destructor(10000))) +finiH1() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +int main() +{ + int c = 0; + + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + +#pragma omp target map(c) + { + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + } + +#pragma omp target map(c) + { + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + } + + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + + return 0; +} + +/* Verify '__cxa_atexit' calls. 
+ + For the host, there are four expected calls: + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, } 4 optimized { target cxa_atexit } } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH2, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + + For the device, there are two expected calls: + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, } 2 optimized { target cxa_atexit } } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } } +*/ + +/* Defined order in which 'constructor' functions, and 'destructor' functions are run, and C++ objects are constructed (..., and destructed in reverse order). + { dg-output {S, 3, 1[\r\n]+} } + { dg-output {S, -1, 1[\r\n]+} } + { dg-output {S, -1, 1[\r\n]+} } + { dg-output {S, 7, 1[\r\n]+} } + { dg-output {S, 5, 1[\r\n]+} } + { dg-output {initH1, 1[\r\n]+} } + { dg-output {initDH1, 1[\r\n]+} } + { dg-output {main:1, 1[\r\n]+} } + { dg-output {S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {S, 5, 0[\r\n]+} { target offload_device } } + { dg-output {initDH1, 0[\r\n]+} { target offload_device } } + { dg-output {main:2, 1[\r\n]+} { target { ! offload_device } } } + { dg-output {main:2, 0[\r\n]+} { target offload_device } } + { dg-output {main:3, 1[\r\n]+} { target { ! offload_device } } } + { dg-output {main:3, 0[\r\n]+} { target offload_device } } + { dg-output {main:4, 1[\r\n]+} } + { dg-output {~S, 5, 0[\r\n]+} { target offload_device } } + { dg-output {~S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {~S, -1, 0[\r\n]+} { target offload_device } } + { dg-output {finiDH1, 0[\r\n]+} { target offload_device } } + { dg-output {~S, 5, 1[\r\n]+} } + { dg-output {~S, 7, 1[\r\n]+} } + { dg-output {~S, -1, 1[\r\n]+} } + { dg-output {~S, -1, 1[\r\n]+} } + { dg-output {~S, 3, 1[\r\n]+} } + { dg-output {finiDH1, 1[\r\n]+} } + { dg-output {finiH1, 1[\r\n]+} } +*/ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-1.C b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-1.C new file mode 100644 index 0000000..a862652 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-1.C @@ -0,0 +1,28 @@ +/* 'std::bad_cast' exception in OpenMP 'target' region. 
*/ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "../libgomp.oacc-c++/exceptions-bad_cast-1.C" + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + For host execution, we print something like: + terminate called after throwing an instance of 'std::bad_cast' + what(): std::bad_cast + Aborted (core dumped) + { dg-output {.*std::bad_cast} { target { ! offload_device } } } + For GCN, nvptx offload execution, we don't print anything, but just 'abort'. + + TODO For GCN, nvptx offload execution, this currently doesn't 'abort' due to + the 'std::bad_cast' exception, but rather due to SIGSEGV in 'dynamic_cast'; + PR119692. + + { dg-shouldfail {'std::bad_cast' exception} } */ +/* There are configurations where we 'WARNING: program timed out.' while in + 'dynamic_cast', see <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119692#c6>. + { dg-timeout 10 } ... to make sure that happens quickly. */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-2-offload-sorry-GCN.C b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-2-offload-sorry-GCN.C new file mode 100644 index 0000000..93884df --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-2-offload-sorry-GCN.C @@ -0,0 +1,19 @@ +/* 'std::bad_cast' exception in OpenMP 'target' region, caught, '-foffload-options=-mno-fake-exceptions'. */ + +/* As this test case involves an expected offload compilation failure, we have to handle each offload target individually. + { dg-do link { target offload_target_amdgcn } } + { dg-additional-options -foffload=amdgcn-amdhsa } */ +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -foffload-options=-mno-fake-exceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "target-exceptions-bad_cast-2.C" + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + Given '-foffload-options=-mno-fake-exceptions', offload compilation fails: + { dg-regexp {[^\r\n]+: In function 'main[^']+':[\r\n]+(?:[^\r\n]+: sorry, unimplemented: exception handling not supported[\r\n]+)+} } + (Note, using 'dg-regexp' instead of 'dg-message', as the former runs before the auto-mark-UNSUPPORTED.) + { dg-excess-errors {'mkoffload' failure etc.} } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-2-offload-sorry-nvptx.C b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-2-offload-sorry-nvptx.C new file mode 100644 index 0000000..83ec89b --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-2-offload-sorry-nvptx.C @@ -0,0 +1,19 @@ +/* 'std::bad_cast' exception in OpenMP 'target' region, caught, '-foffload-options=-mno-fake-exceptions'. */ + +/* As this test case involves an expected offload compilation failure, we have to handle each offload target individually. 
+ { dg-do link { target offload_target_nvptx } } + { dg-additional-options -foffload=nvptx-none } */ +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -foffload-options=-mno-fake-exceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "target-exceptions-bad_cast-2.C" + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + Given '-foffload-options=-mno-fake-exceptions', offload compilation fails: + { dg-regexp {[^\r\n]+: In function 'main[^']+':[\r\n]+(?:[^\r\n]+: sorry, unimplemented: exception handling not supported[\r\n]+)+} } + (Note, using 'dg-regexp' instead of 'dg-message', as the former runs before the auto-mark-UNSUPPORTED.) + { dg-excess-errors {'mkoffload' failure etc.} } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-2.C b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-2.C new file mode 100644 index 0000000..ff15c9f --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-2.C @@ -0,0 +1,27 @@ +/* 'std::bad_cast' exception in OpenMP 'target' region, caught. */ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ +/* { dg-bogus {_ZTISt8bad_cast} PR119734 { target offload_target_nvptx xfail *-*-* } 0 } + { dg-excess-errors {'mkoffload' failure etc.} { xfail offload_target_nvptx } } */ + +#include "../libgomp.oacc-c++/exceptions-bad_cast-2.C" + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + { dg-output {.*caught 'std::bad_cast'[\r\n]+} { target { ! offload_device } } } + For GCN, nvptx offload execution, we don't print anything, but just 'abort'. + + TODO For GCN, nvptx offload execution, this currently doesn't 'abort' due to + the 'std::bad_cast' exception, but rather due to SIGSEGV in 'dynamic_cast'; + PR119692. + + For GCN, nvptx offload execution, there is no 'catch'ing; any exception is fatal. + { dg-shouldfail {'MyException' exception} { offload_device } } */ +/* There are configurations where we 'WARNING: program timed out.' while in + 'dynamic_cast', see <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119692#c6>. + { dg-timeout 10 } ... to make sure that happens quickly. */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-3.C b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-3.C new file mode 100644 index 0000000..efed64f --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-bad_cast-3.C @@ -0,0 +1,17 @@ +/* 'std::bad_cast' exception in OpenMP 'target' region, dead code. */ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -DDEFAULT=defaultmap(to) } + ... to avoid wrong code for offloading execution; PR119692. + With this, the device code still isn't correct, but the defects are in dead code. 
+ { dg-additional-options -fdump-tree-gimple } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "../libgomp.oacc-c++/exceptions-bad_cast-3.C" + +/* { dg-final { scan-tree-dump-not {(?n)#pragma omp target .* defaultmap\(to\) map\(to:_ZTI2C2 \[len: [0-9]+\] \[runtime_implicit\]\) map\(to:_ZTI2C1 \[len: [0-9]+\] \[runtime_implicit\]\) map\(to:_ZTV2C1 \[len: [0-9]+\] \[runtime_implicit\]\)$} gimple { xfail *-*-* } } } */ + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C new file mode 100644 index 0000000..d4dccf1 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-GCN.C @@ -0,0 +1,26 @@ +/* Exception handling constructs in dead code, '-foffload-options=-mno-fake-exceptions'. */ + +/* As this test case involves an expected offload compilation failure, we have to handle each offload target individually. + { dg-do link { target offload_target_amdgcn } } + { dg-additional-options -foffload=amdgcn-amdhsa } */ +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -foffload-options=-mno-fake-exceptions } */ +/* { dg-additional-options -O0 } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "target-exceptions-pr118794-1.C" + +/* In this specific C++ arrangement, distilled from PR118794, GCC synthesizes + '__builtin_eh_pointer', '__builtin_unwind_resume' calls as dead code in 'f': + { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } } + { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } } + ..., just 'targetm.arm_eabi_unwinder' is different: + { dg-final { scan-tree-dump-times {gimple_call <__builtin_cxa_end_cleanup, } 1 optimized { target { arm_eabi || tic6x-*-* } } } } + { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } } + { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } } + Given '-O0' and '-foffload-options=-mno-fake-exceptions', offload compilation fails: + { dg-regexp {[^\r\n]+: In function 'f':[\r\n]+(?:[^\r\n]+: sorry, unimplemented: exception handling not supported[\r\n]+)+} } + (Note, using 'dg-regexp' instead of 'dg-message', as the former runs before the auto-mark-UNSUPPORTED.) + { dg-excess-errors {'mkoffload' failure etc.} } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C new file mode 100644 index 0000000..724e34b --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1-offload-sorry-nvptx.C @@ -0,0 +1,26 @@ +/* Exception handling constructs in dead code, '-foffload-options=-mno-fake-exceptions'. 
*/ + +/* As this test case involves an expected offload compilation failure, we have to handle each offload target individually. + { dg-do link { target offload_target_nvptx } } + { dg-additional-options -foffload=nvptx-none } */ +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -foffload-options=-mno-fake-exceptions } */ +/* { dg-additional-options -O0 } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "target-exceptions-pr118794-1.C" + +/* In this specific C++ arrangement, distilled from PR118794, GCC synthesizes + '__builtin_eh_pointer', '__builtin_unwind_resume' calls as dead code in 'f': + { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } } + { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } } + ..., just 'targetm.arm_eabi_unwinder' is different: + { dg-final { scan-tree-dump-times {gimple_call <__builtin_cxa_end_cleanup, } 1 optimized { target { arm_eabi || tic6x-*-* } } } } + { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } } + { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } } + Given '-O0' and '-foffload-options=-mno-fake-exceptions', offload compilation fails: + { dg-regexp {[^\r\n]+: In function 'f':[\r\n]+(?:[^\r\n]+: sorry, unimplemented: exception handling not supported[\r\n]+)+} } + (Note, using 'dg-regexp' instead of 'dg-message', as the former runs before the auto-mark-UNSUPPORTED.) + { dg-excess-errors {'mkoffload' failure etc.} } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1.C b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1.C new file mode 100644 index 0000000..24eb7a5 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1.C @@ -0,0 +1,59 @@ +/* Exception handling constructs in dead code. */ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -O0 } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +/* See also '../../../gcc/testsuite/g++.target/gcn/exceptions-pr118794-1.C', + '../../../gcc/testsuite/g++.target/nvptx/exceptions-pr118794-1.C'. */ + +#pragma omp begin declare target + +bool ok = false; + +template <typename T> +struct C +{ + C() + { + ok = true; + } + C(int) {}; + ~C() {}; + + __attribute__((noipa)) + void m() + { + C c; + } +}; + +inline void f() +{ + C<double> c(1); + c.m(); +} + +#pragma omp end declare target + +int main() +{ +#pragma omp target + { + f(); + } +#pragma omp target update from(ok) + if (!ok) + __builtin_abort(); +} + +/* In this specific C++ arrangement, distilled from PR118794, GCC synthesizes + '__builtin_eh_pointer', '__builtin_unwind_resume' calls as dead code in 'f': + { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized { target { ! { arm_eabi || tic6x-*-* } } } } } + { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized { target { ! 
{ arm_eabi || tic6x-*-* } } } } } + ..., just 'targetm.arm_eabi_unwinder' is different: + { dg-final { scan-tree-dump-times {gimple_call <__builtin_cxa_end_cleanup, } 1 optimized { target { arm_eabi || tic6x-*-* } } } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-1-O0.C b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-1-O0.C new file mode 100644 index 0000000..00d7c13 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-1-O0.C @@ -0,0 +1,23 @@ +/* 'throw' in OpenMP 'target' region. */ + +/* { dg-additional-options -O0 } */ +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "target-exceptions-throw-1.C" + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + For host execution, we print something like: + terminate called after throwing an instance of 'MyException' + Aborted (core dumped) + { dg-output {.*MyException} { target { ! offload_device } } } + For GCN, nvptx offload execution, we don't print anything, but just 'abort'. + + { dg-shouldfail {'MyException' exception} } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-1.C b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-1.C new file mode 100644 index 0000000..a4e7a10 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-1.C @@ -0,0 +1,22 @@ +/* 'throw' in OpenMP 'target' region. */ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "../libgomp.oacc-c++/exceptions-throw-1.C" + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + For host execution, we print something like: + terminate called after throwing an instance of 'MyException' + Aborted (core dumped) + { dg-output {.*MyException} { target { ! offload_device } } } + For GCN, nvptx offload execution, we don't print anything, but just 'abort'. + + { dg-shouldfail {'MyException' exception} } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2-O0.C b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2-O0.C new file mode 100644 index 0000000..b7a311d --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2-O0.C @@ -0,0 +1,25 @@ +/* 'throw' in OpenMP 'target' region, caught. 
*/ + +/* { dg-additional-options -O0 } */ +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ +/* { dg-bogus {undefined symbol: typeinfo name for MyException} PR119806 { target offload_target_amdgcn xfail *-*-* } 0 } + { dg-excess-errors {'mkoffload' failure etc.} { xfail offload_target_amdgcn } } */ +/* { dg-bogus {Initial value type mismatch} PR119806 { target offload_target_nvptx xfail *-*-* } 0 } + { dg-excess-errors {'mkoffload' failure etc.} { xfail offload_target_nvptx } } */ + +#include "target-exceptions-throw-2.C" + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-output {.*caught 'MyException'[\r\n]+} { target { ! offload_device } } } + For GCN, nvptx offload execution, we don't print anything, but just 'abort'. + + For GCN, nvptx offload execution, there is no 'catch'ing; any exception is fatal. + { dg-shouldfail {'MyException' exception} { offload_device } } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2-offload-sorry-GCN.C b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2-offload-sorry-GCN.C new file mode 100644 index 0000000..9905b1f --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2-offload-sorry-GCN.C @@ -0,0 +1,21 @@ +/* 'throw' in OpenMP 'target' region, caught, -foffload-options=-mno-fake-exceptions. */ + +/* As this test case involves an expected offload compilation failure, we have to handle each offload target individually. + { dg-do link { target offload_target_amdgcn } } + { dg-additional-options -foffload=amdgcn-amdhsa } */ +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -foffload-options=-mno-fake-exceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "target-exceptions-throw-2.C" + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + Given '-foffload-options=-mno-fake-exceptions', offload compilation fails: + { dg-regexp {[^\r\n]+: In function 'main[^']+':[\r\n]+(?:[^\r\n]+: sorry, unimplemented: exception handling not supported[\r\n]+)+} } + (Note, using 'dg-regexp' instead of 'dg-message', as the former runs before the auto-mark-UNSUPPORTED.) 
+ { dg-excess-errors {'mkoffload' failure etc.} } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2-offload-sorry-nvptx.C b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2-offload-sorry-nvptx.C new file mode 100644 index 0000000..da267d6 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2-offload-sorry-nvptx.C @@ -0,0 +1,21 @@ +/* 'throw' in OpenMP 'target' region, caught, '-foffload-options=-mno-fake-exceptions'. */ + +/* As this test case involves an expected offload compilation failure, we have to handle each offload target individually. + { dg-do link { target offload_target_nvptx } } + { dg-additional-options -foffload=nvptx-none } */ +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -foffload-options=-mno-fake-exceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "target-exceptions-throw-2.C" + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + Given '-foffload-options=-mno-fake-exceptions', offload compilation fails: + { dg-regexp {[^\r\n]+: In function 'main[^']+':[\r\n]+(?:[^\r\n]+: sorry, unimplemented: exception handling not supported[\r\n]+)+} } + (Note, using 'dg-regexp' instead of 'dg-message', as the former runs before the auto-mark-UNSUPPORTED.) + { dg-excess-errors {'mkoffload' failure etc.} } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2.C b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2.C new file mode 100644 index 0000000..97f4845 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-2.C @@ -0,0 +1,20 @@ +/* 'throw' in OpenMP 'target' region, caught. */ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "../libgomp.oacc-c++/exceptions-throw-2.C" + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-output {.*caught 'MyException'[\r\n]+} { target { ! offload_device } } } + For GCN, nvptx offload execution, we don't print anything, but just 'abort'. + + For GCN, nvptx offload execution, there is no 'catch'ing; any exception is fatal. + { dg-shouldfail {'MyException' exception} { offload_device } } */ diff --git a/libgomp/testsuite/libgomp.c++/target-exceptions-throw-3.C b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-3.C new file mode 100644 index 0000000..c35180d --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exceptions-throw-3.C @@ -0,0 +1,19 @@ +/* 'throw' in OpenMP 'target' region, dead code. 
*/ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -DDEFAULT=defaultmap(to) } + ... to avoid wrong code for offloading execution; PR119692. + With this, the device code still isn't correct, but the defects are in dead code. + { dg-additional-options -fdump-tree-gimple } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "../libgomp.oacc-c++/exceptions-throw-3.C" + +/* { dg-final { scan-tree-dump-not {(?n)#pragma omp target .* defaultmap\(to\) map\(to:_ZTI11MyException \[len: [0-9]+\] \[runtime_implicit\]\)$} gimple { xfail *-*-* } } } */ + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } */ diff --git a/libgomp/testsuite/libgomp.c++/target-flex-10.C b/libgomp/testsuite/libgomp.c++/target-flex-10.C new file mode 100644 index 0000000..8fa9af7 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-10.C @@ -0,0 +1,215 @@ +/* Basic container usage. */ + +#include <vector> +#include <deque> +#include <list> +#include <set> +#include <map> +#if __cplusplus >= 201103L +#include <array> +#include <forward_list> +#include <unordered_set> +#include <unordered_map> +#endif + +bool vector_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::vector<int> vector; + ok = vector.empty(); + } + return ok; +} + +bool deque_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::deque<int> deque; + ok = deque.empty(); + } + return ok; +} + +bool list_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::list<int> list; + ok = list.empty(); + } + return ok; +} + +bool map_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::map<int, int> map; + ok = map.empty(); + } + return ok; +} + +bool set_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::set<int> set; + ok = set.empty(); + } + return ok; +} + +bool multimap_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::multimap<int, int> multimap; + ok = multimap.empty(); + } + return ok; +} + +bool multiset_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::multiset<int, int> multiset; + ok = multiset.empty(); + } + return ok; +} + +#if __cplusplus >= 201103L + +bool array_test() +{ + static constexpr std::size_t array_size = 42; + bool ok; + #pragma omp target map(from: ok) + { + std::array<int, array_size> array{}; + ok = array[0] == 0 + && array[array_size - 1] == 0; + } + return ok; +} + +bool forward_list_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::forward_list<int> forward_list; + ok = forward_list.empty(); + } + return ok; +} + +bool unordered_map_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::unordered_map<int, int> unordered_map; + ok = unordered_map.empty(); + } + return ok; +} + +bool unordered_set_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + std::unordered_set<int> unordered_set; + ok = unordered_set.empty(); + } + return ok; +} + +bool unordered_multimap_test() +{ + + bool ok; + #pragma omp target map(from: ok) + { + std::unordered_multimap<int, int> unordered_multimap; + ok = unordered_multimap.empty(); + } + 
return ok; +} + +bool unordered_multiset_test() +{ + + bool ok; + #pragma omp target map(from: ok) + { + std::unordered_multiset<int> unordered_multiset; + ok = unordered_multiset.empty(); + } + return ok; +} + +#else +bool array_test() { return true; } +bool forward_list_test() { return true; } +bool unordered_map_test() { return true; } +bool unordered_set_test() { return true; } +bool unordered_multimap_test() { return true; } +bool unordered_multiset_test() { return true; } +#endif + +int main() +{ + const bool vec_res = vector_test(); + __builtin_printf("vector : %s\n", vec_res ? "PASS" : "FAIL"); + const bool deque_res = deque_test(); + __builtin_printf("deque : %s\n", deque_res ? "PASS" : "FAIL"); + const bool list_res = list_test(); + __builtin_printf("list : %s\n", list_res ? "PASS" : "FAIL"); + const bool map_res = map_test(); + __builtin_printf("map : %s\n", map_res ? "PASS" : "FAIL"); + const bool set_res = set_test(); + __builtin_printf("set : %s\n", set_res ? "PASS" : "FAIL"); + const bool multimap_res = multimap_test(); + __builtin_printf("multimap : %s\n", multimap_res ? "PASS" : "FAIL"); + const bool multiset_res = multiset_test(); + __builtin_printf("multiset : %s\n", multiset_res ? "PASS" : "FAIL"); + const bool array_res = array_test(); + __builtin_printf("array : %s\n", array_res ? "PASS" : "FAIL"); + const bool forward_list_res = forward_list_test(); + __builtin_printf("forward_list : %s\n", forward_list_res ? "PASS" : "FAIL"); + const bool unordered_map_res = unordered_map_test(); + __builtin_printf("unordered_map : %s\n", unordered_map_res ? "PASS" : "FAIL"); + const bool unordered_set_res = unordered_set_test(); + __builtin_printf("unordered_set : %s\n", unordered_set_res ? "PASS" : "FAIL"); + const bool unordered_multimap_res = unordered_multimap_test(); + __builtin_printf("unordered_multimap: %s\n", unordered_multimap_res ? "PASS" : "FAIL"); + const bool unordered_multiset_res = unordered_multiset_test(); + __builtin_printf("unordered_multiset: %s\n", unordered_multiset_res ? "PASS" : "FAIL"); + const bool ok = vec_res + && deque_res + && list_res + && map_res + && set_res + && multimap_res + && multiset_res + && array_res + && forward_list_res + && unordered_map_res + && unordered_set_res + && unordered_multimap_res + && unordered_multiset_res; + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-100.C b/libgomp/testsuite/libgomp.c++/target-flex-100.C new file mode 100644 index 0000000..7ab047f --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-100.C @@ -0,0 +1,210 @@ +/* Container adaptors in target region. + Does not test comparison operators other than equality to allow these tests + to be generalized to arbitrary input data. */ + +#include <algorithm> +#include <cstdio> +#include <deque> +#include <queue> +#include <stack> +#include <vector> + +#include "target-flex-common.h" + +template<typename T, std::size_t Size> +bool test_stack(T (&arr)[Size]) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arr[:Size]) + { + bool inner_ok = true; + const std::size_t half_size = Size / 2; + const T first_element = arr[0]; + const T middle_element = arr[half_size - 1]; + const T last_element = arr[Size - 1]; + typedef std::stack<T, std::vector<T> > stack_type; + stack_type stack; + VERIFY (stack.empty()); + VERIFY (stack.size() == 0); + { + /* Do half with push. 
*/ + std::size_t idx = 0; + for (; idx < half_size; ++idx) + { + stack.push(arr[idx]); + VERIFY (stack.top() == arr[idx]); + } + VERIFY (stack.size() == half_size); + VERIFY (static_cast<const stack_type&>(stack).size() == half_size); + for (; idx < Size; ++idx) + { + #if __cplusplus >= 201103L + /* Do the rest with emplace if C++11 or higher. */ + stack.emplace(arr[idx]); + #else + /* Otherwise just use push again. */ + stack.push(arr[idx]); + #endif + VERIFY (stack.top() == arr[idx]); + } + VERIFY (stack.size() == Size); + VERIFY (static_cast<const stack_type&>(stack).size() == Size); + + const stack_type stack_orig = stack_type(std::vector<T>(arr, arr + Size)); + VERIFY (stack == stack_orig); + /* References are contained in their own scope so we don't accidentally + add tests referencing them after they have been invalidated. */ + { + const T& const_top = static_cast<const stack_type&>(stack).top(); + VERIFY (const_top == last_element); + T& mutable_top = stack.top(); + mutable_top = first_element; + VERIFY (const_top == first_element); + } + /* Will only compare unequal if the first and last elements are different. */ + VERIFY (first_element != last_element || stack != stack_orig); + for (std::size_t count = Size - half_size; count != 0; --count) + stack.pop(); + VERIFY (stack.top() == middle_element); + const stack_type stack_half_orig = stack_type(std::vector<T>(arr, arr + half_size)); + VERIFY (stack == stack_half_orig); + } + end: + ok = inner_ok; + } + return ok; +} + +template<typename T, std::size_t Size> +bool test_queue(T (&arr)[Size]) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arr[:Size]) + { + bool inner_ok = true; + const std::size_t half_size = Size / 2; + const T first_element = arr[0]; + const T last_element = arr[Size - 1]; + typedef std::queue<T, std::deque<T> > queue_type; + queue_type queue; + VERIFY (queue.empty()); + VERIFY (queue.size() == 0); + { + /* Do half with push. */ + std::size_t idx = 0; + for (; idx < half_size; ++idx) + { + queue.push(arr[idx]); + VERIFY (queue.back() == arr[idx]); + VERIFY (queue.front() == first_element); + } + VERIFY (queue.size() == half_size); + VERIFY (static_cast<const queue_type&>(queue).size() == half_size); + for (; idx < Size; ++idx) + { + #if __cplusplus >= 201103L + /* Do the rest with emplace if C++11 or higher. */ + queue.emplace(arr[idx]); + #else + /* Otherwise just use push again. */ + queue.push(arr[idx]); + #endif + VERIFY (queue.back() == arr[idx]); + } + VERIFY (queue.size() == Size); + VERIFY (static_cast<const queue_type&>(queue).size() == Size); + + const queue_type queue_orig = queue_type(std::deque<T>(arr, arr + Size)); + VERIFY (queue == queue_orig); + + /* References are contained in their own scope so we don't accidentally + add tests referencing them after they have been invalidated. */ + { + const T& const_front = static_cast<const queue_type&>(queue).front(); + VERIFY (const_front == first_element); + T& mutable_front = queue.front(); + + const T& const_back = static_cast<const queue_type&>(queue).back(); + VERIFY (const_back == last_element); + T& mutable_back = queue.back(); + { + using std::swap; + swap(mutable_front, mutable_back); + } + VERIFY (const_front == last_element); + VERIFY (const_back == first_element); + /* Will only compare unequal if the first and last elements are different. */ + VERIFY (first_element != last_element || queue != queue_orig); + /* Return the last element to normal for the next comparison. 
*/ + mutable_back = last_element; + } + + const T middle_element = arr[half_size]; + for (std::size_t count = Size - half_size; count != 0; --count) + queue.pop(); + VERIFY (queue.front() == middle_element); + const queue_type queue_upper_half = queue_type(std::deque<T>(arr + half_size, arr + Size)); + VERIFY (queue == queue_upper_half); + } + end: + ok = inner_ok; + } + return ok; +} + +template<typename T, std::size_t Size> +bool test_priority_queue(T (&arr)[Size], const T min_value, const T max_value) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arr[:Size]) + { + bool inner_ok = true; + typedef std::priority_queue<T, std::vector<T> > priority_queue_type; + { + priority_queue_type pqueue; + VERIFY (pqueue.empty()); + VERIFY (pqueue.size() == 0); + } + { + priority_queue_type pqueue(arr, arr + Size); + VERIFY (!pqueue.empty()); + VERIFY (pqueue.size() == Size); + VERIFY (static_cast<const priority_queue_type&>(pqueue).size() == Size); + + const T old_max = pqueue.top(); + + #if __cplusplus >= 201103L + pqueue.emplace(max_value); + #else + pqueue.push(max_value); + #endif + VERIFY (pqueue.top() == max_value); + pqueue.pop(); + VERIFY (pqueue.top() == old_max); + pqueue.push(min_value); + VERIFY (pqueue.top() == old_max); + pqueue.push(max_value); + VERIFY (pqueue.top() == max_value); + pqueue.pop(); + VERIFY (pqueue.top() == old_max); + VERIFY (pqueue.size() == Size + 1); + + for (std::size_t count = Size; count != 0; --count) + pqueue.pop(); + VERIFY (pqueue.size() == 1); + VERIFY (pqueue.top() == min_value); + } + end: + ok = inner_ok; + } + return ok; +} + +int main() +{ + int arr[10] = {0,1,2,3,4,5,6,7,8,9}; + + return test_stack(arr) + && test_queue(arr) + && test_priority_queue(arr, 0, 1000) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-101.C b/libgomp/testsuite/libgomp.c++/target-flex-101.C new file mode 100644 index 0000000..be9037e --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-101.C @@ -0,0 +1,136 @@ +/* { dg-additional-options -std=c++23 } */ + +/* C++23 container adaptors in target region. + Severely needs additional tests. 
*/ + +#include <cstdio> +#include <utility> +#include <version> + +#if __cpp_lib_flat_map >= 202207L +#define ENABLE_FLAT_MAP 1 +#endif +#if __cpp_lib_flat_set >= 202207L +#define ENABLE_FLAT_SET 1 +#endif + +#ifdef ENABLE_FLAT_MAP +#include <flat_map> +#endif +#ifdef ENABLE_FLAT_SET +#include <flat_set> +#endif + +#include "target-flex-common.h" + +#ifdef ENABLE_FLAT_MAP +template<typename K, typename V, typename std::size_t Size> +bool test_flat_map(std::pair<K, V> (&arr)[Size]) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arr[:Size]) + { + bool inner_ok = true; + { + using flat_map_type = std::flat_map<K, V>; + flat_map_type map = {arr, arr + Size}; + + VERIFY (!map.empty()); + for (const auto& element : arr) + VERIFY (map.contains(element.first)); + } + end: + ok = inner_ok; + } + return ok; +} + +template<typename K, typename V, typename std::size_t Size> +bool test_flat_multimap(std::pair<K, V> (&arr)[Size]) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arr[:Size]) + { + bool inner_ok = true; + { + using flat_multimap_type = std::flat_multimap<K, V>; + flat_multimap_type multimap = {arr, arr + Size}; + + VERIFY (!multimap.empty()); + for (const auto& element : arr) + VERIFY (multimap.contains(element.first)); + } + end: + ok = inner_ok; + } + return ok; +} +#else +template<typename K, typename V, typename std::size_t Size> +bool test_flat_map(std::pair<K, V> (&arr)[Size]) { return true; } + +template<typename K, typename V, typename std::size_t Size> +bool test_flat_multimap(std::pair<K, V> (&arr)[Size]) { return true; } +#endif + +#ifdef ENABLE_FLAT_SET +template<typename T, typename std::size_t Size> +bool test_flat_set(T (&arr)[Size]) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arr[:Size]) + { + bool inner_ok = true; + { + using flat_set_type = std::flat_set<T>; + flat_set_type set = {arr, arr + Size}; + + VERIFY (!set.empty()); + for (const auto& element : arr) + VERIFY (set.contains(element)); + } + end: + ok = inner_ok; + } + return ok; +} + +template<typename T, typename std::size_t Size> +bool test_flat_multiset(T (&arr)[Size]) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arr[:Size]) + { + bool inner_ok = true; + { + using flat_multiset_type = std::flat_multiset<T>; + flat_multiset_type multiset = {arr, arr + Size}; + + VERIFY (!multiset.empty()); + for (const auto& element : arr) + VERIFY (multiset.contains(element)); + } + end: + ok = inner_ok; + } + return ok; +} +#else +template<typename T, typename std::size_t Size> +bool test_flat_set(T (&arr)[Size]) { return true; } + +template<typename T, typename std::size_t Size> +bool test_flat_multiset(T (&arr)[Size]) { return true; } +#endif + +int main() +{ + int arr[10] = {0,1,2,3,4,5,6,7,8,9}; + std::pair<int, int> pairs[10] = {{ 1, 2}, { 2, 4}, { 3, 6}, { 4, 8}, { 5, 10}, + { 6, 12}, { 7, 14}, { 8, 16}, { 9, 18}, {10, 20}}; + + return test_flat_set(arr) + && test_flat_multiset(arr) + && test_flat_map(pairs) + && test_flat_multimap(pairs) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-11.C b/libgomp/testsuite/libgomp.c++/target-flex-11.C new file mode 100644 index 0000000..6d55129 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-11.C @@ -0,0 +1,444 @@ +/* Check constructors/destructors are called in containers. 
*/ + +#include <vector> +#include <deque> +#include <list> +#include <set> +#include <map> +#include <utility> +#if __cplusplus >= 201103L +#include <array> +#include <forward_list> +#include <unordered_set> +#include <unordered_map> +#endif + +#include "target-flex-common.h" + +struct indirect_counter +{ + typedef int counter_value_type; + counter_value_type *_count_ptr; + + indirect_counter(counter_value_type *count_ptr) BL_NOEXCEPT : _count_ptr(count_ptr) { + ++(*_count_ptr); + } + indirect_counter(const indirect_counter& other) BL_NOEXCEPT : _count_ptr(other._count_ptr) { + ++(*_count_ptr); + } + /* Don't declare a move constructor, we want to copy no matter what. */ + ~indirect_counter() { + --(*_count_ptr); + } +}; + +bool operator==(indirect_counter const& lhs, indirect_counter const& rhs) BL_NOEXCEPT + { return lhs._count_ptr == rhs._count_ptr; } +bool operator<(indirect_counter const& lhs, indirect_counter const& rhs) BL_NOEXCEPT + { return lhs._count_ptr < rhs._count_ptr; } + +#if __cplusplus >= 201103L +template<> +struct std::hash<indirect_counter> +{ + std::size_t operator()(const indirect_counter& ic) const noexcept + { return std::hash<indirect_counter::counter_value_type *>{}(ic._count_ptr); } +}; +#endif + +/* Not a container, just a sanity check really. */ +bool automatic_lifetime_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + indirect_counter c = indirect_counter(&counter); + indirect_counter(static_cast<int*>(&counter)); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool vector_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + std::vector<indirect_counter> vec(42, indirect_counter(&counter)); + VERIFY (counter == 42); + vec.resize(32, indirect_counter(&counter)); + VERIFY (counter == 32); + vec.push_back(indirect_counter(&counter)); + VERIFY (counter == 33); + vec.pop_back(); + VERIFY (counter == 32); + vec.pop_back(); + VERIFY (counter == 31); + vec.resize(100, indirect_counter(&counter)); + VERIFY (counter == 100); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool deque_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + std::deque<indirect_counter> vec(42, indirect_counter(&counter)); + VERIFY (counter == 42); + vec.resize(32, indirect_counter(&counter)); + VERIFY (counter == 32); + vec.push_back(indirect_counter(&counter)); + VERIFY (counter == 33); + vec.pop_back(); + VERIFY (counter == 32); + vec.pop_back(); + VERIFY (counter == 31); + vec.resize(100, indirect_counter(&counter)); + VERIFY (counter == 100); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool list_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + std::list<indirect_counter> list(42, indirect_counter(&counter)); + VERIFY (counter == 42); + list.resize(32, indirect_counter(&counter)); + VERIFY (counter == 32); + list.push_back(indirect_counter(&counter)); + VERIFY (counter == 33); + list.pop_back(); + VERIFY (counter == 32); + list.pop_back(); + VERIFY (counter == 31); + list.resize(100, indirect_counter(&counter)); + VERIFY (counter == 100); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool map_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + std::map<int, indirect_counter> map; + 
map.insert(std::make_pair(1, indirect_counter(&counter))); + VERIFY (counter == 1); + map.insert(std::make_pair(1, indirect_counter(&counter))); + VERIFY (counter == 1); + map.insert(std::make_pair(2, indirect_counter(&counter))); + VERIFY (counter == 2); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool set_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter0 = 0; + int counter1 = 0; + { + std::set<indirect_counter> set; + set.insert(indirect_counter(&counter0)); + VERIFY (counter0 == 1); + set.insert(indirect_counter(&counter0)); + VERIFY (counter0 == 1); + set.insert(indirect_counter(&counter1)); + VERIFY (counter0 == 1 && counter1 == 1); + } + VERIFY (counter0 == 0 && counter1 == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool multimap_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + std::multimap<int, indirect_counter> multimap; + multimap.insert(std::make_pair(1, indirect_counter(&counter))); + VERIFY (counter == 1); + multimap.insert(std::make_pair(1, indirect_counter(&counter))); + VERIFY (counter == 2); + multimap.insert(std::make_pair(2, indirect_counter(&counter))); + VERIFY (counter == 3); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool multiset_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter0 = 0; + int counter1 = 0; + { + std::multiset<indirect_counter> multiset; + multiset.insert(indirect_counter(&counter0)); + VERIFY (counter0 == 1); + multiset.insert(indirect_counter(&counter0)); + VERIFY (counter0 == 2); + multiset.insert(indirect_counter(&counter1)); + VERIFY (counter0 == 2 && counter1 == 1); + } + VERIFY (counter0 == 0 && counter1 == 0); + end: + ok = inner_ok; + } + return ok; +} + +#if __cplusplus >= 201103L + +bool array_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + indirect_counter ic(&counter); + std::array<indirect_counter, 10> array{ic, ic, ic, ic, ic, + ic, ic, ic, ic, ic}; + VERIFY (counter == 11); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool forward_list_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + std::forward_list<indirect_counter> forward_list(42, indirect_counter(&counter)); + VERIFY (counter == 42); + forward_list.resize(32, indirect_counter(&counter)); + VERIFY (counter == 32); + forward_list.push_front(indirect_counter(&counter)); + VERIFY (counter == 33); + forward_list.pop_front(); + VERIFY (counter == 32); + forward_list.pop_front(); + VERIFY (counter == 31); + forward_list.resize(100, indirect_counter(&counter)); + VERIFY (counter == 100); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool unordered_map_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + std::unordered_map<int, indirect_counter> unordered_map; + unordered_map.insert({1, indirect_counter(&counter)}); + VERIFY (counter == 1); + unordered_map.insert({1, indirect_counter(&counter)}); + VERIFY (counter == 1); + unordered_map.insert({2, indirect_counter(&counter)}); + VERIFY (counter == 2); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool unordered_set_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter0 = 0; + int counter1 = 0; + { + 
std::unordered_set<indirect_counter> unordered_set; + unordered_set.insert(indirect_counter(&counter0)); + VERIFY (counter0 == 1); + unordered_set.insert(indirect_counter(&counter0)); + VERIFY (counter0 == 1); + unordered_set.insert(indirect_counter(&counter1)); + VERIFY (counter0 == 1 && counter1 == 1); + } + VERIFY (counter0 == 0 && counter1 == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool unordered_multimap_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter = 0; + { + std::unordered_multimap<int, indirect_counter> unordered_multimap; + unordered_multimap.insert({1, indirect_counter(&counter)}); + VERIFY (counter == 1); + unordered_multimap.insert({1, indirect_counter(&counter)}); + VERIFY (counter == 2); + unordered_multimap.insert({2, indirect_counter(&counter)}); + VERIFY (counter == 3); + } + VERIFY (counter == 0); + end: + ok = inner_ok; + } + return ok; +} + +bool unordered_multiset_test() +{ + bool ok; + #pragma omp target map(from: ok) + { + bool inner_ok = true; + int counter0 = 0; + int counter1 = 0; + { + std::unordered_multiset<indirect_counter> unordered_multiset; + unordered_multiset.insert(indirect_counter(&counter0)); + VERIFY (counter0 == 1); + unordered_multiset.insert(indirect_counter(&counter0)); + VERIFY (counter0 == 2); + unordered_multiset.insert(indirect_counter(&counter1)); + VERIFY (counter0 == 2 && counter1 == 1); + } + VERIFY (counter0 == 0 && counter1 == 0); + end: + ok = inner_ok; + } + return ok; +} + +#else +bool array_test() { return true; } +bool forward_list_test() { return true; } +bool unordered_map_test() { return true; } +bool unordered_set_test() { return true; } +bool unordered_multimap_test() { return true; } +bool unordered_multiset_test() { return true; } +#endif + +int main() +{ + const bool auto_res = automatic_lifetime_test(); + const bool vec_res = vector_test(); + const bool deque_res = deque_test(); + const bool list_res = list_test(); + const bool map_res = map_test(); + const bool set_res = set_test(); + const bool multimap_res = multimap_test(); + const bool multiset_res = multiset_test(); + const bool array_res = array_test(); + const bool forward_list_res = forward_list_test(); + const bool unordered_map_res = unordered_map_test(); + const bool unordered_set_res = unordered_set_test(); + const bool unordered_multimap_res = unordered_multimap_test(); + const bool unordered_multiset_res = unordered_multiset_test(); + std::printf("sanity check : %s\n", auto_res ? "PASS" : "FAIL"); + std::printf("vector : %s\n", vec_res ? "PASS" : "FAIL"); + std::printf("deque : %s\n", deque_res ? "PASS" : "FAIL"); + std::printf("list : %s\n", list_res ? "PASS" : "FAIL"); + std::printf("map : %s\n", map_res ? "PASS" : "FAIL"); + std::printf("set : %s\n", set_res ? "PASS" : "FAIL"); + std::printf("multimap : %s\n", multimap_res ? "PASS" : "FAIL"); + std::printf("multiset : %s\n", multiset_res ? "PASS" : "FAIL"); + std::printf("array : %s\n", array_res ? "PASS" : "FAIL"); + std::printf("forward_list : %s\n", forward_list_res ? "PASS" : "FAIL"); + std::printf("unordered_map : %s\n", unordered_map_res ? "PASS" : "FAIL"); + std::printf("unordered_set : %s\n", unordered_set_res ? "PASS" : "FAIL"); + std::printf("unordered_multimap: %s\n", unordered_multimap_res ? "PASS" : "FAIL"); + std::printf("unordered_multiset: %s\n", unordered_multiset_res ? 
"PASS" : "FAIL"); + const bool ok = auto_res + && vec_res + && deque_res + && list_res + && map_res + && set_res + && multimap_res + && multiset_res + && array_res + && forward_list_res + && unordered_map_res + && unordered_set_res + && unordered_multimap_res + && unordered_multiset_res; + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-12.C b/libgomp/testsuite/libgomp.c++/target-flex-12.C new file mode 100644 index 0000000..024fb73 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-12.C @@ -0,0 +1,736 @@ +/* Populated with mapped data, validate, mutate, validate again. + The cases using sets do not mutate. + Note: Some of the code in here really sucks due to being made to be + compatible with c++98. */ + +#include <vector> +#include <deque> +#include <list> +#include <set> +#include <map> +#if __cplusplus >= 201103L +#include <array> +#include <forward_list> +#include <unordered_set> +#include <unordered_map> +#endif + +#include <limits> +#include <iterator> + +#include "target-flex-common.h" + +template<bool B, class T = void> +struct enable_if {}; + +template<class T> +struct enable_if<true, T> { typedef T type; }; + +struct identity_func +{ +#if __cplusplus < 201103L + template<typename T> + T& operator()(T& arg) const BL_NOEXCEPT { return arg; } + template<typename T> + T const& operator()(T const& arg) const BL_NOEXCEPT { return arg; } +#else + template<typename T> + constexpr T&& operator()(T&& arg) const BL_NOEXCEPT { return std::forward<T>(arg); } +#endif +}; + +/* Applies projection to the second iterator. */ +template<typename It0, typename It1, typename Proj> +bool validate_sequential_elements(const It0 begin0, const It0 end0, + const It1 begin1, const It1 end1, + Proj proj) BL_NOEXCEPT +{ + It0 it0 = begin0; + It1 it1 = begin1; + for (; it0 != end0; ++it0, ++it1) + { + /* Sizes mismatch, don't bother aborting though just fail the test. */ + if (it1 == end1) + return false; + if (*it0 != proj(*it1)) + return false; + } + /* Sizes mismatch, do as above. */ + if (it1 != end1) + return false; + return true; +} + +template<typename It0, typename It1> +bool validate_sequential_elements(const It0 begin0, const It0 end0, + const It1 begin1, const It1 end1) BL_NOEXCEPT +{ + return validate_sequential_elements(begin0, end0, begin1, end1, identity_func()); +} + +/* Inefficient, but simple. 
*/ +template<typename It, typename OutIt> +void simple_copy(const It begin, const It end, OutIt out) BL_NOEXCEPT +{ + for (It it = begin; it != end; ++it, ++out) + *out = *it; +} + +template<typename It, typename MutateFn> +void simple_mutate(const It begin, const It end, MutateFn mut_fn) BL_NOEXCEPT +{ + for (It it = begin; it != end; ++it) + *it = mut_fn(*it); +} + +template<typename MutationFunc, typename T, std::size_t Size> +bool vector_test(const T (&arr)[Size]) +{ + bool ok; + T out_arr[Size]; + T out_mut_arr[Size]; + #pragma omp target map(from: ok, out_arr[:Size], out_mut_arr[:Size]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::vector<T> vector(arr, arr + Size); + VERIFY (validate_sequential_elements(vector.begin(), vector.end(), + arr, arr + Size)); + simple_copy(vector.begin(), vector.end(), out_arr); + simple_mutate(vector.begin(), vector.end(), MutationFunc()); + VERIFY (validate_sequential_elements(vector.begin(), vector.end(), + arr, arr + Size, MutationFunc())); + simple_copy(vector.begin(), vector.end(), out_mut_arr); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_sequential_elements(out_arr, out_arr + Size, + arr, arr + Size)); + VERIFY_NON_TARGET (validate_sequential_elements(out_mut_arr, out_mut_arr + Size, + arr, arr + Size, MutationFunc())); + return true; +} + +template<typename MutationFunc, typename T, std::size_t Size> +bool deque_test(const T (&arr)[Size]) +{ + bool ok; + T out_arr[Size]; + T out_mut_arr[Size]; + #pragma omp target map(from: ok, out_arr[:Size], out_mut_arr[:Size]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::deque<T> deque(arr, arr + Size); + VERIFY (validate_sequential_elements(deque.begin(), deque.end(), + arr, arr + Size)); + simple_copy(deque.begin(), deque.end(), out_arr); + simple_mutate(deque.begin(), deque.end(), MutationFunc()); + VERIFY (validate_sequential_elements(deque.begin(), deque.end(), + arr, arr + Size, MutationFunc())); + simple_copy(deque.begin(), deque.end(), out_mut_arr); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_sequential_elements(out_arr, out_arr + Size, + arr, arr + Size)); + VERIFY_NON_TARGET (validate_sequential_elements(out_mut_arr, out_mut_arr + Size, + arr, arr + Size, MutationFunc())); + return true; +} + +template<typename MutationFunc, typename T, std::size_t Size> +bool list_test(const T (&arr)[Size]) +{ + bool ok; + T out_arr[Size]; + T out_mut_arr[Size]; + #pragma omp target map(from: ok, out_arr[:Size], out_mut_arr[:Size]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::list<T> list(arr, arr + Size); + VERIFY (validate_sequential_elements(list.begin(), list.end(), + arr, arr + Size)); + simple_copy(list.begin(), list.end(), out_arr); + simple_mutate(list.begin(), list.end(), MutationFunc()); + VERIFY (validate_sequential_elements(list.begin(), list.end(), + arr, arr + Size, MutationFunc())); + simple_copy(list.begin(), list.end(), out_mut_arr); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_sequential_elements(out_arr, out_arr + Size, + arr, arr + Size)); + VERIFY_NON_TARGET (validate_sequential_elements(out_mut_arr, out_mut_arr + Size, + arr, arr + Size, MutationFunc())); + return true; +} + +template<typename T> +const T& get_key(const T& arg) BL_NOEXCEPT + { return arg; } +template<typename K, typename V> +const K& get_key(const std::pair<K, V>& pair) BL_NOEXCEPT + { return pair.first; } +template<typename T> +const T& 
get_value(const T& arg) BL_NOEXCEPT + { return arg; } +template<typename K, typename V> +const V& get_value(const std::pair<K, V>& pair) BL_NOEXCEPT + { return pair.second; } + +template<typename T> +struct key_type { typedef T type; }; +template<typename K, typename V> +struct key_type<std::pair<K, V> > { typedef K type; }; + +template<typename Proj, typename Container, typename It> +bool validate_associative(const Container& container, + const It compare_begin, + const It compare_end, + Proj proj) BL_NOEXCEPT +{ + const typename Container::const_iterator elem_end = container.end(); + for (It compare_it = compare_begin; compare_it != compare_end; ++compare_it) + { + const typename Container::const_iterator elem_it = container.find(get_key(*compare_it)); + VERIFY_NON_TARGET (elem_it != elem_end); + VERIFY_NON_TARGET (proj(get_value(*compare_it)) == get_value(*elem_it)); + } + return true; +} + +template<typename Container, typename It> +bool validate_associative(const Container& container, + const It compare_begin, + const It compare_end) BL_NOEXCEPT +{ + return validate_associative(container, compare_begin, compare_end, identity_func()); +} + +template<typename It, typename MutateFn> +void simple_mutate_map(const It begin, const It end, MutateFn mut_fn) BL_NOEXCEPT +{ + for (It it = begin; it != end; ++it) + it->second = mut_fn(it->second); +} + +template<typename It, typename OutIter> +void simple_copy_unique(const It begin, const It end, OutIter out) BL_NOEXCEPT +{ + /* In case anyone reads this, I want it to be known that I hate c++98. */ + typedef typename key_type<typename std::iterator_traits<It>::value_type>::type key_t; + std::set<key_t> already_seen; + for (It it = begin; it != end; ++it, ++out) + { + key_t key = get_key(*it); + if (already_seen.find(key) != already_seen.end()) + continue; + already_seen.insert(key); + *out = *it; + } +} + +template<typename MutationFunc, typename K, typename V, std::size_t Size> +bool map_test(const std::pair<K, V> (&arr)[Size]) +{ + std::map<K, V> reference_map(arr, arr + Size); + bool ok; + /* Both sizes should be the same. 
*/ + std::pair<K, V> out_pairs[Size]; + std::size_t out_size; + std::pair<K, V> out_pairs_mut[Size]; + std::size_t out_size_mut; + #pragma omp target map(from: ok, out_pairs[:Size], out_size, \ + out_pairs_mut[:Size], out_size_mut) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::vector<std::pair<K, V> > unique_elems; + simple_copy_unique(arr, arr + Size, + std::back_insert_iterator<std::vector<std::pair<K, V> > >(unique_elems)); + + std::map<K, V> map(arr, arr + Size); + VERIFY (validate_associative(map, unique_elems.begin(), unique_elems.end())); + simple_copy(map.begin(), map.end(), out_pairs); + out_size = map.size(); + simple_mutate_map(map.begin(), map.end(), MutationFunc()); + VERIFY (validate_associative(map, unique_elems.begin(), unique_elems.end(), + MutationFunc())); + simple_copy(map.begin(), map.end(), out_pairs_mut); + out_size_mut = map.size(); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (out_size == out_size_mut); + VERIFY_NON_TARGET (validate_associative(reference_map, + out_pairs, out_pairs + out_size)); + simple_mutate_map(reference_map.begin(), reference_map.end(), MutationFunc()); + VERIFY_NON_TARGET (validate_associative(reference_map, + out_pairs_mut, out_pairs_mut + out_size_mut)); + return true; +} + +template<typename T, std::size_t Size> +bool set_test(const T (&arr)[Size]) +{ + std::set<T> reference_set(arr, arr + Size); + bool ok; + /* Both sizes should be the same. */ + T out_arr[Size]; + std::size_t out_size; + #pragma omp target map(from: ok, out_arr[:Size], out_size) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::vector<T> unique_elems; + simple_copy_unique(arr, arr + Size, + std::back_insert_iterator<std::vector<T> >(unique_elems)); + + std::set<T> set(arr, arr + Size); + VERIFY (validate_associative(set, unique_elems.begin(), unique_elems.end())); + simple_copy(set.begin(), set.end(), out_arr); + out_size = set.size(); + /* Sets can't be mutated, we could create another set with mutated + but it gets a little annoying and probably isn't an interesting test. */ + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_associative(reference_set, + out_arr, out_arr + out_size)); + return true; +} + +template<typename Proj, typename Container, typename It> +bool validate_multi_associative(const Container& container, + const It compare_begin, + const It compare_end, + Proj proj) BL_NOEXCEPT +{ + /* Once again, for the poor soul reviewing these, I hate c++98. */ + typedef typename key_type<typename std::iterator_traits<It>::value_type>::type key_t; + typedef std::map<key_t, std::size_t> counter_map; + counter_map key_count_map; + for (It it = compare_begin; it != compare_end; ++it) + { + const key_t& key = get_key(*it); + typename counter_map::iterator counter_it + = key_count_map.find(key); + if (counter_it != key_count_map.end()) + ++counter_it->second; + else + key_count_map.insert(std::pair<const key_t, std::size_t>(key, std::size_t(1))); + } + const typename Container::const_iterator elem_end = container.end(); + for (It compare_it = compare_begin; compare_it != compare_end; ++compare_it) + { + const key_t& key = get_key(*compare_it); + typename counter_map::iterator count_it = key_count_map.find(key); + std::size_t key_count = count_it != key_count_map.end() ? count_it->second + : std::size_t(0); + VERIFY_NON_TARGET (key_count > std::size_t(0) && "this will never happen"); + /* This gets tested multiple times but that should be fine. 
*/ + VERIFY_NON_TARGET (key_count == container.count(key)); + typename Container::const_iterator elem_it = container.find(key); + /* This will never happen if the previous case passed. */ + VERIFY_NON_TARGET (elem_it != elem_end); + bool found_element = false; + for (; elem_it != elem_end; ++elem_it) + if (proj(get_value(*compare_it)) == get_value(*elem_it)) + { + found_element = true; + break; + } + VERIFY_NON_TARGET (found_element); + } + return true; +} + +template<typename Container, typename It> +bool validate_multi_associative(const Container& container, + const It compare_begin, + const It compare_end) BL_NOEXCEPT +{ + return validate_multi_associative(container, compare_begin, compare_end, identity_func()); +} + +template<typename MutationFunc, typename K, typename V, std::size_t Size> +bool multimap_test(const std::pair<K, V> (&arr)[Size]) +{ + std::multimap<K, V> reference_multimap(arr, arr + Size); + bool ok; + std::pair<K, V> out_pairs[Size]; + std::pair<K, V> out_pairs_mut[Size]; + #pragma omp target map(from: ok, out_pairs[:Size], out_pairs_mut[:Size]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::multimap<K, V> multimap(arr, arr + Size); + VERIFY (validate_multi_associative(multimap, arr, arr + Size)); + simple_copy(multimap.begin(), multimap.end(), out_pairs); + simple_mutate_map(multimap.begin(), multimap.end(), MutationFunc()); + VERIFY (validate_multi_associative(multimap, arr, arr + Size, MutationFunc())); + simple_copy(multimap.begin(), multimap.end(), out_pairs_mut); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_multi_associative(reference_multimap, + out_pairs, out_pairs + Size)); + simple_mutate_map(reference_multimap.begin(), reference_multimap.end(), MutationFunc()); + VERIFY_NON_TARGET (validate_multi_associative(reference_multimap, + out_pairs_mut, out_pairs_mut + Size)); + return true; +} + +template<typename T, std::size_t Size> +bool multiset_test(const T (&arr)[Size]) +{ + std::multiset<T> reference_multiset(arr, arr + Size); + bool ok; + T out_arr[Size]; + #pragma omp target map(from: ok, out_arr[:Size]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::multiset<T> set(arr, arr + Size); + VERIFY (validate_multi_associative(set, arr, arr + Size)); + simple_copy(set.begin(), set.end(), out_arr); + /* Sets can't be mutated, we could create another set with mutated + but it gets a little annoying and probably isn't an interesting test. */ + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_multi_associative(reference_multiset, + out_arr, out_arr + Size)); + return true; +} + +#if __cplusplus >= 201103L + +template<typename MutationFunc, typename T, std::size_t Size> +bool array_test(const T (&arr)[Size]) +{ + bool ok; + T out_arr[Size]; + T out_mut_arr[Size]; + #pragma omp target map(from: ok, out_arr[:Size], out_mut_arr[:Size]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::array<T, Size> std_array{}; + /* Special case for std::array since it can't be initialized + with iterators. 
*/ + { + T zero_val = T{}; + for (auto it = std_array.begin(); it != std_array.end(); ++it) + VERIFY (*it == zero_val); + } + simple_copy(arr, arr + Size, std_array.begin()); + VERIFY (validate_sequential_elements(std_array.begin(), std_array.end(), + arr, arr + Size)); + simple_copy(std_array.begin(), std_array.end(), out_arr); + simple_mutate(std_array.begin(), std_array.end(), MutationFunc()); + VERIFY (validate_sequential_elements(std_array.begin(), std_array.end(), + arr, arr + Size, MutationFunc())); + simple_copy(std_array.begin(), std_array.end(), out_mut_arr); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_sequential_elements(out_arr, out_arr + Size, + arr, arr + Size)); + VERIFY_NON_TARGET (validate_sequential_elements(out_mut_arr, out_mut_arr + Size, + arr, arr + Size, MutationFunc())); + return true; +} + +template<typename MutationFunc, typename T, std::size_t Size> +bool forward_list_test(const T (&arr)[Size]) +{ + bool ok; + T out_arr[Size]; + T out_mut_arr[Size]; + #pragma omp target map(from: ok, out_arr[:Size], out_mut_arr[:Size]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::forward_list<T> fwd_list(arr, arr + Size); + VERIFY (validate_sequential_elements(fwd_list.begin(), fwd_list.end(), + arr, arr + Size)); + simple_copy(fwd_list.begin(), fwd_list.end(), out_arr); + simple_mutate(fwd_list.begin(), fwd_list.end(), MutationFunc()); + VERIFY (validate_sequential_elements(fwd_list.begin(), fwd_list.end(), + arr, arr + Size, MutationFunc())); + simple_copy(fwd_list.begin(), fwd_list.end(), out_mut_arr); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_sequential_elements(out_arr, out_arr + Size, + arr, arr + Size)); + VERIFY_NON_TARGET (validate_sequential_elements(out_mut_arr, out_mut_arr + Size, + arr, arr + Size, MutationFunc())); + return true; +} + +template<typename MutationFunc, typename K, typename V, std::size_t Size> +bool unordered_map_test(const std::pair<K, V> (&arr)[Size]) +{ + std::unordered_map<K, V> reference_map(arr, arr + Size); + bool ok; + /* Both sizes should be the same. 
*/ + std::pair<K, V> out_pairs[Size]; + std::size_t out_size; + std::pair<K, V> out_pairs_mut[Size]; + std::size_t out_size_mut; + #pragma omp target map(from: ok, out_pairs[:Size], out_size, \ + out_pairs_mut[:Size], out_size_mut) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::vector<std::pair<K, V> > unique_elems; + simple_copy_unique(arr, arr + Size, + std::back_insert_iterator<std::vector<std::pair<K, V> > >(unique_elems)); + + std::unordered_map<K, V> map(arr, arr + Size); + VERIFY (validate_associative(map, unique_elems.begin(), unique_elems.end())); + simple_copy(map.begin(), map.end(), out_pairs); + out_size = map.size(); + simple_mutate_map(map.begin(), map.end(), MutationFunc()); + VERIFY (validate_associative(map, unique_elems.begin(), unique_elems.end(), + MutationFunc())); + simple_copy(map.begin(), map.end(), out_pairs_mut); + out_size_mut = map.size(); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (out_size == out_size_mut); + VERIFY_NON_TARGET (validate_associative(reference_map, + out_pairs, out_pairs + out_size)); + simple_mutate_map(reference_map.begin(), reference_map.end(), MutationFunc()); + VERIFY_NON_TARGET (validate_associative(reference_map, + out_pairs_mut, out_pairs_mut + out_size_mut)); + return true; +} + +template<typename T, std::size_t Size> +bool unordered_set_test(const T (&arr)[Size]) +{ + std::unordered_set<T> reference_set(arr, arr + Size); + bool ok; + /* Both sizes should be the same. */ + T out_arr[Size]; + std::size_t out_size; + #pragma omp target map(from: ok, out_arr[:Size], out_size) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::vector<T> unique_elems; + simple_copy_unique(arr, arr + Size, + std::back_insert_iterator<std::vector<T> >(unique_elems)); + + std::unordered_set<T> set(arr, arr + Size); + VERIFY (validate_associative(set, unique_elems.begin(), unique_elems.end())); + simple_copy(set.begin(), set.end(), out_arr); + out_size = set.size(); + /* Sets can't be mutated, we could create another set with mutated + but it gets a little annoying and probably isn't an interesting test. 
*/ + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_associative(reference_set, + out_arr, out_arr + out_size)); + return true; +} + +template<typename MutationFunc, typename K, typename V, std::size_t Size> +bool unordered_multimap_test(const std::pair<K, V> (&arr)[Size]) +{ + std::unordered_multimap<K, V> reference_multimap(arr, arr + Size); + bool ok; + std::pair<K, V> out_pairs[Size]; + std::pair<K, V> out_pairs_mut[Size]; + #pragma omp target map(from: ok, out_pairs[:Size], out_pairs_mut[:Size]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::unordered_multimap<K, V> multimap(arr, arr + Size); + VERIFY (validate_multi_associative(multimap, arr, arr + Size)); + simple_copy(multimap.begin(), multimap.end(), out_pairs); + simple_mutate_map(multimap.begin(), multimap.end(), MutationFunc()); + VERIFY (validate_multi_associative(multimap, arr, arr + Size, MutationFunc())); + simple_copy(multimap.begin(), multimap.end(), out_pairs_mut); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_multi_associative(reference_multimap, + out_pairs, out_pairs + Size)); + simple_mutate_map(reference_multimap.begin(), reference_multimap.end(), MutationFunc()); + VERIFY_NON_TARGET (validate_multi_associative(reference_multimap, + out_pairs_mut, out_pairs_mut + Size)); + return true; +} + +template<typename T, std::size_t Size> +bool unordered_multiset_test(const T (&arr)[Size]) +{ + std::unordered_multiset<T> reference_multiset(arr, arr + Size); + bool ok; + T out_arr[Size]; + #pragma omp target map(from: ok, out_arr[:Size]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::unordered_multiset<T> set(arr, arr + Size); + VERIFY (validate_multi_associative(set, arr, arr + Size)); + simple_copy(set.begin(), set.end(), out_arr); + /* Sets can't be mutated, we could create another set with mutated + but it gets a little annoying and probably isn't an interesting test. */ + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (validate_multi_associative(reference_multiset, + out_arr, out_arr + Size)); + return true; +} + +#else +template<typename, typename T, std::size_t Size> bool array_test(const T (&arr)[Size]) { return true; } +template<typename, typename T, std::size_t Size> bool forward_list_test(const T (&arr)[Size]) { return true; } +template<typename, typename T, std::size_t Size> bool unordered_map_test(const T (&arr)[Size]) { return true; } +template<typename T, std::size_t Size> bool unordered_set_test(const T (&arr)[Size]) { return true; } +template<typename, typename T, std::size_t Size> bool unordered_multimap_test(const T (&arr)[Size]) { return true; } +template<typename T, std::size_t Size> bool unordered_multiset_test(const T (&arr)[Size]) { return true; } +#endif + +/* This clamps to the maximum value to guard against overflowing, + assuming std::numeric_limits is specialized for T. 
*/ +struct multiply_by_2 +{ + template<typename T> + typename enable_if<std::numeric_limits<T>::is_specialized, T>::type + operator()(T arg) const BL_NOEXCEPT { + if (arg < static_cast<T>(0)) + { + if (std::numeric_limits<T>::min() / static_cast<T>(2) >= arg) + return std::numeric_limits<T>::min(); + } + else + { + if (std::numeric_limits<T>::max() / static_cast<T>(2) <= arg) + return std::numeric_limits<T>::max(); + } + return arg * 2; + } + template<typename T> + typename enable_if<!std::numeric_limits<T>::is_specialized, T>::type + operator()(T arg) const BL_NOEXCEPT { + return arg * 2; + } +}; + +int main() +{ + int data[8] = {0, 1, 2, 3, 4, 5, 6, 7}; + std::pair<int, int> pairs[10] = {std::pair<int, int>( 1, 2), + std::pair<int, int>( 2, 4), + std::pair<int, int>( 3, 6), + std::pair<int, int>( 4, 8), + std::pair<int, int>( 5, 10), + std::pair<int, int>( 6, 12), + std::pair<int, int>( 7, 14), + std::pair<int, int>( 8, 16), + std::pair<int, int>( 9, 18), + std::pair<int, int>(10, 20)}; + const bool vec_res = vector_test<multiply_by_2>(data); + const bool deque_res = deque_test<multiply_by_2>(data); + const bool list_res = list_test<multiply_by_2>(data); + const bool map_res = map_test<multiply_by_2>(pairs); + const bool set_res = set_test(data); + const bool multimap_res = multimap_test<multiply_by_2>(pairs); + const bool multiset_res = multiset_test(data); + const bool array_res = array_test<multiply_by_2>(data); + const bool forward_list_res = forward_list_test<multiply_by_2>(data); + const bool unordered_map_res = unordered_map_test<multiply_by_2>(pairs); + const bool unordered_set_res = unordered_set_test(data); + const bool unordered_multimap_res = unordered_multimap_test<multiply_by_2>(pairs); + const bool unordered_multiset_res = unordered_multiset_test(data); + std::printf("vector : %s\n", vec_res ? "PASS" : "FAIL"); + std::printf("deque : %s\n", deque_res ? "PASS" : "FAIL"); + std::printf("list : %s\n", list_res ? "PASS" : "FAIL"); + std::printf("map : %s\n", map_res ? "PASS" : "FAIL"); + std::printf("set : %s\n", set_res ? "PASS" : "FAIL"); + std::printf("multimap : %s\n", multimap_res ? "PASS" : "FAIL"); + std::printf("multiset : %s\n", multiset_res ? "PASS" : "FAIL"); + std::printf("array : %s\n", array_res ? "PASS" : "FAIL"); + std::printf("forward_list : %s\n", forward_list_res ? "PASS" : "FAIL"); + std::printf("unordered_map : %s\n", unordered_map_res ? "PASS" : "FAIL"); + std::printf("unordered_set : %s\n", unordered_set_res ? "PASS" : "FAIL"); + std::printf("unordered_multimap: %s\n", unordered_multimap_res ? "PASS" : "FAIL"); + std::printf("unordered_multiset: %s\n", unordered_multiset_res ? "PASS" : "FAIL"); + const bool ok = vec_res + && deque_res + && list_res + && map_res + && set_res + && multimap_res + && multiset_res + && array_res + && forward_list_res + && unordered_map_res + && unordered_set_res + && unordered_multimap_res + && unordered_multiset_res; + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-2000.C b/libgomp/testsuite/libgomp.c++/target-flex-2000.C new file mode 100644 index 0000000..688c014 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-2000.C @@ -0,0 +1,32 @@ +/* Tiny tuple test. 
*/ + +#include <tuple> + +#include "target-flex-common.h" + +bool test(int arg) +{ + bool ok; + int out; + std::tuple tup = {'a', arg, 3.14f}; + #pragma omp target map(from: ok, out) map(to: tup) + { + bool inner_ok = true; + { + VERIFY (std::get<0>(tup) == 'a'); + out = std::get<1>(tup); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (out == arg); + return true; +} + +int main() +{ + volatile int arg = 42u; + return test(arg) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-2001.C b/libgomp/testsuite/libgomp.c++/target-flex-2001.C new file mode 100644 index 0000000..f1a6c12 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-2001.C @@ -0,0 +1,61 @@ +/* { dg-additional-options "-std=c++20" } */ + +/* Functional */ + +#include <functional> +#include <utility> + +#include "target-flex-common.h" + +template<typename T,typename Fn> +auto invoke_unary(T&& a, Fn&& fn) noexcept +{ + return std::invoke(std::forward<Fn>(fn), + std::forward<T>(a)); +} + +template<typename T, typename U, typename Fn> +auto invoke_binary(T&& a, U&& b, Fn&& fn) noexcept +{ + return std::invoke(std::forward<Fn>(fn), + std::forward<T>(a), + std::forward<U>(b)); +} + +bool test(unsigned arg) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arg) + { + bool inner_ok = true; + { + VERIFY (std::plus{}(arg, 2) == arg + 2); + auto bound_plus_arg = std::bind_front(std::plus{}, arg); + VERIFY (bound_plus_arg(10) == arg + 10); + VERIFY (bound_plus_arg(20) == arg + 20); + + VERIFY (std::not_fn(std::not_equal_to{})(arg, arg)); + VERIFY (invoke_binary(arg, arg, std::not_fn(std::not_equal_to{}))); + auto bound_equals_arg = std::bind_front(std::not_fn(std::not_equal_to{}), arg); + VERIFY (bound_equals_arg(arg)); + VERIFY (std::not_fn(bound_equals_arg)(arg + 1)); + VERIFY (invoke_unary(arg, bound_equals_arg)); + + VERIFY (std::not_fn(std::ranges::not_equal_to{})(arg, arg)); + VERIFY (invoke_binary(arg, arg, std::not_fn(std::ranges::not_equal_to{}))); + auto bound_ranges_equals_arg = std::bind_front(std::not_fn(std::ranges::not_equal_to{}), arg); + VERIFY (bound_ranges_equals_arg(arg)); + VERIFY (std::not_fn(bound_ranges_equals_arg)(arg + 1)); + VERIFY (invoke_unary(arg, bound_ranges_equals_arg)); + } + end: + ok = inner_ok; + } + return ok; +} + +int main() +{ + volatile unsigned arg = 42u; + return test(arg) ? 
0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-2002.C b/libgomp/testsuite/libgomp.c++/target-flex-2002.C new file mode 100644 index 0000000..f738806 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-2002.C @@ -0,0 +1,97 @@ +/* { dg-additional-options "-std=c++23" } */ + +/* expected/optional */ + +#include <optional> +#include <expected> + +#include "target-flex-common.h" + +std::optional<unsigned> make_optional(bool b, unsigned arg = 0u) noexcept +{ + if (!b) + return std::nullopt; + return {arg}; +} + +bool test_optional(unsigned arg) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arg) + { + bool inner_ok = true; + { + auto null_opt = make_optional(false); + VERIFY (!null_opt); + VERIFY (!null_opt.has_value()); + VERIFY (null_opt.value_or(arg * 2u) == arg * 2u); + VERIFY (null_opt.or_else([&](){ return std::optional<unsigned>{arg}; }) + .transform([](int a){ return a * 2u; }) + .value_or(0) == arg * 2u); + + auto opt = make_optional(true, arg); + VERIFY (opt); + VERIFY (opt.has_value()); + VERIFY (opt.value() == arg); + VERIFY (*opt == arg); + VERIFY (opt.value_or(arg + 42) == arg); + VERIFY (opt.or_else([&](){ return std::optional<unsigned>{arg + 42}; }) + .transform([](int a){ return a * 2u; }) + .value_or(0) == arg * 2u); + } + end: + ok = inner_ok; + } + return ok; +} + +struct my_error +{ + int _e; +}; + +std::expected<unsigned, my_error> make_expected(bool b, unsigned arg = 0u) noexcept +{ + if (!b) + return std::unexpected{my_error{-1}}; + return {arg}; +} + +bool test_expected(unsigned arg) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arg) + { + bool inner_ok = true; + { + auto unexpected = make_expected(false); + VERIFY (!unexpected); + VERIFY (!unexpected.has_value()); + VERIFY (unexpected.error()._e == -1); + VERIFY (unexpected.value_or(arg * 2u) == arg * 2u); + VERIFY (unexpected.or_else([&](my_error e){ return std::expected<unsigned, my_error>{arg}; }) + .transform([](int a){ return a * 2u; }) + .value_or(0) == arg * 2u); + + auto expected = make_expected(true, arg); + VERIFY (expected); + VERIFY (expected.has_value()); + VERIFY (expected.value() == arg); + VERIFY (*expected == arg); + VERIFY (expected.value_or(arg + 42) == arg); + VERIFY (expected.or_else([&](my_error e){ return std::expected<unsigned, my_error>{std::unexpected{e}}; }) + .transform([](int a){ return a * 2u; }) + .value_or(0) == arg * 2u); + } + end: + ok = inner_ok; + } + return ok; +} + +int main() +{ + volatile unsigned arg = 42; + return test_optional(arg) + && test_expected(arg) ? 
0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-2003.C b/libgomp/testsuite/libgomp.c++/target-flex-2003.C new file mode 100644 index 0000000..8e8ca8e --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-2003.C @@ -0,0 +1,176 @@ +/* { dg-additional-options "-std=c++20" } */ + +/* bit_cast and memcpy */ + +#include <bit> +#include <cstring> + +#include "target-flex-common.h" + +struct S0 +{ + int _v0; + char _v1; + long long _v2; +}; + +struct S1 +{ + int _v0; + char _v1; + long long _v2; +}; + +bool test_bit_cast(int arg) +{ + bool ok; + S1 s1_out; + #pragma omp target map(from: ok, s1_out) map(to: arg) + { + bool inner_ok = true; + { + long long v = static_cast<long long>(arg + 42ll); + S0 s = {arg, 'a', v}; + VERIFY (std::bit_cast<S1>(s)._v0 == arg); + VERIFY (std::bit_cast<S1>(s)._v1 == 'a'); + VERIFY (std::bit_cast<S1>(s)._v2 == v); + s1_out = std::bit_cast<S1>(s); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + long long v = static_cast<long long>(arg + 42ll); + VERIFY_NON_TARGET (std::bit_cast<S0>(s1_out)._v0 == arg); + VERIFY_NON_TARGET (std::bit_cast<S0>(s1_out)._v1 == 'a'); + VERIFY_NON_TARGET (std::bit_cast<S0>(s1_out)._v2 == v); + return true; +} + + +struct OutStruct +{ + std::size_t _id; + void *_next; +}; + +struct Extendable1 +{ + std::size_t _id; + void *_next; + int _v; +}; + +struct Extendable2 +{ + std::size_t _id; + void *_next; + char _str[256]; +}; + +struct Extendable3 +{ + std::size_t _id; + void *_next; + const int *_nums; + std::size_t _size; +}; + +struct ExtendableUnknown +{ + std::size_t _id; + void *_next; +}; + +template<typename To, std::size_t Id> +To *get_extendable(void *p) +{ + while (p != nullptr) + { + OutStruct out; + std::memcpy(&out, p, sizeof(OutStruct)); + if (out._id == Id) + return static_cast<To *>(p); + p = out._next; + } + return nullptr; +} + +bool test_memcpy(int arg, const int *nums, std::size_t nums_size) +{ + bool ok; + Extendable2 e2_out; + #pragma omp target map(from: ok, e2_out) map(to: arg, nums[:nums_size], nums_size) + { + bool inner_ok = true; + { + Extendable3 e3 = {3u, nullptr, nums, nums_size}; + ExtendableUnknown u1 = {100u, &e3}; + Extendable2 e2 = {2u, &u1, {'H', 'e', 'l', 'l', 'o', '!', '\000'}}; + ExtendableUnknown u2 = {101u, &e2}; + ExtendableUnknown u3 = {102u, &u2}; + ExtendableUnknown u4 = {142u, &u3}; + Extendable1 e1 = {1u, &u4, arg}; + + void *p = &e1; + while (p != nullptr) + { + /* You can always cast a pointer to a struct to a pointer to + the type of it's first member. */ + switch (*static_cast<std::size_t *>(p)) + { + case 1: + { + Extendable1 *e1_p = static_cast<Extendable1 *>(p); + p = e1_p->_next; + VERIFY (e1_p->_v == arg); + break; + } + case 2: + { + Extendable2 *e2_p = static_cast<Extendable2 *>(p); + p = e2_p->_next; + VERIFY (std::strcmp(e2_p->_str, "Hello!") == 0); + break; + } + case 3: + { + Extendable3 *e3_p = static_cast<Extendable3 *>(p); + p = e3_p->_next; + VERIFY (nums == e3_p->_nums); + VERIFY (nums_size == e3_p->_size); + break; + } + default: + { + /* Casting to a pointer to OutStruct invokes undefined + behavior though, memcpy is required to extract the _next + member. 
*/ + OutStruct out; + std::memcpy(&out, p, sizeof(OutStruct)); + p = out._next; + } + } + } + Extendable2 *e2_p = get_extendable<Extendable2, 2u>(&e1); + VERIFY (e2_p != nullptr); + e2_out = *e2_p; + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (e2_out._id == 2u); + VERIFY_NON_TARGET (std::strcmp(e2_out._str, "Hello!") == 0); + return true; +} + +int main() +{ + volatile int arg = 42; + int arr[8] = {0, 1, 2, 3, 4, 5, 6, 7}; + return test_bit_cast(arg) + && test_memcpy(arg, arr, 8) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-30.C b/libgomp/testsuite/libgomp.c++/target-flex-30.C new file mode 100644 index 0000000..c66075b --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-30.C @@ -0,0 +1,51 @@ +/* std::initializer_list in target region. */ + +#include <initializer_list> +#include <array> + +#include "target-flex-common.h" + +bool test_initializer_list(int arg) +{ + static constexpr std::size_t out_arr_size = 7; + int out_arr[out_arr_size]; + bool ok; + #pragma omp target map(from: ok, out_arr[:out_arr_size]) map(to: arg) + { + bool inner_ok = true; + { + auto il = {0, 1, 2, 3, 4, 5, arg}; + + int sum = 0; + for (auto const& e : il) + sum += e; + VERIFY (sum == 0 + 1 + 2 + 3 + 4 + 5 + arg); + + auto* out_it = out_arr; + const auto* const out_end = out_arr + out_arr_size; + for (auto const& e : il) + { + VERIFY (out_it != out_end); + *out_it = e; + ++out_it; + } + } + end: + ok = inner_ok; + } + if (!ok) + return false; + + std::array<int, out_arr_size> reference_array = {0, 1, 2, 3, 4, 5, arg}; + const auto *out_arr_it = out_arr; + for (auto const& e : reference_array) + VERIFY_NON_TARGET (e == *(out_arr_it++)); + + return true; +} + +int main() +{ + volatile int arg = 42; + return test_initializer_list(arg) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-300.C b/libgomp/testsuite/libgomp.c++/target-flex-300.C new file mode 100644 index 0000000..ef9e5a9 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-300.C @@ -0,0 +1,49 @@ +/* { dg-additional-options -std=c++23 } */ + +/* numerics */ + +#include <algorithm> +#include <numeric> +#include <ranges> +#include <span> +#include <vector> + +//TODO PR120454 "C++ constexpr vs. OpenMP implicit mapping" +#pragma omp declare target(std::ranges::all_of, std::ranges::iota) + +#include "target-flex-common.h" + +namespace stdr = std::ranges; + +bool test(std::size_t arg) +{ + bool ok; + int midpoint_out; + std::vector<int> vec(arg); + int *data = vec.data(); + std::size_t size = vec.size(); + #pragma omp target defaultmap(none) map(from: ok, midpoint_out) map(tofrom: data[:size]) map(to: arg, size) + { + std::span span = {data, size}; + bool inner_ok = true; + { + VERIFY (stdr::all_of(span, [](int v){ return v == int{}; })); + stdr::iota(span, 0); + midpoint_out = *std::midpoint(span.data(), span.data() + span.size()); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (stdr::equal(vec, std::views::iota(0, static_cast<int>(vec.size())))); + VERIFY_NON_TARGET (*std::midpoint(vec.data(), vec.data() + vec.size()) + == midpoint_out); + return true; +} + +int main() +{ + volatile std::size_t arg = 42; + return test(arg) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-31.C b/libgomp/testsuite/libgomp.c++/target-flex-31.C new file mode 100644 index 0000000..adaf18f --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-31.C @@ -0,0 +1,80 @@ +/* std::initializer_list in target region. 
*/ + +#include <initializer_list> + +#include "target-flex-common.h" + +struct S0 +{ + int _v; + S0(std::initializer_list<int> il) + : _v(0) + { + for (auto const& e : il) + _v += e; + } +}; + +struct S1 +{ + int _v; + template<typename T> + S1(std::initializer_list<T> il) + : _v(0) + { + for (auto const& e : il) + _v += e; + } +}; + +template<typename T> +struct S2 +{ + T _v; + S2(std::initializer_list<T> il) + : _v(0) + { + for (auto const& e : il) + _v += e; + } +}; + +#if __cplusplus >= 201703L +template<typename T> +S2(std::initializer_list<T>) -> S2<T>; +#endif + +bool test_initializer_list(int arg) +{ + bool ok; + #pragma omp target map(from: ok) map(to: arg) + { + bool inner_ok = true; + { + static constexpr int partial_sum = 0 + 1 + 2 + 3 + 4 + 5; + + S0 s0{0, 1, 2, 3, 4, 5, arg}; + VERIFY (s0._v == partial_sum + arg); + + S1 s1{0, 1, 2, 3, 4, 5, arg}; + VERIFY (s1._v == partial_sum + arg); + + S2<int> s2{0, 1, 2, 3, 4, 5, arg}; + VERIFY (s2._v == partial_sum + arg); + + #if __cplusplus >= 201703L + S2 s2_ctad{0, 1, 2, 3, 4, 5, arg}; + VERIFY (s2_ctad._v == partial_sum + arg); + #endif + } + end: + ok = inner_ok; + } + return ok; +} + +int main() +{ + volatile int arg = 42; + return test_initializer_list(arg) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-32.C b/libgomp/testsuite/libgomp.c++/target-flex-32.C new file mode 100644 index 0000000..7f74401a --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-32.C @@ -0,0 +1,50 @@ +/* std::initializer_list constructor of std::vector (explicit template arg) */ + +#include <vector> +#include <array> + +#include "target-flex-common.h" + +bool test_initializer_list(int arg) +{ + static constexpr std::size_t out_arr_size = 7; + int out_arr[out_arr_size]; + bool ok; + #pragma omp target map(from: ok, out_arr[:out_arr_size]) map(to: arg) + { + bool inner_ok = true; + { + std::vector<int> vec{0, 1, 2, 3, 4, 5, arg}; + int sum = 0; + for (auto const& e : vec) + sum += e; + VERIFY (sum == 0 + 1 + 2 + 3 + 4 + 5 + arg); + + auto* out_it = out_arr; + const auto* const out_end = out_arr + out_arr_size; + for (auto const& e : vec) + { + VERIFY (out_it != out_end); + *out_it = e; + ++out_it; + } + } + end: + ok = inner_ok; + } + if (!ok) + return false; + + std::array<int, out_arr_size> reference_array = {0, 1, 2, 3, 4, 5, arg}; + const auto *out_arr_it = out_arr; + for (auto const& e : reference_array) + VERIFY_NON_TARGET (e == *(out_arr_it++)); + + return true; +} + +int main() +{ + volatile int arg = 42; + return test_initializer_list(arg) ? 
0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-33.C b/libgomp/testsuite/libgomp.c++/target-flex-33.C new file mode 100644 index 0000000..bb8a39b --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-33.C @@ -0,0 +1,52 @@ +/* { dg-additional-options "-std=c++17" } */ + +/* deduced std::initializer_list constructor of std::vector (CTAD) */ + +#include <vector> +#include <array> + +#include "target-flex-common.h" + +bool test_initializer_list(int arg) +{ + static constexpr std::size_t out_arr_size = 7; + int out_arr[out_arr_size]; + bool ok; + #pragma omp target map(from: ok, out_arr[:out_arr_size]) map(to: arg) + { + bool inner_ok = true; + { + std::vector vec{0, 1, 2, 3, 4, 5, arg}; + int sum = 0; + for (auto const& e : vec) + sum += e; + VERIFY (sum == 0 + 1 + 2 + 3 + 4 + 5 + arg); + + auto* out_it = out_arr; + const auto* const out_end = out_arr + out_arr_size; + for (auto const& e : vec) + { + VERIFY (out_it != out_end); + *out_it = e; + ++out_it; + } + } + end: + ok = inner_ok; + } + if (!ok) + return false; + + std::array<int, out_arr_size> reference_array = {0, 1, 2, 3, 4, 5, arg}; + const auto *out_arr_it = out_arr; + for (auto const& e : reference_array) + VERIFY_NON_TARGET (e == *(out_arr_it++)); + + return true; +} + +int main() +{ + volatile int arg = 42; + return test_initializer_list(arg) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-41.C b/libgomp/testsuite/libgomp.c++/target-flex-41.C new file mode 100644 index 0000000..4d36341 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-41.C @@ -0,0 +1,94 @@ +/* { dg-additional-options "-std=c++20" } */ + +/* <iterator> c++20 */ + +/* std::common_iterator uses std::variant. */ + +#include <vector> +#include <iterator> +#include <span> + +//TODO PR120454 "C++ constexpr vs. 
OpenMP implicit mapping" +#pragma omp declare target(std::ranges::distance, std::ranges::next) + +#include "target-flex-common.h" + +namespace stdr = std::ranges; + +template<typename It0, typename It1> +bool simple_equal(const It0 begin0, const It0 end0, + const It1 begin1, const It1 end1) BL_NOEXCEPT +{ + It0 it0 = begin0; + It1 it1 = begin1; + for (; it0 != end0; ++it0, ++it1) + if (it1 == end1 || *it0 != *it1) + return false; + return true; +} + +template<typename It, typename OutIt> +void simple_copy(const It begin, const It end, OutIt out) BL_NOEXCEPT +{ + for (It it = begin; it != end; ++it, ++out) + *out = *it; +} + +template<typename T, std::size_t Size> +bool test(const T (&arr)[Size]) +{ + bool ok; + T out_rev_arr[Size]; + T out_fwd_arr[Size]; + T out_first_half_arr[Size / 2]; + #pragma omp target defaultmap(none) \ + map(from: ok, out_rev_arr[:Size], out_fwd_arr[:Size], \ + out_first_half_arr[:Size / 2]) \ + map(to: arr[:Size]) + { + bool inner_ok = true; + { + std::span<const T> span = {arr, Size}; + std::vector<T> rev_vec(std::reverse_iterator{span.end()}, + std::reverse_iterator{span.begin()}); + VERIFY (std::distance(span.begin(), span.end()) + == std::distance(rev_vec.begin(), rev_vec.end())); + VERIFY (stdr::distance(span.begin(), span.end()) + == stdr::distance(rev_vec.begin(), rev_vec.end())); + VERIFY (stdr::distance(span) == stdr::distance(rev_vec)); + VERIFY (simple_equal(span.begin(), span.end(), + std::reverse_iterator{rev_vec.end()}, + std::reverse_iterator{rev_vec.begin()})); + simple_copy(rev_vec.begin(), rev_vec.end(), out_rev_arr); + simple_copy(std::reverse_iterator{rev_vec.end()}, + std::reverse_iterator{rev_vec.begin()}, + out_fwd_arr); + using counted_iter = std::counted_iterator<decltype(span.begin())>; + using common_iter = std::common_iterator<counted_iter, + std::default_sentinel_t>; + std::vector<T> front_half; + simple_copy(common_iter{counted_iter{span.begin(), Size / 2}}, + common_iter{std::default_sentinel}, + std::back_insert_iterator{front_half}); + VERIFY (simple_equal(span.begin(), stdr::next(span.begin(), Size / 2), + front_half.begin(), front_half.end())); + simple_copy(front_half.begin(), front_half.end(), out_first_half_arr); + } + end: + ok = inner_ok; + } + VERIFY_NON_TARGET (simple_equal(std::reverse_iterator{arr + Size}, + std::reverse_iterator{arr}, + out_rev_arr, out_rev_arr + Size)); + VERIFY_NON_TARGET (simple_equal(arr, arr + Size, + out_fwd_arr, out_fwd_arr + Size)); + VERIFY_NON_TARGET (simple_equal(arr, arr + Size / 2, + out_first_half_arr, out_first_half_arr + Size / 2)); + return ok; +} + +int main() +{ + int arr[] = {0, 1, 2, 3, 4, 5, 6, 7}; + return test(arr) ? 
0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-60.C b/libgomp/testsuite/libgomp.c++/target-flex-60.C new file mode 100644 index 0000000..014b9f5 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-60.C @@ -0,0 +1,46 @@ +/* algorithms pre c++20 */ + +#include <algorithm> +#include <vector> + +#include "target-flex-common.h" + +template<typename T, std::size_t Size> +bool test(const T (&arr)[Size]) +{ + bool ok; + T out_2x_arr[Size]; + T out_shifted_arr[Size]; + #pragma omp target map(from: ok, out_2x_arr[:Size], out_shifted_arr[:Size]) \ + map(to: arr[:Size]) + { + std::vector<T> vec(Size); + std::vector<T> mutated(Size); + bool inner_ok = true; + { + std::copy(arr, arr + Size, vec.begin()); + VERIFY (std::equal(arr, arr + Size, vec.begin())); + std::transform(vec.begin(), vec.end(), mutated.begin(), + [](const T& v){ return v * 2; }); + std::copy(mutated.begin(), mutated.end(), out_2x_arr); + std::rotate(vec.begin(), std::next(vec.begin(), Size / 2), vec.end()); + std::copy(vec.begin(), vec.end(), out_shifted_arr); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (std::equal(arr, arr + Size, out_2x_arr, + [](const T& a, const T& b){ return a * 2 == b; })); + std::vector<T> shifted(arr, arr + Size); + std::rotate(shifted.begin(), std::next(shifted.begin(), Size / 2), shifted.end()); + VERIFY_NON_TARGET (std::equal(out_shifted_arr, out_shifted_arr + Size, shifted.begin())); + return true; +} + +int main() +{ + int arr[] = {0, 1, 2, 3, 4, 5, 6, 7}; + return test(arr) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-61.C b/libgomp/testsuite/libgomp.c++/target-flex-61.C new file mode 100644 index 0000000..9070c2d --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-61.C @@ -0,0 +1,54 @@ +/* { dg-additional-options "-std=c++20" } */ + +/* ranged algorithms c++20 */ + +#include <algorithm> +#include <ranges> +#include <vector> + +//TODO PR120454 "C++ constexpr vs. OpenMP implicit mapping" +#pragma omp declare target(std::ranges::copy, std::ranges::equal, std::ranges::rotate, std::ranges::transform) + +#include "target-flex-common.h" + +namespace stdr = std::ranges; + +template<typename T, std::size_t Size> +bool test(const T (&arr)[Size]) +{ + bool ok; + T out_2x_arr[Size]; + T out_shifted_arr[Size]; + #pragma omp target defaultmap(none) \ + map(from: ok, out_2x_arr[:Size], out_shifted_arr[:Size]) \ + map(to: arr[:Size]) + { + std::vector<T> vec(Size); + std::vector<T> mutated(Size); + bool inner_ok = true; + { + stdr::copy(arr, vec.begin()); + VERIFY (stdr::equal(arr, vec)); + stdr::transform(vec, mutated.begin(), + [](const T& v){ return v * 2; }); + stdr::copy(mutated, out_2x_arr); + stdr::rotate(vec, std::next(vec.begin(), Size / 2)); + stdr::copy(vec, out_shifted_arr); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (stdr::equal(arr, out_2x_arr, stdr::equal_to{}, [](const T& v){ return v * 2; })); + std::vector<T> shifted(arr, arr + Size); + stdr::rotate(shifted, std::next(shifted.begin(), Size / 2)); + VERIFY_NON_TARGET (stdr::equal(out_shifted_arr, shifted)); + return true; +} + +int main() +{ + int arr[] = {0, 1, 2, 3, 4, 5, 6, 7}; + return test(arr) ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-62.C b/libgomp/testsuite/libgomp.c++/target-flex-62.C new file mode 100644 index 0000000..ef6b942 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-62.C @@ -0,0 +1,50 @@ +/* { dg-additional-options -std=c++23 } */ + +/* std::views stuff. 
Also tests std::tuple with std::views::zip. */ + +#include <algorithm> +#include <ranges> +#include <span> + +//TODO PR120454 "C++ constexpr vs. OpenMP implicit mapping" +#pragma omp declare target(std::ranges::all_of, std::ranges::equal, std::ranges::fold_left, std::views::reverse, std::views::zip) + +#include "target-flex-common.h" + +namespace stdr = std::ranges; +namespace stdv = std::views; + +bool f() +{ + const int arr_fwd[8] = {0, 1, 2, 3, 4, 5, 6, 7}; + const int arr_rev[8] = {7, 6, 5, 4, 3, 2, 1, 0}; + + bool ok; + #pragma omp target defaultmap(none) map(from: ok) map(to: arr_fwd[:8], arr_rev[:8]) + { + std::span<const int> fwd = {arr_fwd, 8}; + std::span<const int> rev = {arr_rev, 8}; + bool inner_ok = true; + { + VERIFY(stdr::equal(fwd, rev | stdv::reverse)); + VERIFY(stdr::equal(fwd | stdv::drop(4) | stdv::reverse, + rev | stdv::take(4))); + for (auto [first, second] : stdv::zip(fwd, rev)) + VERIFY(first + second == 7); + auto plus = [](int a, int b){ return a + b; }; + auto is_even = [](int v){ return v % 2 == 0; }; + VERIFY(stdr::fold_left(fwd | stdv::filter(is_even), 0, plus) + == 12); + VERIFY(stdr::all_of(fwd | stdv::transform([](int v){ return v * 2; }), + is_even)); + } + end: + ok = inner_ok; + } + return ok; +} + +int main() +{ + return f() ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-70.C b/libgomp/testsuite/libgomp.c++/target-flex-70.C new file mode 100644 index 0000000..9e9383d --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-70.C @@ -0,0 +1,26 @@ +/* CTAD in target regions. */ + +template<typename T> +struct S +{ + T _v; +}; + +template<typename T> +S(T) -> S<T>; + +bool f() +{ + bool ok; + #pragma omp target map(from: ok) + { + S s{42}; + ok = s._v == 42; + } + return ok; +} + +int main() +{ + return f() ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-80.C b/libgomp/testsuite/libgomp.c++/target-flex-80.C new file mode 100644 index 0000000..f41a1bb --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-80.C @@ -0,0 +1,49 @@ +// { dg-additional-options "-std=c++20" } + +/* std::span */ + +#include <span> + +#include "target-flex-common.h" + +template<typename It0, typename It1> +bool simple_equal(It0 it0, const It0 end0, + It1 it1, const It1 end1) noexcept +{ + for (; it0 != end0; ++it0, ++it1) + if (it1 == end1 || *it0 != *it1) + return false; + return true; +} + +template<typename T, std::size_t Size> +bool test(const T (&arr)[Size]) +{ + bool ok; + T out_arr[Size]; + #pragma omp target map(from: ok) map(to: arr[:Size]) + { + std::span span = {arr, Size}; + bool inner_ok = true; + { + VERIFY (!span.empty()); + VERIFY (span.size() == Size); + auto out_it = out_arr; + for (auto elem : span) + *(out_it++) = elem; + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (simple_equal(arr, arr + Size, + out_arr, out_arr + Size)); + return true; +} + +int main() +{ + int arr[8] = {0, 1, 2, 3, 4, 5, 6, 7}; + return test(arr) ? 
0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-81.C b/libgomp/testsuite/libgomp.c++/target-flex-81.C new file mode 100644 index 0000000..a86fefb --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-81.C @@ -0,0 +1,75 @@ +/* { dg-additional-options "-std=c++20" } */ + +#include <ranges> +#include <span> +#include <type_traits> +#include <vector> + +#include "target-flex-common.h" + +namespace stdr = std::ranges; + +template<typename It0, typename It1> +bool simple_equal(It0 it0, const It0 end0, + It1 it1, const It1 end1) noexcept +{ + for (; it0 != end0; ++it0, ++it1) + if (it1 == end1 || *it0 != *it1) + return false; + return true; +} + +template<typename Rn0, typename Rn1> +bool simple_equal(Rn0&& rn0, Rn1&& rn1) noexcept +{ + return simple_equal(stdr::begin(rn0), stdr::end(rn0), + stdr::begin(rn1), stdr::end(rn1)); +} + +template<typename Rn> +bool test(Rn&& range) +{ + using value_type = stdr::range_value_t<std::remove_cvref_t<Rn>>; + std::vector<value_type> vec = {stdr::begin(range), stdr::end(range)}; + value_type *data = vec.data(); + std::size_t size = vec.size(); + bool ok; + #pragma omp target map(from: ok) map(tofrom: data[:size]) map(to: size) + { + std::vector<value_type> orig = {data, data + size}; + std::span<value_type> span = {data, size}; + bool inner_ok = true; + { + auto mul_by_2 = [](const value_type& v){ return v * 2; }; + VERIFY (simple_equal(orig, span)); + for (auto& elem : span) + elem = mul_by_2(elem); + VERIFY (simple_equal(orig | std::views::transform(mul_by_2), span)); + } + end: + ok = inner_ok; + } + if (!ok) + return false; + auto mul_by_2 = [](const value_type& v){ return v * 2; }; + VERIFY_NON_TARGET (simple_equal(range | std::views::transform(mul_by_2), vec)); + return true; +} + +struct my_int +{ + int _v; + bool operator==(my_int const&) const = default; + my_int operator*(int rhs) const noexcept { + return {_v * rhs}; + } +}; + +int main() +{ + std::vector<int> ints = {1, 2, 3, 4, 5}; + const bool ints_res = test(ints); + std::vector<my_int> my_ints = {my_int{1}, my_int{2}, my_int{3}, my_int{4}, my_int{5}}; + const bool my_ints_res = test(my_ints); + return ints_res && my_ints_res ? 
0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-90.C b/libgomp/testsuite/libgomp.c++/target-flex-90.C new file mode 100644 index 0000000..b3f1197 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-90.C @@ -0,0 +1,107 @@ +/* structured bindings */ + +#include <array> +#include <tuple> + +#include "target-flex-common.h" + +template<typename Array, typename Tuple, typename Struct> +bool test(Array array, Tuple tuple, Struct s) +{ + bool ok; + auto array_2nd_in = std::get<2>(array); + auto tuple_2nd_in = std::get<2>(tuple); + auto s_2nd_in = s._2; + decltype(array_2nd_in) array_2nd_out_0; + decltype(tuple_2nd_in) tuple_2nd_out_0; + decltype(s_2nd_in) s_2nd_out_0; + decltype(array_2nd_in) array_2nd_out_1; + decltype(tuple_2nd_in) tuple_2nd_out_1; + decltype(s_2nd_in) s_2nd_out_1; + decltype(array_2nd_in) array_2nd_out_2; + decltype(tuple_2nd_in) tuple_2nd_out_2; + decltype(s_2nd_in) s_2nd_out_2; + #pragma omp target map(from: ok, \ + array_2nd_out_0, tuple_2nd_out_0, s_2nd_out_0, \ + array_2nd_out_1, tuple_2nd_out_1, s_2nd_out_1, \ + array_2nd_out_2, tuple_2nd_out_2, s_2nd_out_2) \ + map(to: array_2nd_in, tuple_2nd_in, s_2nd_in, array, tuple, s) + { + bool inner_ok = true; + { + { + auto [array_0th, array_1st, array_2nd] = array; + VERIFY (array_2nd_in == array_2nd); + VERIFY (std::get<2>(array) == array_2nd); + array_2nd_out_0 = array_2nd; + auto [tuple_0th, tuple_1st, tuple_2nd] = tuple; + VERIFY (tuple_2nd_in == tuple_2nd); + VERIFY (std::get<2>(tuple) == tuple_2nd); + tuple_2nd_out_0 = tuple_2nd; + auto [s_0th, s_1st, s_2nd] = s; + VERIFY (s_2nd_in == s_2nd); + VERIFY (s._2 == s_2nd); + s_2nd_out_0 = s_2nd; + } + { + auto& [array_0th, array_1st, array_2nd] = array; + VERIFY (array_2nd_in == array_2nd); + VERIFY (std::get<2>(array) == array_2nd); + array_2nd_out_1 = array_2nd; + auto& [tuple_0th, tuple_1st, tuple_2nd] = tuple; + VERIFY (tuple_2nd_in == tuple_2nd); + VERIFY (std::get<2>(tuple) == tuple_2nd); + tuple_2nd_out_1 = tuple_2nd; + auto& [s_0th, s_1st, s_2nd] = s; + VERIFY (s_2nd_in == s_2nd); + VERIFY (s._2 == s_2nd); + s_2nd_out_1 = s_2nd; + } + { + const auto& [array_0th, array_1st, array_2nd] = array; + VERIFY (array_2nd_in == array_2nd); + VERIFY (std::get<2>(array) == array_2nd); + array_2nd_out_2 = array_2nd; + const auto& [tuple_0th, tuple_1st, tuple_2nd] = tuple; + VERIFY (tuple_2nd_in == tuple_2nd); + VERIFY (std::get<2>(tuple) == tuple_2nd); + tuple_2nd_out_2 = tuple_2nd; + const auto& [s_0th, s_1st, s_2nd] = s; + VERIFY (s_2nd_in == s_2nd); + VERIFY (s._2 == s_2nd); + s_2nd_out_2 = s_2nd; + } + } + end: + ok = inner_ok; + } + if (!ok) + return false; + VERIFY_NON_TARGET (array_2nd_out_0 == array_2nd_in); + VERIFY_NON_TARGET (tuple_2nd_out_0 == tuple_2nd_in); + VERIFY_NON_TARGET (s_2nd_out_0 == s_2nd_in); + VERIFY_NON_TARGET (array_2nd_out_1 == array_2nd_in); + VERIFY_NON_TARGET (tuple_2nd_out_1 == tuple_2nd_in); + VERIFY_NON_TARGET (s_2nd_out_1 == s_2nd_in); + VERIFY_NON_TARGET (array_2nd_out_2 == array_2nd_in); + VERIFY_NON_TARGET (tuple_2nd_out_2 == tuple_2nd_in); + VERIFY_NON_TARGET (s_2nd_out_2 == s_2nd_in); + + return true; +} + +struct S +{ + char _0; + float _1; + int _2; +}; + +int main() +{ + const bool test_res + = test(std::array{0, 1, 2}, + std::tuple{'a', 3.14f, 42}, + S{'a', 3.14f, 42}); + return test_res ? 
0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-flex-common.h b/libgomp/testsuite/libgomp.c++/target-flex-common.h new file mode 100644 index 0000000..14523c4 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-flex-common.h @@ -0,0 +1,40 @@ +#include <cstdio> + +#if __cplusplus >= 201103L + #define BL_NOEXCEPT noexcept +#else + #define BL_NOEXCEPT throw() +#endif + +#if defined __has_builtin +# if __has_builtin (__builtin_LINE) +# define VERIFY_LINE __builtin_LINE () +# endif +#endif +#if !defined VERIFY_LINE +# define VERIFY_LINE __LINE__ +#endif + +/* I'm not a huge fan of macros but in the interest of keeping the code that + isn't being tested as simple as possible, we use them. */ + +#define VERIFY(EXPR) \ + do { \ + if (!(EXPR)) \ + { \ + std::printf("VERIFY ln: %d `" #EXPR "` evaluated to false\n", \ + VERIFY_LINE); \ + inner_ok = false; \ + goto end; \ + } \ + } while (false) + +#define VERIFY_NON_TARGET(EXPR) \ + do { \ + if (!(EXPR)) \ + { \ + std::printf("VERIFY ln: %d `" #EXPR "` evaluated to false\n", \ + VERIFY_LINE); \ + return false; \ + } \ + } while (false) diff --git a/libgomp/testsuite/libgomp.c++/target-std__array-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__array-concurrent-usm.C new file mode 100644 index 0000000..9923783 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__array-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__array-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__array-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__array-concurrent.C new file mode 100644 index 0000000..c42105a --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__array-concurrent.C @@ -0,0 +1,62 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <array> +#include <algorithm> + +#define N 50000 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand (); +} + +#pragma omp declare target +bool validate (const std::array<int,N> &arr, int data[]) +{ + for (int i = 0; i < N; ++i) + if (arr[i] != data[i] * data[i]) + return false; + return true; +} +#pragma omp end declare target + +int main (void) +{ + int data[N]; + bool ok; + std::array<int,N> arr; + + srand (time (NULL)); + init (data); + +#ifndef MEM_SHARED + #pragma omp target data map (to: data[:N]) map (alloc: arr) +#endif + { + #pragma omp target + { +#ifndef MEM_SHARED + new (&arr) std::array<int,N> (); +#endif + std::copy (data, data + N, arr.begin ()); + } + + #pragma omp target teams distribute parallel for + for (int i = 0; i < N; ++i) + arr[i] *= arr[i]; + + #pragma omp target map (from: ok) + { + ok = validate (arr, data); +#ifndef MEM_SHARED + arr.~array (); +#endif + } + } + + return ok ? 
0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent-usm.C new file mode 100644 index 0000000..9023ef8 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__bitset-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent.C new file mode 100644 index 0000000..4fcce93 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__bitset-concurrent.C @@ -0,0 +1,69 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <bitset> +#include <set> +#include <algorithm> + +#define N 4000 +#define MAX 16384 + +void init (int data[]) +{ + std::set<int> _set; + for (int i = 0; i < N; ++i) + { + // Avoid duplicates in data array. + do + data[i] = rand () % MAX; + while (_set.find (data[i]) != _set.end ()); + _set.insert (data[i]); + } +} + +bool validate (int sum, int data[]) +{ + int total = 0; + for (int i = 0; i < N; ++i) + total += data[i]; + return sum == total; +} + +int main (void) +{ + int data[N]; + std::bitset<MAX> _set; + int sum = 0; + + srand (time (NULL)); + init (data); + +#ifndef MEM_SHARED + #pragma omp target data map (to: data[:N]) map (alloc: _set) +#endif + { + #pragma omp target + { +#ifndef MEM_SHARED + new (&_set) std::bitset<MAX> (); +#endif + for (int i = 0; i < N; ++i) + _set[data[i]] = true; + } + + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < MAX; ++i) + if (_set[i]) + sum += i; + +#ifndef MEM_SHARED + #pragma omp target + _set.~bitset (); +#endif + } + + bool ok = validate (sum, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__cmath.C b/libgomp/testsuite/libgomp.c++/target-std__cmath.C new file mode 100644 index 0000000..aaf7152 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__cmath.C @@ -0,0 +1,340 @@ +// { dg-do run } +// { dg-additional-options "-std=c++20" } + +#include <cmath> +#include <numbers> + +#define FP_EQUAL(x,y) (std::abs ((x) - (y)) < 1E-6) + +#pragma omp declare target +template<typename T> bool test_basic () +{ + T x = -3.456789; + T y = 1.234567; + T z = 5.678901; + + if (std::abs (x) != -x) + return false; + if (!FP_EQUAL (std::trunc (x / y) * y + std::fmod (x, y), x)) + return false; + if (!FP_EQUAL (x - std::round (x / y) * y, std::remainder (x, y))) + return false; + if (!FP_EQUAL (std::fma (x, y, z), x * y + z)) + return false; + if (std::fmax (x, y) != (x > y ? x : y)) + return false; + if (std::fmin (x, y) != (x < y ? 
x : y)) + return false; + if (std::fdim (x, y) != std::max(x - y, (T) 0.0)) + return false; + if (std::fdim (y, x) != std::max(y - x, (T) 0.0)) + return false; + return true; +} + +template<typename T> bool test_exp () +{ + T x = -4.567890; + T y = 2.345678; + + if (!FP_EQUAL (std::exp (x), std::pow (std::numbers::e_v<T>, x))) + return false; + if (!FP_EQUAL (std::exp2 (y), std::pow ((T) 2.0, y))) + return false; + if (!FP_EQUAL (std::expm1 (y), std::exp (y) - (T) 1.0)) + return false; + if (!FP_EQUAL (std::log (std::exp (x)), x)) + return false; + if (!FP_EQUAL (std::log10 (std::pow ((T) 10.0, y)), y)) + return false; + if (!FP_EQUAL (std::log2 (std::exp2 (y)), y)) + return false; + if (!FP_EQUAL (std::log1p (std::expm1 (y)), y)) + return false; + return true; +} + +template<typename T> bool test_power () +{ + T x = 7.234251; + T y = 0.340128; + + if (!FP_EQUAL (std::log (std::pow (x, y)) / std::log (x), y)) + return false; + if (!FP_EQUAL (std::sqrt (x) * std::sqrt (x), x)) + return false; + if (!FP_EQUAL (std::cbrt (x) * std::cbrt (x) * std::cbrt (x), x)) + return false; + if (!FP_EQUAL (std::hypot (x, y), std::sqrt (x * x + y * y))) + return false; + return true; +} + +template<typename T> bool test_trig () +{ + T theta = std::numbers::pi / 4; + T phi = std::numbers::pi / 6; + + if (!FP_EQUAL (std::sin (theta), std::sqrt ((T) 2) / 2)) + return false; + if (!FP_EQUAL (std::sin (phi), 0.5)) + return false; + if (!FP_EQUAL (std::cos (theta), std::sqrt ((T) 2) / 2)) + return false; + if (!FP_EQUAL (std::cos (phi), std::sqrt ((T) 3) / 2)) + return false; + if (!FP_EQUAL (std::tan (theta), 1.0)) + return false; + if (!FP_EQUAL (std::tan (phi), std::sqrt ((T) 3) / 3)) + return false; + + T x = 0.33245623; + + if (!FP_EQUAL (std::asin (std::sin (x)), x)) + return false; + if (!FP_EQUAL (std::acos (std::cos (x)), x)) + return false; + if (!FP_EQUAL (std::atan (std::tan (x)), x)) + return false; + if (!FP_EQUAL (std::atan2 (std::sin (x), std::cos (x)), x)) + return false; + return true; +} + +template<typename T> bool test_hyperbolic () +{ + T x = 0.7423532; + + if (!FP_EQUAL (std::sinh (x), (std::exp (x) - std::exp (-x)) / (T) 2.0)) + return false; + if (!FP_EQUAL (std::cosh (x), (std::exp (x) + std::exp (-x)) / (T) 2.0)) + return false; + if (!FP_EQUAL (std::tanh (x), std::sinh (x) / std::cosh (x))) + return false; + if (!FP_EQUAL (std::asinh (std::sinh (x)), x)) + return false; + if (!FP_EQUAL (std::acosh (std::cosh (x)), x)) + return false; + if (!FP_EQUAL (std::atanh (std::tanh (x)), x)) + return false; + return true; +} + +template<typename T> bool test_erf () +{ + if (!FP_EQUAL (std::erf ((T) 0), 0)) + return false; + if (!FP_EQUAL (std::erf ((T) INFINITY), 1)) + return false; + if (!FP_EQUAL (std::erf ((T) -INFINITY), -1)) + return false; + + if (!FP_EQUAL (std::erfc (0), 1)) + return false; + if (!FP_EQUAL (std::erfc ((T) INFINITY), 0)) + return false; + if (!FP_EQUAL (std::erfc ((T) -INFINITY), 2)) + return false; + + return true; +} + +template<typename T> bool test_gamma () +{ + if (!FP_EQUAL (std::tgamma ((T) 5), 4*3*2*1)) + return false; + if (!FP_EQUAL (std::tgamma ((T) 0.5), std::sqrt (std::numbers::pi_v<T>))) + return false; + if (!FP_EQUAL (std::tgamma ((T) -0.5), (T) -2 * std::sqrt (std::numbers::pi_v<T>))) + return false; + if (!FP_EQUAL (std::tgamma ((T) 2.5), (T) 0.75 * std::sqrt (std::numbers::pi_v<T>))) + return false; + if (!FP_EQUAL (std::tgamma ((T) -2.5), (T) -8.0/15 * std::sqrt (std::numbers::pi_v<T>))) + return false; + + if (!FP_EQUAL (std::lgamma ((T) 5), std::log 
((T) 4*3*2*1))) + return false; + if (!FP_EQUAL (std::lgamma ((T) 0.5), std::log (std::sqrt (std::numbers::pi_v<T>)))) + return false; + if (!FP_EQUAL (std::lgamma ((T) 2.5), + std::log ((T) 0.75 * std::sqrt (std::numbers::pi_v<T>)))) + return false; + + return true; +} + +template<typename T> bool test_rounding () +{ + T x = -2.5678; + T y = 3.6789; + + if (std::ceil (x) != -2) + return false; + if (std::floor (x) != -3) + return false; + if (std::trunc (x) != -2) + return false; + if (std::round (x) != -3) + return false; + + if (std::ceil (y) != 4) + return false; + if (std::floor (y) != 3) + return false; + if (std::trunc (y) != 3) + return false; + if (std::round (y) != 4) + return false; + + /* Not testing std::rint and std::nearbyint due to dependence on + floating-point environment. */ + + return true; +} + +template<typename T> bool test_fpmanip () +{ + T x = -2.3456789; + T y = 3.6789012; + int exp; + + T mantissa = std::frexp (x, &exp); + if (std::ldexp (mantissa, exp) != x) + return false; + if (std::logb (x) + 1 != exp) + return false; + if (std::ilogb (x) + 1 != exp) + return false; + if (std::scalbn (x, -exp) != mantissa) + return false; + + T next = std::nextafter (x, y); + if (!(next > x && next < y)) + return false; + +#if 0 + /* TODO Due to 'std::nexttoward' using 'long double to', this triggers a + '80-bit-precision floating-point numbers unsupported (mode ‘XF’)' error + with x86_64 host and nvptx, GCN offload compilers, or + '128-bit-precision floating-point numbers unsupported (mode ‘TF’)' error + with powerpc64le host and nvptx offload compiler, for example; + PR71064 'nvptx offloading: "long double" data type'. + It ought to work on systems where the host's 'long double' is the same as + 'double' ('DF'): aarch64, for example? 
*/ + next = std::nexttoward (x, y); + if (!(next > x && next < y)) + return false; +#endif + + if (std::copysign (x, y) != std::abs (x)) + return false; + if (std::copysign (y, x) != -y) + return false; + + return true; +} + +template<typename T> bool test_classify () +{ + T x = -2.3456789; + T y = 3.6789012; + + if (std::fpclassify (x) != FP_NORMAL || std::fpclassify (y) != FP_NORMAL) + return false; + if (std::fpclassify ((T) INFINITY) != FP_INFINITE + || std::fpclassify ((T) -INFINITY) != FP_INFINITE) + return false; + if (std::fpclassify ((T) 0.0) != FP_ZERO) + return false; + if (std::fpclassify ((T) NAN) != FP_NAN) + return false; + if (!std::isfinite (x) || !std::isfinite (y)) + return false; + if (std::isfinite ((T) INFINITY) || std::isfinite ((T) -INFINITY)) + return false; + if (std::isinf (x) || std::isinf (y)) + return false; + if (!std::isinf ((T) INFINITY) || !std::isinf ((T) -INFINITY)) + return false; + if (std::isnan (x) || std::isnan (y)) + return false; + if (!std::isnan ((T) 0.0 / (T) 0.0)) + return false; + if (std::isnan (x) || std::isnan (y)) + return false; + if (!std::isnormal (x) || !std::isnormal (y)) + return false; + if (std::isnormal ((T) 0.0) || std::isnormal ((T) INFINITY) || std::isnormal ((T) NAN)) + return false; + if (!std::signbit (x) || std::signbit (y)) + return false; + + return true; +} + +template<typename T> bool test_compare () +{ + T x = 5.6789012; + T y = 8.9012345; + + if (std::isgreater (x, y)) + return false; + if (std::isgreater (x, x)) + return false; + if (std::isgreaterequal (x, y)) + return false; + if (!std::isgreaterequal (x, x)) + return false; + if (!std::isless (x, y)) + return false; + if (std::isless (x, x)) + return false; + if (!std::islessequal (x, y)) + return false; + if (!std::islessequal (x, x)) + return false; + if (!std::islessgreater (x, y)) + return false; + if (std::islessgreater (x, x)) + return false; + if (std::isunordered (x, y)) + return false; + if (!std::isunordered (x, NAN)) + return false; + return true; +} +#pragma omp end declare target + +#define RUN_TEST(func) \ +{ \ + pass++; \ + bool ok = test_##func<float> (); \ + if (!ok) { result = pass; break; } \ + pass++; \ + ok = test_##func<double> (); \ + if (!ok) { result = pass; break; } \ +} + +int main (void) +{ + int result = 0; + + #pragma omp target map (tofrom: result) + do { + int pass = 0; + + RUN_TEST (basic); + RUN_TEST (exp); + RUN_TEST (power); + RUN_TEST (trig); + RUN_TEST (hyperbolic); + RUN_TEST (erf); + RUN_TEST (gamma); + RUN_TEST (rounding); + RUN_TEST (fpmanip); + RUN_TEST (classify); + RUN_TEST (compare); + } while (false); + + return result; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__complex.C b/libgomp/testsuite/libgomp.c++/target-std__complex.C new file mode 100644 index 0000000..e392d17 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__complex.C @@ -0,0 +1,175 @@ +// { dg-do run } +// { dg-additional-options "-std=c++20" } + +#include <cmath> +#include <complex> +#include <numbers> + +using namespace std::complex_literals; + +#define FP_EQUAL(x,y) (std::abs ((x) - (y)) < 1E-6) +#define COMPLEX_EQUAL(x,y) (FP_EQUAL ((x).real (), (y).real ()) \ + && FP_EQUAL ((x).imag (), (y).imag ())) + +#pragma omp declare target +template<typename T> bool test_complex () +{ + std::complex<T> z (-1.334, 5.763); + + if (!FP_EQUAL (z.real (), (T) -1.334)) + return false; + if (!FP_EQUAL (z.imag (), (T) 5.763)) + return false; + if (!FP_EQUAL (std::abs (z), + std::sqrt (z.real () * z.real () + z.imag () * z.imag ()))) + return 
false; + if (!FP_EQUAL (std::arg (z), std::atan2 (z.imag (), z.real ()))) + return false; + if (!FP_EQUAL (std::norm (z), z.real () * z.real () + z.imag () * z.imag ())) + return false; + + auto conj = std::conj (z); + if (!FP_EQUAL (conj.real (), z.real ()) + || !FP_EQUAL (conj.imag (), -z.imag ())) + return false; + + if (std::proj (z) != z) + return false; + + auto infz1 = std::proj (std::complex<float> (INFINITY, -1)); + if (infz1.real () != INFINITY || infz1.imag () != (T) -0.0) + return false; + auto infz2 = std::proj (std::complex<float> (0, -INFINITY)); + if (infz2.real () != INFINITY || infz2.imag () != (T) -0.0) + return false; + + auto polarz = std::polar ((T) 1.5, std::numbers::pi_v<T> / 4); + if (!FP_EQUAL (polarz.real (), (T) 1.5 * std::cos (std::numbers::pi_v<T> / 4)) + || !FP_EQUAL (polarz.imag (), + (T) 1.5* std::sin (std::numbers::pi_v<T> / 4))) + return false; + + return true; +} + +template<typename T> bool test_complex_exp_log () +{ + std::complex<T> z (-1.724, -3.763); + + // Euler's identity + auto eulerz = std::exp (std::complex<T> (0, std::numbers::pi)); + eulerz += 1.0; + if (!COMPLEX_EQUAL (eulerz, std::complex<T> ())) + return false; + + auto my_exp_z + = std::complex<T> (std::exp (z.real ()) * std::cos (z.imag ()), + std::exp (z.real ()) * std::sin (z.imag ())); + if (!COMPLEX_EQUAL (std::exp (z), my_exp_z)) + return false; + + if (!COMPLEX_EQUAL (std::log10 (z), + std::log (z) / std::log (std::complex<T> (10)))) + return false; + + return true; +} + +template<typename T> bool test_complex_trig () +{ + std::complex<T> z (std::numbers::pi / 8, std::numbers::pi / 10); + const std::complex<T> i (0, 1); + + auto my_sin_z + = std::complex<T> (std::sin (z.real ()) * std::cosh (z.imag ()), + std::cos (z.real ()) * std::sinh (z.imag ())); + if (!COMPLEX_EQUAL (std::sin (z), my_sin_z)) + return false; + + auto my_cos_z + = std::complex<T> (std::cos (z.real ()) * std::cosh (z.imag ()), + -std::sin (z.real ()) * std::sinh (z.imag ())); + if (!COMPLEX_EQUAL (std::cos (z), my_cos_z)) + return false; + + auto my_tan_z + = std::complex<T> (std::sin (2*z.real ()), std::sinh (2*z.imag ())) + / (std::cos (2*z.real ()) + std::cosh (2*z.imag ())); + if (!COMPLEX_EQUAL (std::tan (z), my_tan_z)) + return false; + + auto my_sinh_z + = std::complex<T> (std::sinh (z.real ()) * std::cos (z.imag ()), + std::cosh (z.real ()) * std::sin (z.imag ())); + if (!COMPLEX_EQUAL (std::sinh (z), my_sinh_z)) + return false; + + auto my_cosh_z + = std::complex<T> (std::cosh (z.real ()) * std::cos (z.imag ()), + std::sinh (z.real ()) * std::sin (z.imag ())); + if (!COMPLEX_EQUAL (std::cosh (z), my_cosh_z)) + return false; + + auto my_tanh_z + = std::complex<T> (std::sinh (2*z.real ()), + std::sin (2*z.imag ())) + / (std::cosh (2*z.real ()) + std::cos (2*z.imag ())); + if (!COMPLEX_EQUAL (std::tanh (z), my_tanh_z)) + return false; + + auto my_asin_z = -i * std::log (i * z + std::sqrt ((T) 1.0 - z*z)); + if (!COMPLEX_EQUAL (std::asin (z), my_asin_z)) + return false; + + auto my_acos_z + = std::complex<T> (std::numbers::pi / 2) + + i * std::log (i * z + std::sqrt ((T) 1.0 - z*z)); + if (!COMPLEX_EQUAL (std::acos (z), my_acos_z)) + return false; + + auto my_atan_z = std::complex<T> (0, -0.5) * (std::log ((i - z) / (i + z))); + if (!COMPLEX_EQUAL (std::atan (z), my_atan_z)) + return false; + + auto my_asinh_z = std::log (z + std::sqrt (z*z + (T) 1.0)); + if (!COMPLEX_EQUAL (std::asinh (z), my_asinh_z)) + return false; + + auto my_acosh_z = std::log (z + std::sqrt (z*z - (T) 1.0)); + if (!COMPLEX_EQUAL 
(std::acosh (z), my_acosh_z)) + return false; + + auto my_atanh_z + = std::complex<T> (0.5) * (std::log ((T) 1.0 + z) - std::log ((T) 1.0 - z)); + if (!COMPLEX_EQUAL (std::atanh (z), my_atanh_z)) + return false; + + return true; +} +#pragma omp end declare target + +#define RUN_TEST(func) \ +{ \ + pass++; \ + bool ok = test_##func<float> (); \ + if (!ok) { result = pass; break; } \ + pass++; \ + ok = test_##func<double> (); \ + if (!ok) { result = pass; break; } \ +} + +int main (void) +{ + int result = 0; + + #pragma omp target map (tofrom: result) + do { + int pass = 0; + + RUN_TEST (complex); + RUN_TEST (complex_exp_log); + RUN_TEST (complex_trig); + } while (false); + + return result; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__deque-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__deque-concurrent-usm.C new file mode 100644 index 0000000..863a1de --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__deque-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__deque-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__deque-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__deque-concurrent.C new file mode 100644 index 0000000..9c2d6fa --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__deque-concurrent.C @@ -0,0 +1,64 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <deque> +#include <algorithm> + +#define N 50000 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand (); +} + +#pragma omp declare target +bool validate (const std::deque<int> &_deque, int data[]) +{ + for (int i = 0; i < N; ++i) + if (_deque[i] != data[i] * data[i]) + return false; + return true; +} +#pragma omp end declare target + +int main (void) +{ + int data[N]; + bool ok; + + srand (time (NULL)); + init (data); + +#ifdef MEM_SHARED + std::deque<int> _deque (std::begin (data), std::end (data)); +#else + std::deque<int> _deque; +#endif + +#ifndef MEM_SHARED + #pragma omp target data map (to: data[:N]) map (alloc: _deque) +#endif + { +#ifndef MEM_SHARED + #pragma omp target + new (&_deque) std::deque<int> (std::begin (data), std::end (data)); +#endif + + #pragma omp target teams distribute parallel for + for (int i = 0; i < N; ++i) + _deque[i] *= _deque[i]; + + #pragma omp target map (from: ok) + { + ok = validate (_deque, data); +#ifndef MEM_SHARED + _deque.~deque (); +#endif + } + } + + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__flat_map-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__flat_map-concurrent.C new file mode 100644 index 0000000..9e59907 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__flat_map-concurrent.C @@ -0,0 +1,71 @@ +// { dg-do run } +// { dg-additional-options "-std=c++23" } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +/* { dg-ice {TODO PR120450} { offload_target_amdgcn && { ! offload_device_shared_as } } } + { dg-excess-errors {'mkoffload' failure etc.} { xfail { offload_target_amdgcn && { ! offload_device_shared_as } } } } + (For effective-target 'offload_device_shared_as', we've got '-DMEM_SHARED', and therefore don't invoke the constructor with placement new.) 
*/ + +#include <stdlib.h> +#include <time.h> +#include <set> +#include <flat_map> + +#define N 3000 + +void init (int data[], bool unique) +{ + std::set<int> _set; + for (int i = 0; i < N; ++i) + { + // Avoid duplicates in data array if unique is true. + do + data[i] = rand (); + while (unique && _set.count (data[i]) > 0); + _set.insert (data[i]); + } +} + +bool validate (long long sum, int keys[], int data[]) +{ + long long total = 0; + for (int i = 0; i < N; ++i) + total += (long long) keys[i] * data[i]; + return sum == total; +} + +int main (void) +{ + int keys[N], data[N]; + std::flat_map<int,int> _map; + + srand (time (NULL)); + init (keys, true); + init (data, false); + + #pragma omp target enter data map (to: keys[:N], data[:N]) map (alloc: _map) + + #pragma omp target + { +#ifndef MEM_SHARED + new (&_map) std::flat_map<int,int> (); +#endif + for (int i = 0; i < N; ++i) + _map[keys[i]] = data[i]; + } + + long long sum = 0; + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < N; ++i) + sum += (long long) keys[i] * _map[keys[i]]; + +#ifndef MEM_SHARED + #pragma omp target + _map.~flat_map (); +#endif + + #pragma omp target exit data map (release: _map) + + bool ok = validate (sum, keys, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__flat_multimap-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__flat_multimap-concurrent.C new file mode 100644 index 0000000..1dc60c8 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__flat_multimap-concurrent.C @@ -0,0 +1,70 @@ +// { dg-do run } +// { dg-additional-options "-std=c++23" } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +/* { dg-ice {TODO PR120450} { offload_target_amdgcn && { ! offload_device_shared_as } } } + { dg-excess-errors {'mkoffload' failure etc.} { xfail { offload_target_amdgcn && { ! offload_device_shared_as } } } } + (For effective-target 'offload_device_shared_as', we've got '-DMEM_SHARED', and therefore don't invoke the constructor with placement new.) */ + +#include <stdlib.h> +#include <time.h> +#include <flat_map> + +// Make sure that KEY_MAX is less than N to ensure some duplicate keys. +#define N 3000 +#define KEY_MAX 1000 + +void init (int data[], int max) +{ + for (int i = 0; i < N; ++i) + data[i] = i % max; +} + +bool validate (long long sum, int keys[], int data[]) +{ + long long total = 0; + for (int i = 0; i < N; ++i) + total += (long long) keys[i] * data[i]; + return sum == total; +} + +int main (void) +{ + int keys[N], data[N]; + std::flat_multimap<int,int> _map; + + srand (time (NULL)); + init (keys, KEY_MAX); + init (data, RAND_MAX); + + #pragma omp target enter data map (to: keys[:N], data[:N]) map (alloc: _map) + + #pragma omp target + { +#ifndef MEM_SHARED + new (&_map) std::flat_multimap<int,int> (); +#endif + for (int i = 0; i < N; ++i) + _map.insert({keys[i], data[i]}); + } + + long long sum = 0; + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < KEY_MAX; ++i) + { + auto range = _map.equal_range (i); + for (auto it = range.first; it != range.second; ++it) { + sum += (long long) it->first * it->second; + } + } + +#ifndef MEM_SHARED + #pragma omp target + _map.~flat_multimap (); +#endif + + #pragma omp target exit data map (release: _map) + + bool ok = validate (sum, keys, data); + return ok ? 
0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__flat_multiset-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__flat_multiset-concurrent.C new file mode 100644 index 0000000..59b59bf --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__flat_multiset-concurrent.C @@ -0,0 +1,60 @@ +// { dg-do run } +// { dg-additional-options "-std=c++23" } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <flat_set> +#include <algorithm> + +// MAX should be less than N to ensure that some duplicates occur. +#define N 4000 +#define MAX 1000 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand () % MAX; +} + +bool validate (int sum, int data[]) +{ + int total = 0; + for (int i = 0; i < N; ++i) + total += data[i]; + return sum == total; +} + +int main (void) +{ + int data[N]; + std::flat_multiset<int> set; + int sum = 0; + + srand (time (NULL)); + init (data); + + #pragma omp target data map (to: data[:N]) map (alloc: set) + { + #pragma omp target + { +#ifndef MEM_SHARED + new (&set) std::flat_multiset<int> (); +#endif + for (int i = 0; i < N; ++i) + set.insert (data[i]); + } + + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < MAX; ++i) + sum += i * set.count (i); + +#ifndef MEM_SHARED + #pragma omp target + set.~flat_multiset (); +#endif + } + + bool ok = validate (sum, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__flat_set-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__flat_set-concurrent.C new file mode 100644 index 0000000..b255cd5 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__flat_set-concurrent.C @@ -0,0 +1,67 @@ +// { dg-do run } +// { dg-additional-options "-std=c++23" } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <flat_set> +#include <algorithm> + +#define N 4000 +#define MAX 16384 + +void init (int data[]) +{ + std::flat_set<int> _set; + for (int i = 0; i < N; ++i) + { + // Avoid duplicates in data array. + do + data[i] = rand () % MAX; + while (_set.count (data[i]) != 0); + _set.insert (data[i]); + } +} + +bool validate (int sum, int data[]) +{ + int total = 0; + for (int i = 0; i < N; ++i) + total += data[i]; + return sum == total; +} + +int main (void) +{ + int data[N]; + std::flat_set<int> _set; + int sum = 0; + + srand (time (NULL)); + init (data); + + #pragma omp target data map (to: data[:N]) map (alloc: _set) + { + #pragma omp target + { +#ifndef MEM_SHARED + new (&_set) std::flat_set<int> (); +#endif + for (int i = 0; i < N; ++i) + _set.insert (data[i]); + } + + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < MAX; ++i) + if (_set.count (i) > 0) + sum += i; + +#ifndef MEM_SHARED + #pragma omp target + _set.~flat_set (); +#endif + } + + bool ok = validate (sum, data); + return ok ? 
0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent-usm.C new file mode 100644 index 0000000..60d5cee --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__forward_list-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent.C new file mode 100644 index 0000000..6b0ee65 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__forward_list-concurrent.C @@ -0,0 +1,83 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <omp.h> +#include <forward_list> +#include <algorithm> + +#define N 3000 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand (); +} + +#pragma omp declare target +bool validate (const std::forward_list<int> &list, int data[]) +{ + int i = 0; + for (auto &v : list) + { + if (v != data[i] * data[i]) + return false; + ++i; + } + return true; +} +#pragma omp end declare target + +int main (void) +{ + int data[N]; + bool ok; + + srand (time (NULL)); + init (data); + +#ifdef MEM_SHARED + std::forward_list<int> list (std::begin (data), std::end (data)); +#else + std::forward_list<int> list; +#endif + +#ifndef MEM_SHARED + #pragma omp target data map (to: data[:N]) map (alloc: list) +#endif + { +#ifndef MEM_SHARED + #pragma omp target + new (&list) std::forward_list<int> (std::begin (data), std::end (data)); +#endif + + #pragma omp target teams + do + { + int len = N / omp_get_num_teams () + (N % omp_get_num_teams () > 0); + int start = len * omp_get_team_num (); + if (start >= N) + break; + if (start + len >= N) + len = N - start; + auto it = list.begin (); + std::advance (it, start); + for (int i = 0; i < len; ++i) + { + *it *= *it; + ++it; + } + } while (false); + + #pragma omp target map (from: ok) + { + ok = validate (list, data); +#ifndef MEM_SHARED + list.~forward_list (); +#endif + } + } + + return ok ? 
0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__list-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__list-concurrent-usm.C new file mode 100644 index 0000000..5057bf9 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__list-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__list-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__list-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__list-concurrent.C new file mode 100644 index 0000000..1f44a17 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__list-concurrent.C @@ -0,0 +1,83 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <omp.h> +#include <list> +#include <algorithm> + +#define N 3000 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand (); +} + +#pragma omp declare target +bool validate (const std::list<int> &_list, int data[]) +{ + int i = 0; + for (auto &v : _list) + { + if (v != data[i] * data[i]) + return false; + ++i; + } + return true; +} +#pragma omp end declare target + +int main (void) +{ + int data[N]; + bool ok; + + srand (time (NULL)); + init (data); + +#ifdef MEM_SHARED + std::list<int> _list (std::begin (data), std::end (data)); +#else + std::list<int> _list; +#endif + +#ifndef MEM_SHARED + #pragma omp target data map (to: data[:N]) map (alloc: _list) +#endif + { +#ifndef MEM_SHARED + #pragma omp target + new (&_list) std::list<int> (std::begin (data), std::end (data)); +#endif + + #pragma omp target teams + do + { + int len = N / omp_get_num_teams () + (N % omp_get_num_teams () > 0); + int start = len * omp_get_team_num (); + if (start >= N) + break; + if (start + len >= N) + len = N - start; + auto it = _list.begin (); + std::advance (it, start); + for (int i = 0; i < len; ++i) + { + *it *= *it; + ++it; + } + } while (false); + + #pragma omp target map (from: ok) + { + ok = validate (_list, data); +#ifndef MEM_SHARED + _list.~list (); +#endif + } + } + + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__map-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__map-concurrent-usm.C new file mode 100644 index 0000000..fe37426 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__map-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__map-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__map-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__map-concurrent.C new file mode 100644 index 0000000..36556ef --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__map-concurrent.C @@ -0,0 +1,70 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <set> +#include <map> + +#define N 3000 + +void init (int data[], bool unique) +{ + std::set<int> _set; + for (int i = 0; i < N; ++i) + { + // Avoid duplicates in data array if unique is true. 
+ do + data[i] = rand (); + while (unique && _set.find (data[i]) != _set.end ()); + _set.insert (data[i]); + } +} + +bool validate (long long sum, int keys[], int data[]) +{ + long long total = 0; + for (int i = 0; i < N; ++i) + total += (long long) keys[i] * data[i]; + return sum == total; +} + +int main (void) +{ + int keys[N], data[N]; + std::map<int,int> _map; + + srand (time (NULL)); + init (keys, true); + init (data, false); + +#ifndef MEM_SHARED + #pragma omp target enter data map (to: keys[:N], data[:N]) map (alloc: _map) +#endif + + #pragma omp target + { +#ifndef MEM_SHARED + new (&_map) std::map<int,int> (); +#endif + for (int i = 0; i < N; ++i) + _map[keys[i]] = data[i]; + } + + long long sum = 0; + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < N; ++i) + sum += (long long) keys[i] * _map[keys[i]]; + +#ifndef MEM_SHARED + #pragma omp target + _map.~map (); +#endif + +#ifndef MEM_SHARED + #pragma omp target exit data map (release: _map) +#endif + + bool ok = validate (sum, keys, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent-usm.C new file mode 100644 index 0000000..79f9245 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__multimap-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent.C new file mode 100644 index 0000000..6a4a4e8 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__multimap-concurrent.C @@ -0,0 +1,68 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <map> + +// Make sure that KEY_MAX is less than N to ensure some duplicate keys. +#define N 3000 +#define KEY_MAX 1000 + +void init (int data[], int max) +{ + for (int i = 0; i < N; ++i) + data[i] = rand () % max; +} + +bool validate (long long sum, int keys[], int data[]) +{ + long long total = 0; + for (int i = 0; i < N; ++i) + total += (long long) keys[i] * data[i]; + return sum == total; +} + +int main (void) +{ + int keys[N], data[N]; + std::multimap<int,int> _map; + + srand (time (NULL)); + init (keys, KEY_MAX); + init (data, RAND_MAX); + +#ifndef MEM_SHARED + #pragma omp target enter data map (to: keys[:N], data[:N]) map (alloc: _map) +#endif + + #pragma omp target + { +#ifndef MEM_SHARED + new (&_map) std::multimap<int,int> (); +#endif + for (int i = 0; i < N; ++i) + _map.insert({keys[i], data[i]}); + } + + long long sum = 0; + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < KEY_MAX; ++i) + { + auto range = _map.equal_range (i); + for (auto it = range.first; it != range.second; ++it) + sum += (long long) it->first * it->second; + } + +#ifndef MEM_SHARED + #pragma omp target + _map.~multimap (); +#endif + +#ifndef MEM_SHARED + #pragma omp target exit data map (release: _map) +#endif + + bool ok = validate (sum, keys, data); + return ok ? 
0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent-usm.C new file mode 100644 index 0000000..2d80756 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__multiset-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent.C new file mode 100644 index 0000000..b12402e --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__multiset-concurrent.C @@ -0,0 +1,62 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <stdio.h> +#include <time.h> +#include <set> +#include <algorithm> + +// MAX should be less than N to ensure that some duplicates occur. +#define N 4000 +#define MAX 1000 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand () % MAX; +} + +bool validate (int sum, int data[]) +{ + int total = 0; + for (int i = 0; i < N; ++i) + total += data[i]; + return sum == total; +} + +int main (void) +{ + int data[N]; + std::multiset<int> set; + int sum = 0; + + srand (time (NULL)); + init (data); + +#ifndef MEM_SHARED + #pragma omp target data map (to: data[:N]) map (alloc: set) +#endif + { + #pragma omp target + { +#ifndef MEM_SHARED + new (&set) std::multiset<int> (); +#endif + for (int i = 0; i < N; ++i) + set.insert (data[i]); + } + + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < MAX; ++i) + sum += i * set.count (i); + +#ifndef MEM_SHARED + #pragma omp target + set.~multiset (); +#endif + } + + bool ok = validate (sum, data); + return ok ? 
0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__numbers.C b/libgomp/testsuite/libgomp.c++/target-std__numbers.C new file mode 100644 index 0000000..a6b3665 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__numbers.C @@ -0,0 +1,93 @@ +// { dg-do run } +// { dg-additional-options "-std=c++20" } + +#include <cmath> +#include <numbers> + +#define FP_EQUAL(x,y) (std::abs ((x) - (y)) < 1E-6) + +#pragma omp declare target +template<typename T> bool test_pi () +{ + if (!FP_EQUAL (std::sin (std::numbers::pi_v<T>), (T) 0.0)) + return false; + if (!FP_EQUAL (std::cos (std::numbers::pi_v<T>), (T) -1.0)) + return false; + if (!FP_EQUAL (std::numbers::pi_v<T> * std::numbers::inv_pi_v<T>, (T) 1.0)) + return false; + if (!FP_EQUAL (std::numbers::pi_v<T> * std::numbers::inv_sqrtpi_v<T> + * std::numbers::inv_sqrtpi_v<T>, (T) 1.0)) + return false; + return true; +} + +template<typename T> bool test_sqrt () +{ + if (!FP_EQUAL (std::numbers::sqrt2_v<T> * std::numbers::sqrt2_v<T>, (T) 2.0)) + return false; + if (!FP_EQUAL (std::numbers::sqrt3_v<T> * std::numbers::sqrt3_v<T>, (T) 3.0)) + return false; + return true; +} + +template<typename T> bool test_phi () +{ + T myphi = ((T) 1.0 + std::sqrt ((T) 5.0)) / (T) 2.0; + if (!FP_EQUAL (myphi, std::numbers::phi_v<T>)) + return false; + return true; +} + +template<typename T> bool test_log () +{ + if (!FP_EQUAL (std::log ((T) 2.0), std::numbers::ln2_v<T>)) + return false; + if (!FP_EQUAL (std::log ((T) 10.0), std::numbers::ln10_v<T>)) + return false; + if (!FP_EQUAL (std::log2 ((T) std::numbers::e), std::numbers::log2e_v<T>)) + return false; + if (!FP_EQUAL (std::log10 ((T) std::numbers::e), std::numbers::log10e_v<T>)) + return false; + return true; +} + +template<typename T> bool test_egamma () +{ + T myegamma = 0.0; + #pragma omp parallel for reduction(+:myegamma) + for (int k = 2; k < 100000; ++k) + myegamma += (std::riemann_zeta (k) - 1) / k; + myegamma = (T) 1 - myegamma; + if (!FP_EQUAL (myegamma, std::numbers::egamma_v<T>)) + return false; + return true; +} +#pragma omp end declare target + +#define RUN_TEST(func) \ +{ \ + pass++; \ + bool ok = test_##func<float> (); \ + if (!ok) { result = pass; break; } \ + pass++; \ + ok = test_##func<double> (); \ + if (!ok) { result = pass; break; } \ +} + +int main (void) +{ + int result = 0; + + #pragma omp target map (tofrom: result) + do { + int pass = 0; + + RUN_TEST (pi); + RUN_TEST (sqrt); + RUN_TEST (phi); + RUN_TEST (log); + RUN_TEST (egamma); + } while (false); + + return result; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__set-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__set-concurrent-usm.C new file mode 100644 index 0000000..54f62e3 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__set-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__set-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__set-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__set-concurrent.C new file mode 100644 index 0000000..cd23128 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__set-concurrent.C @@ -0,0 +1,68 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <set> +#include <algorithm> + +#define N 4000 +#define MAX 16384 + +void init (int data[]) +{ + std::set<int> _set; + for (int i = 0; i < N; ++i) + { + // Avoid duplicates in data array. 
+ do + data[i] = rand () % MAX; + while (_set.find (data[i]) != _set.end ()); + _set.insert (data[i]); + } +} + +bool validate (int sum, int data[]) +{ + int total = 0; + for (int i = 0; i < N; ++i) + total += data[i]; + return sum == total; +} + +int main (void) +{ + int data[N]; + std::set<int> _set; + int sum = 0; + + srand (time (NULL)); + init (data); + +#ifndef MEM_SHARED + #pragma omp target data map (to: data[:N]) map (alloc: _set) +#endif + { + #pragma omp target + { +#ifndef MEM_SHARED + new (&_set) std::set<int> (); +#endif + for (int i = 0; i < N; ++i) + _set.insert (data[i]); + } + + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < MAX; ++i) + if (_set.find (i) != _set.end ()) + sum += i; + +#ifndef MEM_SHARED + #pragma omp target + _set.~set (); +#endif + } + + bool ok = validate (sum, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__span-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__span-concurrent-usm.C new file mode 100644 index 0000000..7ef16bf --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__span-concurrent-usm.C @@ -0,0 +1,7 @@ +// { dg-additional-options "-std=c++20" } + +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__span-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__span-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__span-concurrent.C new file mode 100644 index 0000000..046b3c1 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__span-concurrent.C @@ -0,0 +1,66 @@ +// { dg-do run } +// { dg-additional-options "-std=c++20" } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <span> + +#define N 64 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand (); +} + +#pragma omp declare target +bool validate (const std::span<int, N> &span, int data[]) +{ + for (int i = 0; i < N; ++i) + if (span[i] != data[i] * data[i]) + return false; + return true; +} +#pragma omp end declare target + +int main (void) +{ + int data[N]; + bool ok; + int elements[N]; + std::span<int, N> span(elements); + + srand (time (NULL)); + init (data); + +#ifndef MEM_SHARED + #pragma omp target enter data map (to: data[:N]) map (alloc: elements, span) +#endif + + #pragma omp target + { +#ifndef MEM_SHARED + new (&span) std::span<int, N> (elements); +#endif + std::copy (data, data + N, span.begin ()); + } + + #pragma omp target teams distribute parallel for + for (int i = 0; i < N; ++i) + span[i] *= span[i]; + + #pragma omp target map (from: ok) + { + ok = validate (span, data); +#ifndef MEM_SHARED + span.~span (); +#endif + } + +#ifndef MEM_SHARED + #pragma omp target exit data map (release: elements, span) +#endif + + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__unordered_map-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__unordered_map-concurrent.C new file mode 100644 index 0000000..00d7943 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__unordered_map-concurrent.C @@ -0,0 +1,66 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <set> +#include <unordered_map> + +#define N 3000 + +void init (int data[], bool unique) +{ + std::set<int> _set; + for (int i = 0; i < N; ++i) + { + // Avoid duplicates in data array if unique is true. 
+ do + data[i] = rand (); + while (unique && _set.count (data[i]) > 0); + _set.insert (data[i]); + } +} + +bool validate (long long sum, int keys[], int data[]) +{ + long long total = 0; + for (int i = 0; i < N; ++i) + total += (long long) keys[i] * data[i]; + return sum == total; +} + +int main (void) +{ + int keys[N], data[N]; + std::unordered_map<int,int> _map; + + srand (time (NULL)); + init (keys, true); + init (data, false); + + #pragma omp target enter data map (to: keys[:N], data[:N]) map (alloc: _map) + + #pragma omp target + { +#ifndef MEM_SHARED + new (&_map) std::unordered_map<int,int> (); +#endif + for (int i = 0; i < N; ++i) + _map[keys[i]] = data[i]; + } + + long long sum = 0; + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < N; ++i) + sum += (long long) keys[i] * _map[keys[i]]; + +#ifndef MEM_SHARED + #pragma omp target + _map.~unordered_map (); +#endif + + #pragma omp target exit data map (release: _map) + + bool ok = validate (sum, keys, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__unordered_multimap-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__unordered_multimap-concurrent.C new file mode 100644 index 0000000..2567634 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__unordered_multimap-concurrent.C @@ -0,0 +1,65 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <unordered_map> + +// Make sure that KEY_MAX is less than N to ensure some duplicate keys. +#define N 3000 +#define KEY_MAX 1000 + +void init (int data[], int max) +{ + for (int i = 0; i < N; ++i) + data[i] = i % max; +} + +bool validate (long long sum, int keys[], int data[]) +{ + long long total = 0; + for (int i = 0; i < N; ++i) + total += (long long) keys[i] * data[i]; + return sum == total; +} + +int main (void) +{ + int keys[N], data[N]; + std::unordered_multimap<int,int> _map; + + srand (time (NULL)); + init (keys, KEY_MAX); + init (data, RAND_MAX); + + #pragma omp target enter data map (to: keys[:N], data[:N]) map (alloc: _map) + + #pragma omp target + { +#ifndef MEM_SHARED + new (&_map) std::unordered_multimap<int,int> (); +#endif + for (int i = 0; i < N; ++i) + _map.insert({keys[i], data[i]}); + } + + long long sum = 0; + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < KEY_MAX; ++i) + { + auto range = _map.equal_range (i); + for (auto it = range.first; it != range.second; ++it) { + sum += (long long) it->first * it->second; + } + } + +#ifndef MEM_SHARED + #pragma omp target + _map.~unordered_multimap (); +#endif + + #pragma omp target exit data map (release: _map) + + bool ok = validate (sum, keys, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__unordered_multiset-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__unordered_multiset-concurrent.C new file mode 100644 index 0000000..da6c875 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__unordered_multiset-concurrent.C @@ -0,0 +1,59 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <unordered_set> +#include <algorithm> + +// MAX should be less than N to ensure that some duplicates occur. 
+#define N 4000 +#define MAX 1000 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand () % MAX; +} + +bool validate (int sum, int data[]) +{ + int total = 0; + for (int i = 0; i < N; ++i) + total += data[i]; + return sum == total; +} + +int main (void) +{ + int data[N]; + std::unordered_multiset<int> set; + int sum = 0; + + srand (time (NULL)); + init (data); + + #pragma omp target data map (to: data[:N]) map (alloc: set) + { + #pragma omp target + { +#ifndef MEM_SHARED + new (&set) std::unordered_multiset<int> (); +#endif + for (int i = 0; i < N; ++i) + set.insert (data[i]); + } + + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < MAX; ++i) + sum += i * set.count (i); + +#ifndef MEM_SHARED + #pragma omp target + set.~unordered_multiset (); +#endif + } + + bool ok = validate (sum, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__unordered_set-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__unordered_set-concurrent.C new file mode 100644 index 0000000..b7bd935 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__unordered_set-concurrent.C @@ -0,0 +1,66 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <unordered_set> +#include <algorithm> + +#define N 4000 +#define MAX 16384 + +void init (int data[]) +{ + std::unordered_set<int> _set; + for (int i = 0; i < N; ++i) + { + // Avoid duplicates in data array. + do + data[i] = rand () % MAX; + while (_set.count (data[i]) != 0); + _set.insert (data[i]); + } +} + +bool validate (int sum, int data[]) +{ + int total = 0; + for (int i = 0; i < N; ++i) + total += data[i]; + return sum == total; +} + +int main (void) +{ + int data[N]; + std::unordered_set<int> _set; + int sum = 0; + + srand (time (NULL)); + init (data); + + #pragma omp target data map (to: data[:N]) map (alloc: _set) + { + #pragma omp target + { +#ifndef MEM_SHARED + new (&_set) std::unordered_set<int> (); +#endif + for (int i = 0; i < N; ++i) + _set.insert (data[i]); + } + + #pragma omp target teams distribute parallel for reduction (+:sum) + for (int i = 0; i < MAX; ++i) + if (_set.count (i) > 0) + sum += i; + +#ifndef MEM_SHARED + #pragma omp target + _set.~unordered_set (); +#endif + } + + bool ok = validate (sum, data); + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__valarray-1.C b/libgomp/testsuite/libgomp.c++/target-std__valarray-1.C new file mode 100644 index 0000000..865cde2 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__valarray-1.C @@ -0,0 +1,179 @@ +// { dg-additional-options -std=c++20 } +// { dg-output-file target-std__valarray-1.output } + +#include <valarray> +#include <ostream> +#include <sstream> + + +/*TODO Work around PR118484 "ICE during IPA pass: cp, segfault in determine_versionability ipa-cp.cc:467". + +We can't: + + #pragma omp declare target(std::basic_streambuf<char, std::char_traits<char>>::basic_streambuf) + +... 
because: + + error: overloaded function name ‘std::basic_streambuf<char>::__ct ’ in clause ‘enter’ + +Therefore, use dummy classes in '#pragma omp declare target': +*/ + +#pragma omp declare target + +// For 'std::basic_streambuf<char, std::char_traits<char> >::basic_streambuf': + +class dummy_basic_streambuf__char + : public std::basic_streambuf<char> +{ +public: + dummy_basic_streambuf__char() {} +}; + +// For 'std::basic_ios<char, std::char_traits<char> >::basic_ios()': + +class dummy_basic_ios__char + : public std::basic_ios<char> +{ +public: + dummy_basic_ios__char() {} +}; + +#pragma omp end declare target + + +int main() +{ + // Due to PR120021 "Offloading vs. C++ 'std::initializer_list'", we can't construct these on the device. + std::initializer_list<int> v1_i = {10, 20, 30, 40, 50}; + const int *v1_i_data = std::data(v1_i); + size_t v1_i_size = v1_i.size(); + std::initializer_list<int> v2_i = {5, 4, 3, 2, 1}; + const int *v2_i_data = std::data(v2_i); + size_t v2_i_size = v2_i.size(); + std::initializer_list<int> shiftData_i = {1, 2, 3, 4, 5}; + const int *shiftData_i_data = std::data(shiftData_i); + size_t shiftData_i_size = shiftData_i.size(); +#pragma omp target \ + defaultmap(none) \ + map(to: v1_i_data[:v1_i_size], v1_i_size, \ + v2_i_data[:v2_i_size], v2_i_size, \ + shiftData_i_data[:shiftData_i_size], shiftData_i_size) + { + /* Manually set up a buffer we can stream into, similar to 'cout << [...]', and print it at the end of region. */ + std::stringbuf out_b; + std::ostream out(&out_b); + + std::valarray<int> v1(v1_i_data, v1_i_size); + out << "\nv1:"; + for (auto val : v1) + out << " " << val; + + std::valarray<int> v2(v2_i_data, v2_i_size); + out << "\nv2:"; + for (auto val : v2) + out << " " << val; + + std::valarray<int> sum = v1 + v2; + out << "\nv1 + v2:"; + for (auto val : sum) + out << " " << val; + + std::valarray<int> diff = v1 - v2; + out << "\nv1 - v2:"; + for (auto val : diff) + out << " " << val; + + std::valarray<int> product = v1 * v2; + out << "\nv1 * v2:"; + for (auto val : product) + out << " " << val; + + std::valarray<int> quotient = v1 / v2; + out << "\nv1 / v2:"; + for (auto val : quotient) + out << " " << val; + + std::valarray<int> squares = pow(v1, 2); + out << "\npow(v1, 2):"; + for (auto val : squares) + out << " " << val; + + std::valarray<int> sinhs = sinh(v2); + out << "\nsinh(v2):"; + for (auto val : sinhs) + out << " " << val; + + std::valarray<int> logs = log(v1 * v2); + out << "\nlog(v1 * v2):"; + for (auto val : logs) + out << " " << val; + + std::valarray<int> data(12); + for (size_t i = 0; i < data.size(); ++i) + data[i] = i; + out << "\nOriginal array:"; + for (auto val : data) + out << " " << val; + + std::slice slice1(2, 5, 1); + std::valarray<int> sliced1 = data[slice1]; + out << "\nSlice(2, 5, 1):"; + for (auto val : sliced1) + out << " " << val; + + std::slice slice2(1, 4, 3); + std::valarray<int> sliced2 = data[slice2]; + out << "\nSlice(1, 4, 3):"; + for (auto val : sliced2) + out << " " << val; + + data[slice1] = 99; + out << "\nArray after slice modification:"; + for (auto val : data) + out << " " << val; + + std::valarray<bool> mask = (v1 > 20); + out << "\nElements of v1 > 20:"; + for (size_t i = 0; i < v1.size(); ++i) + { + if (mask[i]) + out << " " << v1[i]; + } + + std::valarray<int> masked = v1[mask]; + out << "\nMasked array:"; + for (auto val : masked) + out << " " << val; + + std::valarray<int> shiftData(shiftData_i_data, shiftData_i_size); + out << "\nOriginal shiftData:"; + for (auto val : shiftData) + out << " " 
<< val; + + std::valarray<int> shifted = shiftData.shift(2); + out << "\nshift(2):"; + for (auto val : shifted) + out << " " << val; + + std::valarray<int> cshifted = shiftData.cshift(-1); + out << "\ncshift(-1):"; + for (auto val : cshifted) + out << " " << val; + + out << "\nSum(v1): " << v1.sum(); + out << "\nMin(v1): " << v1.min(); + out << "\nMax(v1): " << v1.max(); + + out << "\n"; + + /* Terminate with a NUL. Otherwise, we'd have to use: + __builtin_printf("%.*s", (int) out_b_sv.size(), out_b_sv.data()); + ... which nvptx 'printf', as implemented via PTX 'vprintf', doesn't support (TODO). */ + out << '\0'; + std::string_view out_b_sv = out_b.view(); + __builtin_printf("%s", out_b_sv.data()); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__valarray-1.output b/libgomp/testsuite/libgomp.c++/target-std__valarray-1.output new file mode 100644 index 0000000..c441e06 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__valarray-1.output @@ -0,0 +1,22 @@ + +v1: 10 20 30 40 50 +v2: 5 4 3 2 1 +v1 + v2: 15 24 33 42 51 +v1 - v2: 5 16 27 38 49 +v1 * v2: 50 80 90 80 50 +v1 / v2: 2 5 10 20 50 +pow(v1, 2): 100 400 900 1600 2500 +sinh(v2): 74 27 10 3 1 +log(v1 * v2): 3 4 4 4 3 +Original array: 0 1 2 3 4 5 6 7 8 9 10 11 +Slice(2, 5, 1): 2 3 4 5 6 +Slice(1, 4, 3): 1 4 7 10 +Array after slice modification: 0 1 99 99 99 99 99 7 8 9 10 11 +Elements of v1 > 20: 30 40 50 +Masked array: 30 40 50 +Original shiftData: 1 2 3 4 5 +shift(2): 3 4 5 0 0 +cshift(-1): 5 1 2 3 4 +Sum(v1): 150 +Min(v1): 10 +Max(v1): 50 diff --git a/libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent-usm.C new file mode 100644 index 0000000..41ec80e --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__valarray-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent.C new file mode 100644 index 0000000..8933072b --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__valarray-concurrent.C @@ -0,0 +1,66 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <valarray> + +#define N 50000 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand (); +} + +#pragma omp declare target +bool validate (const std::valarray<int> &arr, int data[]) +{ + for (int i = 0; i < N; ++i) + if (arr[i] != data[i] * data[i] + i) + return false; + return true; +} +#pragma omp end declare target + +int main (void) +{ + int data[N]; + bool ok; + + srand (time (NULL)); + init (data); + +#ifdef MEM_SHARED + std::valarray<int> arr (data, N); +#else + std::valarray<int> arr; +#endif + +#ifndef MEM_SHARED + #pragma omp target data map (to: data[:N]) map (alloc: arr) +#endif + { + #pragma omp target + { +#ifndef MEM_SHARED + new (&arr) std::valarray<int> (data, N); +#endif + arr *= arr; + } + + #pragma omp target teams distribute parallel for + for (int i = 0; i < N; ++i) + arr[i] += i; + + #pragma omp target map (from: ok) + { + ok = validate (arr, data); +#ifndef MEM_SHARED + arr.~valarray (); +#endif + } + } + + return ok ? 
0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c++/target-std__vector-concurrent-usm.C b/libgomp/testsuite/libgomp.c++/target-std__vector-concurrent-usm.C new file mode 100644 index 0000000..967bff3 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__vector-concurrent-usm.C @@ -0,0 +1,5 @@ +#pragma omp requires unified_shared_memory self_maps + +#define MEM_SHARED + +#include "target-std__vector-concurrent.C" diff --git a/libgomp/testsuite/libgomp.c++/target-std__vector-concurrent.C b/libgomp/testsuite/libgomp.c++/target-std__vector-concurrent.C new file mode 100644 index 0000000..a94b4cf --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-std__vector-concurrent.C @@ -0,0 +1,63 @@ +// { dg-do run } +// { dg-additional-options -DMEM_SHARED { target offload_device_shared_as } } + +#include <stdlib.h> +#include <time.h> +#include <vector> + +#define N 50000 + +void init (int data[]) +{ + for (int i = 0; i < N; ++i) + data[i] = rand (); +} + +#pragma omp declare target +bool validate (const std::vector<int> &vec, int data[]) +{ + for (int i = 0; i < N; ++i) + if (vec[i] != data[i] * data[i]) + return false; + return true; +} +#pragma omp end declare target + +int main (void) +{ + int data[N]; + bool ok; + + srand (time (NULL)); + init (data); + +#ifdef MEM_SHARED + std::vector<int> vec (data, data + N); +#else + std::vector<int> vec; +#endif + +#ifndef MEM_SHARED + #pragma omp target data map (to: data[:N]) map (alloc: vec) +#endif + { +#ifndef MEM_SHARED + #pragma omp target + new (&vec) std::vector<int> (data, data + N); +#endif + + #pragma omp target teams distribute parallel for + for (int i = 0; i < N; ++i) + vec[i] *= vec[i]; + + #pragma omp target map (from: ok) + { + ok = validate (vec, data); +#ifndef MEM_SHARED + vec.~vector (); +#endif + } + } + + return ok ? 0 : 1; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-10.c b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-10.c new file mode 100644 index 0000000..00eb48b --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-10.c @@ -0,0 +1,64 @@ +/* { dg-do run } */ + +#include <string.h> +#include <stdlib.h> +#include <assert.h> + +#define N 64 + +typedef struct { + int *arr; + int size; +} B; + +#pragma omp declare mapper (mapB : B myb) map(to: myb.size, myb.arr) \ + map(tofrom: myb.arr[0:myb.size]) +// While GCC handles more, only default is ... +#pragma omp declare mapper (default : B myb) map(to: myb.size, myb.arr) \ + map(tofrom: myb.arr[0:myb.size]) + +struct A { + int *arr1; + B *arr2; + int arr3[N]; +}; + +int +main (int argc, char *argv[]) +{ + struct A var; + + memset (&var, 0, sizeof var); + var.arr1 = (int *) calloc (N, sizeof (int)); + var.arr2 = (B *) malloc (sizeof (B)); + var.arr2->arr = (int *) calloc (N, sizeof (float)); + var.arr2->size = N; + + { + // ... 
permitted here: + #pragma omp declare mapper (struct A x) map(to: x.arr1, x.arr2) \ + map(tofrom: x.arr1[0:N]) \ + map(mapper(default), tofrom: x.arr2[0:1]) + #pragma omp target + { + for (int i = 0; i < N; i++) + { + var.arr1[i]++; + var.arr2->arr[i]++; + } + } + } + + for (int i = 0; i < N; i++) + { + assert (var.arr1[i] == 1); + assert (var.arr2->arr[i] == 1); + assert (var.arr3[i] == 0); + } + + free (var.arr1); + free (var.arr2->arr); + free (var.arr2); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-11.c b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-11.c new file mode 100644 index 0000000..942d6a5 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-11.c @@ -0,0 +1,59 @@ +/* { dg-do run } */ + +#include <string.h> +#include <stdlib.h> +#include <assert.h> + +#define N 64 + +typedef struct B_tag { + int *arr; + int size; +} B; + +#pragma omp declare mapper (B myb) map(to: myb.size, myb.arr) \ + map(tofrom: myb.arr[0:myb.size]) + +struct A { + int *arr1; + B *arr2; + int arr3[N]; +}; + +int +main (int argc, char *argv[]) +{ + struct A var; + + memset (&var, 0, sizeof var); + var.arr1 = (int *) calloc (N, sizeof (int)); + var.arr2 = (B *) malloc (sizeof (B)); + var.arr2->arr = (int *) calloc (N, sizeof (int)); + var.arr2->size = N; + + { + #pragma omp declare mapper (struct A x) map(to: x.arr1, x.arr2) \ + map(tofrom: x.arr1[0:N]) map(tofrom: x.arr2[0:1]) + #pragma omp target + { + for (int i = 0; i < N; i++) + { + var.arr1[i]++; + var.arr2->arr[i]++; + } + } + } + + for (int i = 0; i < N; i++) + { + assert (var.arr1[i] == 1); + assert (var.arr2->arr[i] == 1); + assert (var.arr3[i] == 0); + } + + free (var.arr1); + free (var.arr2->arr); + free (var.arr2); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-12.c b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-12.c new file mode 100644 index 0000000..cfc6a91 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-12.c @@ -0,0 +1,94 @@ +/* { dg-do run } */ + +#include <string.h> +#include <stdlib.h> +#include <assert.h> + +#define N 64 + +typedef struct { + int *arr; + int size; +} B; + +#pragma omp declare mapper (samename : B myb) map(to: myb.size, myb.arr) \ + map(tofrom: myb.arr[0:myb.size]) +// While GCC handles more, only default is ... +#pragma omp declare mapper (default : B myb) map(to: myb.size, myb.arr) \ + map(tofrom: myb.arr[0:myb.size]) +typedef struct { + int *arr; + int size; +} C; + + +struct A { + int *arr1; + B *arr2; + C *arr3; +}; + +int +main (int argc, char *argv[]) +{ + struct A var; + + memset (&var, 0, sizeof var); + var.arr1 = (int *) calloc (N, sizeof (int)); + var.arr2 = (B *) malloc (sizeof (B)); + var.arr2->arr = (int *) calloc (N, sizeof (int)); + var.arr2->size = N; + var.arr3 = (C *) malloc (sizeof (C)); + var.arr3->arr = (int *) calloc (N, sizeof (int)); + var.arr3->size = N; + + { + // ... permitted here. + #pragma omp declare mapper (struct A x) map(to: x.arr1, x.arr2) \ + map(tofrom: x.arr1[0:N]) \ + map(mapper(default), tofrom: x.arr2[0:1]) + #pragma omp target + { + for (int i = 0; i < N; i++) + { + var.arr1[i]++; + var.arr2->arr[i]++; + } + } + } + + { + #pragma omp declare mapper (samename : C myc) map(to: myc.size, myc.arr) \ + map(tofrom: myc.arr[0:myc.size]) + // While GCC handles more, only default is ... + #pragma omp declare mapper (default : C myc) map(to: myc.size, myc.arr) \ + map(tofrom: myc.arr[0:myc.size]) + // ... permitted here. 
+ #pragma omp declare mapper (struct A x) map(to: x.arr1, x.arr3) \ + map(tofrom: x.arr1[0:N]) \ + map(mapper( default ) , tofrom: *x.arr3) + #pragma omp target + { + for (int i = 0; i < N; i++) + { + var.arr1[i]++; + var.arr3->arr[i]++; + } + } + } + + for (int i = 0; i < N; i++) + { + assert (var.arr1[i] == 2); + assert (var.arr2->arr[i] == 1); + assert (var.arr3->arr[i] == 1); + } + + free (var.arr1); + free (var.arr2->arr); + free (var.arr2); + free (var.arr3->arr); + free (var.arr3); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-13.c b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-13.c new file mode 100644 index 0000000..c4784eb --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-13.c @@ -0,0 +1,55 @@ +/* { dg-do run } */ + +#include <assert.h> + +struct T { + int a; + int b; + int c; +}; + +void foo (void) +{ + struct T x; + x.a = x.b = x.c = 0; + +#pragma omp target + { + x.a++; + x.c++; + } + + assert (x.a == 1); + assert (x.b == 0); + assert (x.c == 1); +} + +// An identity mapper. This should do the same thing as the default! +#pragma omp declare mapper (struct T v) map(v) + +void bar (void) +{ + struct T x; + x.a = x.b = x.c = 0; + +#pragma omp target + { + x.b++; + } + +#pragma omp target map(x) + { + x.a++; + } + + assert (x.a == 1); + assert (x.b == 1); + assert (x.c == 0); +} + +int main (int argc, char *argv[]) +{ + foo (); + bar (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-14.c b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-14.c new file mode 100644 index 0000000..3e6027e --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-14.c @@ -0,0 +1,57 @@ +/* { dg-do run } */ + +#include <stdlib.h> +#include <assert.h> + +struct Z { + int *arr; +}; + +void baz (struct Z *zarr, int len) +{ +#pragma omp declare mapper (struct Z myvar) map(to: myvar.arr) \ + map(tofrom: myvar.arr[0:len]) + zarr[0].arr = (int *) calloc (len, sizeof (int)); + zarr[5].arr = (int *) calloc (len, sizeof (int)); + +#pragma omp target map(zarr, *zarr) + { + for (int i = 0; i < len; i++) + zarr[0].arr[i]++; + } + +#pragma omp target map(zarr, zarr[5]) + { + for (int i = 0; i < len; i++) + zarr[5].arr[i]++; + } + +#pragma omp target map(zarr[5]) + { + for (int i = 0; i < len; i++) + zarr[5].arr[i]++; + } + +#pragma omp target map(zarr, zarr[5:1]) + { + for (int i = 0; i < len; i++) + zarr[5].arr[i]++; + } + + for (int i = 0; i < len; i++) + assert (zarr[0].arr[i] == 1); + + for (int i = 0; i < len; i++) + assert (zarr[5].arr[i] == 3); + + free (zarr[5].arr); + free (zarr[0].arr); +} + +int +main (int argc, char *argv[]) +{ + struct Z myzarr[10]; + baz (myzarr, 256); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-9.c b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-9.c new file mode 100644 index 0000000..324d535 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/declare-mapper-9.c @@ -0,0 +1,62 @@ +/* { dg-do run } */ + +#include <string.h> +#include <stdlib.h> +#include <assert.h> + +#define N 64 + +struct A { + int *arr1; + float *arr2; + int arr3[N]; +}; + +int +main (int argc, char *argv[]) +{ + struct A var; + + memset (&var, 0, sizeof var); + var.arr1 = (int *) calloc (N, sizeof (int)); + var.arr2 = (float *) calloc (N, sizeof (float)); + + { + #pragma omp declare mapper (struct A x) map(to: x.arr1) \ + map(tofrom: x.arr1[0:N]) + #pragma omp target + { + for (int i = 0; i < N; i++) + var.arr1[i]++; + } + } + + { + 
#pragma omp declare mapper (struct A x) map(to: x.arr2) \ + map(tofrom: x.arr2[0:N]) + #pragma omp target + { + for (int i = 0; i < N; i++) + var.arr2[i]++; + } + } + + { + #pragma omp declare mapper (struct A x) map(tofrom: x.arr3[0:N]) + #pragma omp target + { + for (int i = 0; i < N; i++) + var.arr3[i]++; + } + } + + for (int i = 0; i < N; i++) + { + assert (var.arr1[i] == 1); + assert (var.arr2[i] == 1); + assert (var.arr3[i] == 1); + } + + free (var.arr1); + free (var.arr2); +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/interop-2.c b/libgomp/testsuite/libgomp.c-c++-common/interop-2.c new file mode 100644 index 0000000..a7526dc --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/interop-2.c @@ -0,0 +1,129 @@ +/* { dg-do run } */ +/* { dg-additional-options "-lm" } */ + +/* Note: At the time this program was written, Nvptx was not asynchronous + enough to trigger the issue (with a 'nowait' added); however, on + AMD GPUs, it triggered. */ + +/* Test whether nowait / dependency is handled correctly. + Motivated by OpenMP_VV's 5.1/interop/test_interop_target.c + + The code only creates a streaming object without actually using it, + except for dependency tracking. + + Note that there is a difference between having a streaming (targetsync) object + and not (= omp_interop_none); at least if one assumes that omp_interop_none + does not include 'targetsync' as (effective) interop type - in that case, + 'nowait' has no effect and the 'depend' is active as an included task, otherwise + the code continues with the depend being active only for the about to be + destroyed or used thread. + + The OpenMP spec states (here 6.0): + "If the interop-type set includes 'targetsync', an empty mergeable task is + generated. If the 'nowait' clause is not present on the construct then + the task is also an included task. If the interop-type set does not + include 'targetsync', the 'nowait' clause has no effect. Any depend + clauses that are present on the construct apply to the generated task." */ + +#include <omp.h> + +void +test_async (const int dev) +{ + constexpr int N = 2048; + constexpr int ulp = 4; + constexpr double M_PI = 2.0 * __builtin_acos (0.0); + omp_interop_t obj1, obj2; + double A[N] = { }; + int B[N] = { }; + + /* Create interop object. */ + #pragma omp interop device(dev) init(targetsync : obj1, obj2) + + if (dev == omp_initial_device || dev == omp_get_num_devices ()) + { + if (obj1 != omp_interop_none || obj2 != omp_interop_none) + __builtin_abort (); + } + else + { + if (obj1 == omp_interop_none || obj2 == omp_interop_none) + __builtin_abort (); + } + + /* DOUBLE */ + + /* Now in the background update it, slowly enough that the + code afterwards is reached while still running asynchronously. + As OpenMP_VV's Issue #863 shows, the overhead is high enough to + fail even when only doing an atomic integer increment. */ + + #pragma omp target device(dev) map(A) depend(out: A[:N]) nowait + for (int i = 0; i < N; i++) + #pragma omp atomic update + A[i] += __builtin_sin (2*i*M_PI/N); + + /* DESTROY takes care of the dependency such that ... */ + + if (obj1 == omp_interop_none) + { + // Same as below as 'nowait' is ignored. + #pragma omp interop destroy(obj1) depend(in: A[:N]) nowait + } + else + { + #pragma omp interop destroy(obj1) depend(in: A[:N]) + } + + /* ... this code is only executed once the dependency has been fulfilled. */ + + /* Check the value - part I: quick, avoid A[0] == sin(0) = 0.
*/ + for (int i = 1; i < N; i++) + if (A[i] == 0.0) + __builtin_abort (); + + /* Check the value - part II: thoroughly */ + for (int i = 0; i < N; i++) + { + double x = A[i]; + double y = __builtin_sin (2*i*M_PI/N); + if (__builtin_fabs (x - y) > ulp * __builtin_fabs (x+y) * __DBL_EPSILON__) + __builtin_abort (); + } + + /* Integer */ + + #pragma omp target device(dev) map(B) depend(out: B[:N]) nowait + for (int i = 0; i < N; i++) + #pragma omp atomic update + B[i] += 42; + + /* Same - but using USE. */ + if (obj2 == omp_interop_none) + { + // Same as below as 'nowait' is ignored. + #pragma omp interop use(obj2) depend(in: B[:N]) nowait + } + else + { + #pragma omp interop use(obj2) depend(in: B[:N]) + } + + for (int i = 0; i < N; i++) + if (B[i] != 42) + __builtin_abort (); + + #pragma omp interop destroy(obj2) +} + +int +main () +{ + int ndev = omp_get_num_devices (); + + for (int dev = 0; dev <= ndev; dev++) + test_async (dev); + test_async (omp_initial_device); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/metadirective-1.c b/libgomp/testsuite/libgomp.c-c++-common/metadirective-1.c index a57d6fd..fbe4ac3 100644 --- a/libgomp/testsuite/libgomp.c-c++-common/metadirective-1.c +++ b/libgomp/testsuite/libgomp.c-c++-common/metadirective-1.c @@ -1,4 +1,5 @@ -/* { dg-do run } */ +/* { dg-do run { target { ! offload_target_nvptx } } } */ +/* { dg-do compile { target offload_target_nvptx } } */ #define N 100 @@ -7,12 +8,17 @@ f (int x[], int y[], int z[]) { int i; + // The following fails as on the host the target side cannot be + // resolved - and the 'teams' or not status affects how 'target' + // is called. + // Note also the dg-do compile above for offload_target_nvptx #pragma omp target map(to: x[0:N], y[0:N]) map(from: z[0:N]) #pragma omp metadirective \ when (device={arch("nvptx")}: teams loop) \ default (parallel loop) for (i = 0; i < N; i++) z[i] = x[i] * y[i]; + /* { dg-bogus "'target' construct with nested 'teams' construct contains directives outside of the 'teams' construct" "PR118694" { xfail offload_target_nvptx } .-6 } */ } int diff --git a/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-2.c b/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-2.c new file mode 100644 index 0000000..b36d2f5 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-2.c @@ -0,0 +1,62 @@ +// PR libgomp/120444 +// Async version + +#include <omp.h> + +int main() +{ + #pragma omp parallel for + for (int dev = omp_initial_device; dev <= omp_get_num_devices (); dev++) + { + char *ptr = (char *) omp_target_alloc (sizeof(int) * 1024, dev); + + omp_depend_t dep; + #pragma omp depobj(dep) depend(inout: ptr) + + /* Play also around with the alignment - as hsa_amd_memory_fill operates + on multiples of 4 bytes (uint32_t).
*/ + + for (int start = 0; start < 32; start++) + for (int tail = 0; tail < 32; tail++) + { + unsigned char val = '0' + start + tail; +#if __cplusplus + void *ptr2 = omp_target_memset_async (ptr + start, val, + 1024 - start - tail, dev, 0); +#else + void *ptr2 = omp_target_memset_async (ptr + start, val, + 1024 - start - tail, dev, 0, nullptr); +#endif + if (ptr + start != ptr2) + __builtin_abort (); + + #pragma omp taskwait + + #pragma omp target device(dev) is_device_ptr(ptr) depend(depobj: dep) nowait + for (int i = start; i < 1024 - start - tail; i++) + { + if (ptr[i] != val) + __builtin_abort (); + ptr[i] += 2; + } + + omp_target_memset_async (ptr + start, val + 3, + 1024 - start - tail, dev, 1, &dep); + + #pragma omp target device(dev) is_device_ptr(ptr) depend(depobj: dep) nowait + for (int i = start; i < 1024 - start - tail; i++) + { + if (ptr[i] != val + 3) + __builtin_abort (); + ptr[i] += 1; + } + + omp_target_memset_async (ptr + start, val - 3, + 1024 - start - tail, dev, 1, &dep); + + #pragma omp taskwait depend (depobj: dep) + } + #pragma omp depobj(dep) destroy + omp_target_free (ptr, dev); + } +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-3.c b/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-3.c new file mode 100644 index 0000000..c0e4fa9 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset-3.c @@ -0,0 +1,80 @@ +#include <stddef.h> +#include <stdint.h> +#include <omp.h> + +#define MIN(x,y) ((x) < (y) ? x : y) + +enum { N = 524288 + 8 }; + +static void +init_val (int8_t *ptr, int val, size_t count) +{ + #pragma omp target is_device_ptr(ptr) firstprivate(val, count) + __builtin_memset (ptr, val, count); +} + +static void +check_val (int8_t *ptr, int val, size_t count) +{ + if (count == 0) + return; + #pragma omp target is_device_ptr(ptr) firstprivate(val, count) + for (size_t i = 0; i < count; i++) + if (ptr[i] != val) __builtin_abort (); +} + +static void +test_it (int8_t *ptr, int lshift, size_t count) +{ + if (N < count + lshift) __builtin_abort (); + if (lshift >= 4) __builtin_abort (); + ptr += lshift; + + init_val (ptr, 'z', MIN (count + 32, N - lshift)); + + omp_target_memset (ptr, '1', count, omp_get_default_device()); + + check_val (ptr, '1', count); + check_val (ptr + count, 'z', MIN (32, N - lshift - count)); +} + + +int main() +{ + size_t size; + int8_t *ptr = (int8_t *) omp_target_alloc (N + 3, omp_get_default_device()); + ptr += (4 - (uintptr_t) ptr % 4) % 4; + if ((uintptr_t) ptr % 4 != 0) __builtin_abort (); + + test_it (ptr, 0, 1); + test_it (ptr, 3, 1); + test_it (ptr, 0, 4); + test_it (ptr, 3, 4); + test_it (ptr, 0, 5); + test_it (ptr, 3, 5); + test_it (ptr, 0, 6); + test_it (ptr, 3, 6); + + for (int i = 1; i <= 9; i++) + { + switch (i) + { + case 1: size = 16; break; // = 2^4 bytes + case 2: size = 32; break; // = 2^5 bytes + case 3: size = 64; break; // = 2^6 bytes + case 4: size = 128; break; // = 2^7 bytes + case 5: size = 256; break; // = 2^8 bytes + case 6: size = 512; break; // = 2^9 bytes + case 7: size = 65536; break; // = 2^16 bytes + case 8: size = 262144; break; // = 2^18 bytes + case 9: size = 524288; break; // = 2^19 bytes + default: __builtin_abort (); + } + test_it (ptr, 0, size); + test_it (ptr, 3, size); + test_it (ptr, 0, size + 1); + test_it (ptr, 3, size + 1); + test_it (ptr, 3, size + 2); + } + omp_target_free (ptr, omp_get_default_device()); +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset.c b/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset.c
new file mode 100644 index 0000000..01909f8 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/omp_target_memset.c @@ -0,0 +1,62 @@ +// PR libgomp/120444 + +#include <omp.h> + +int main() +{ + for (int dev = omp_initial_device; dev < omp_get_num_devices (); dev++) + { + char *ptr = (char *) omp_target_alloc (sizeof(int) * 1024, dev); + + /* Play also around with the alignment - as hsa_amd_memory_fill operates + on multiples of 4 bytes (uint32_t). */ + + for (int start = 0; start < 32; start++) + for (int tail = 0; tail < 32; tail++) + { + unsigned char val = '0' + start + tail; + void *ptr2 = omp_target_memset (ptr + start, val, + 1024 - start - tail, dev); + if (ptr + start != ptr2) + __builtin_abort (); + + #pragma omp target device(dev) is_device_ptr(ptr) + for (int i = start; i < 1024 - start - tail; i++) + if (ptr[i] != val) + __builtin_abort (); + + } + + /* Check 'small' values for correctness. */ + + for (int start = 0; start < 32; start++) + for (int size = 0; size <= 64 + 32; size++) + { + omp_target_memset (ptr, 'a' - 2, 1024, dev); + + unsigned char val = '0' + start + size % 32; + void *ptr2 = omp_target_memset (ptr + start, val, size, dev); + + if (ptr + start != ptr2) + __builtin_abort (); + + if (size == 0) + continue; + + #pragma omp target device(dev) is_device_ptr(ptr) + { + for (int i = 0; i < start; i++) + if (ptr[i] != 'a' - 2) + __builtin_abort (); + for (int i = start; i < start + size; i++) + if (ptr[i] != val) + __builtin_abort (); + for (int i = start + size + 1; i < 1024; i++) + if (ptr[i] != 'a' - 2) + __builtin_abort (); + } + } + + omp_target_free (ptr, dev); + } +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/pr96390.c b/libgomp/testsuite/libgomp.c-c++-common/pr96390.c index b89f934..ca7865d 100644 --- a/libgomp/testsuite/libgomp.c-c++-common/pr96390.c +++ b/libgomp/testsuite/libgomp.c-c++-common/pr96390.c @@ -1,7 +1,7 @@ /* { dg-additional-options "-O0 -fdump-tree-omplower" } */ /* { dg-additional-options "-foffload=-Wa,--verify" { target offload_target_nvptx } } */ /* { dg-require-alias "" } */ -/* { dg-xfail-if "PR 97102/PR 97106 - .alias not (yet) supported for nvptx" { offload_target_nvptx } } */ +/* { dg-xfail-if PR105018 { offload_target_nvptx } } */ #ifdef __cplusplus extern "C" { diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c b/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c new file mode 100644 index 0000000..9bf949a --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1-O0.c @@ -0,0 +1,3 @@ +/* { dg-additional-options -O0 } */ + +#include "target-abi-struct-1.c" diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1.c b/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1.c new file mode 100644 index 0000000..d9268af --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/target-abi-struct-1.c @@ -0,0 +1 @@ +#include "../libgomp.oacc-c-c++-common/abi-struct-1.c" diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-cdtor-1.c b/libgomp/testsuite/libgomp.c-c++-common/target-cdtor-1.c new file mode 100644 index 0000000..e6099cf --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/target-cdtor-1.c @@ -0,0 +1,89 @@ +/* Offloaded 'constructor' and 'destructor' functions. 
*/ + +#include <omp.h> + +#pragma omp declare target + +static void +__attribute__((constructor)) +initHD1() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +static void +__attribute__((constructor)) +initHD2() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +static void +__attribute__((destructor)) +finiHD1() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +static void +__attribute__((destructor)) +finiHD2() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +#pragma omp end declare target + +static void +__attribute__((constructor)) +initH1() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +static void +__attribute__((destructor)) +finiH2() +{ + __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device()); +} + +int main() +{ + int c = 0; + + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + +#pragma omp target map(c) + { + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + } + +#pragma omp target map(c) + { + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + } + + __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device()); + + return 0; +} + +/* The order is undefined, in which same-priority 'constructor' functions, and 'destructor' functions are run. + { dg-output {init[^,]+, 1[\r\n]+} } + { dg-output {init[^,]+, 1[\r\n]+} } + { dg-output {init[^,]+, 1[\r\n]+} } + { dg-output {main:1, 1[\r\n]+} } + { dg-output {initHD[^,]+, 0[\r\n]+} { target offload_device } } + { dg-output {initHD[^,]+, 0[\r\n]+} { target offload_device } } + { dg-output {main:2, 1[\r\n]+} { target { ! offload_device } } } + { dg-output {main:2, 0[\r\n]+} { target offload_device } } + { dg-output {main:3, 1[\r\n]+} { target { ! offload_device } } } + { dg-output {main:3, 0[\r\n]+} { target offload_device } } + { dg-output {main:4, 1[\r\n]+} } + { dg-output {finiHD[^,]+, 0[\r\n]+} { target offload_device } } + { dg-output {finiHD[^,]+, 0[\r\n]+} { target offload_device } } + { dg-output {fini[^,]+, 1[\r\n]+} } + { dg-output {fini[^,]+, 1[\r\n]+} } + { dg-output {fini[^,]+, 1[\r\n]+} } +*/ diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/aarch64.exp b/libgomp/testsuite/libgomp.c-target/aarch64/aarch64.exp new file mode 100644 index 0000000..02d5503 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-target/aarch64/aarch64.exp @@ -0,0 +1,57 @@ +# Copyright (C) 2006-2025 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +# Load support procs. +load_lib libgomp-dg.exp +load_gcc_lib gcc-dg.exp + +# Exit immediately if this isn't an AArch64 target. 
+if {![istarget aarch64*-*-*] } then { + return +} + +lappend ALWAYS_CFLAGS "compiler=$GCC_UNDER_TEST" + +if { [check_effective_target_aarch64_sve] } { + set sve_flags "" +} else { + set sve_flags "-march=armv8.2-a+sve" +} + +# Initialize `dg'. +dg-init + +#if ![check_effective_target_fopenmp] { +# return +#} + +# Turn on OpenMP. +lappend ALWAYS_CFLAGS "additional_flags=-fopenmp" + +# Gather a list of all tests. +set tests [lsort [find $srcdir/$subdir *.c]] + +set ld_library_path $always_ld_library_path +append ld_library_path [gcc-set-multilib-library-path $GCC_UNDER_TEST] +set_ld_library_path_env_vars + +# Main loop. +dg-runtest $tests "" $sve_flags + +# All done. +dg-finish diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/firstprivate.c b/libgomp/testsuite/libgomp.c-target/aarch64/firstprivate.c new file mode 100644 index 0000000..58674e2 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-target/aarch64/firstprivate.c @@ -0,0 +1,129 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-msve-vector-bits=256 -fopenmp -O2" } */ + +#pragma GCC target "+sve" + +#include <arm_sve.h> +#include <omp.h> + +static void __attribute__ ((noipa)) +vec_compare (svint32_t *x, svint32_t y) +{ + svbool_t p = svnot_b_z (svptrue_b32 (), svcmpeq_s32 (svptrue_b32 (), *x, y)); + + if (svptest_any (svptrue_b32 (), p)) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +firstprivate_sections () +{ + int b[8], c[8]; + svint32_t vb, vc; + int i; + +#pragma omp parallel for + for (i = 0; i < 8; i++) + { + b[i] = i; + c[i] = i + 1; + } + + vb = svld1_s32 (svptrue_b32 (), b); + vc = svld1_s32 (svptrue_b32 (), c); + +#pragma omp parallel sections firstprivate (vb, vc) + { + #pragma omp section + vec_compare (&vb, svindex_s32 (0, 1)); + vec_compare (&vc, svindex_s32 (1, 1)); + + #pragma omp section + vec_compare (&vb, svindex_s32 (0, 1)); + vec_compare (&vc, svindex_s32 (1, 1)); + } + +} + +void __attribute__ ((noipa)) +firstprivate_for () +{ + + int a[32], b[32], c[32]; + svint32_t va, vb, vc; + int i; + +#pragma omp parallel for + for (i = 0; i < 32; i++) + { + b[i] = i; + c[i] = i + 1; + } + + vb = svindex_s32 (1, 0); + vc = svindex_s32 (0, 1); + +#pragma omp parallel for firstprivate (vb, vc) private (va) + for (i = 0; i < 4; i++) + { + svint32_t tb, tc; + vec_compare (&vb, svindex_s32 (1, 0)); + vec_compare (&vc, svindex_s32 (0, 1)); + tb = svld1_s32 (svptrue_b32 (), b + i * 8); + tc = svld1_s32 (svptrue_b32 (), c + i * 8); + va = svadd_s32_z (svptrue_b32 (), vb, vc); + va = svadd_s32_z (svptrue_b32 (), va, tb); + va = svadd_s32_z (svptrue_b32 (), va, tc); + svst1_s32 (svptrue_b32 (), a + i * 8, va); + } + + for (i = 0; i < 32; i++) + if (a[i] != b[i] + c[i] + vb[i % 8] + vc[i % 8]) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +firstprivate_distribute () +{ + + int a[32], b[32], c[32]; + svint32_t va, vb, vc; + int i; + +#pragma omp parallel for + for (i = 0; i < 32; i++) + { + b[i] = i; + c[i] = i + 1; + } + + vb = svindex_s32 (1, 0); + vc = svindex_s32 (0, 1); + +#pragma omp teams +#pragma omp distribute firstprivate (vb, vc) private (va) + for (i = 0; i < 4; i++) + { + svint32_t tb, tc; + vec_compare (&vb, svindex_s32 (1, 0)); + vec_compare (&vc, svindex_s32 (0, 1)); + tb = svld1_s32 (svptrue_b32 (), b + i * 8); + tc = svld1_s32 (svptrue_b32 (), c + i * 8); + va = svadd_s32_z (svptrue_b32 (), vb, vc); + va = svadd_s32_z (svptrue_b32 (), va, tb); + va = svadd_s32_z (svptrue_b32 (), va, tc); + svst1_s32 (svptrue_b32 (), a + i * 8, va); + } + + for (i = 0; i < 32; i++) + if 
(a[i] != b[i] + c[i] + vb[i % 8] + vc[i % 8]) + __builtin_abort (); +} + +int +main () +{ + firstprivate_for (); + firstprivate_sections (); + firstprivate_distribute (); +} diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/lastprivate.c b/libgomp/testsuite/libgomp.c-target/aarch64/lastprivate.c new file mode 100644 index 0000000..2f93d7b --- /dev/null +++ b/libgomp/testsuite/libgomp.c-target/aarch64/lastprivate.c @@ -0,0 +1,171 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-msve-vector-bits=256 -fopenmp -O2" } */ + +#pragma GCC target "+sve" + +#include <arm_sve.h> +#include <omp.h> + +static svint32_t __attribute__ ((noipa)) +foo (svint32_t *vb, svint32_t *vc, int tn) +{ + svint32_t temp = svindex_s32 (tn, 0); + temp = svadd_s32_z (svptrue_b32 (), temp, *vb); + return svadd_s32_z (svptrue_b32 (), temp, *vc); +} + +void __attribute__ ((noipa)) +lastprivate_sections () +{ + int a[8], b[8], c[8]; + svint32_t va, vb, vc; + int i; + +#pragma omp parallel for + for (i = 0; i < 8; i++) + { + b[i] = i; + c[i] = i + 1; + } + +#pragma omp parallel sections lastprivate (vb, vc) num_threads (2) + { + #pragma omp section + vb = svld1_s32 (svptrue_b32 (), b); + #pragma omp section + vb = svld1_s32 (svptrue_b32 (), b); + vc = svld1_s32 (svptrue_b32 (), c); + } + + va = svadd_s32_z (svptrue_b32 (), vb, vc); + svst1_s32 (svptrue_b32 (), a, va); + + for (i = 0; i < 8; i++) + if (a[i] != b[i] + c[i]) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +lastprivate_for () +{ + int a[32], b[32], c[32]; + int aa[8], bb[8], cc[8]; + svint32_t va, vb, vc; + int i, tn; + +#pragma omp parallel for + for (i = 0; i < 32; i++) + { + b[i] = i; + c[i] = i + 1; + } + +#pragma omp parallel for lastprivate (va, vb, vc, tn) + for (i = 0; i < 4; i++) + { + vb = svld1_s32 (svptrue_b32 (), b + i * 8); + vc = svld1_s32 (svptrue_b32 (), c + i * 8); + tn = i; + va = foo (&vb, &vc, tn); + svst1_s32 (svptrue_b32 (), a + i * 8, va); + } + + svst1_s32 (svptrue_b32 (), aa, va); + svst1_s32 (svptrue_b32 (), bb, vb); + svst1_s32 (svptrue_b32 (), cc, vc); + + for (i = 0; i < 8; i++) + if (aa[i] != bb[i] + cc[i] + tn) + __builtin_abort (); + + for (i = 0; i < 32; i++) + if (a[i] != b[i] + c[i] + i / 8) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +lastprivate_simd () +{ + + int a[64], b[64], c[64]; + int aa[8], bb[8], cc[8]; + svint32_t va, vb, vc; + int i; + +#pragma omp parallel for + for (i = 0; i < 64; i++) + { + b[i] = i; + c[i] = i + 1; + } + +#pragma omp simd lastprivate (va, vb, vc) + for (i = 0; i < 8; i++) + { + vb = svld1_s32 (svptrue_b32 (), b + i * 8); + vc = svld1_s32 (svptrue_b32 (), c + i * 8); + va = svadd_s32_z (svptrue_b32 (), vb, vc); + svst1_s32 (svptrue_b32 (), a + i * 8, va); + } + + svst1_s32 (svptrue_b32 (), aa, va); + svst1_s32 (svptrue_b32 (), bb, vb); + svst1_s32 (svptrue_b32 (), cc, vc); + + for (i = 0; i < 8; i++) + if (aa[i] != bb[i] + cc[i]) + __builtin_abort (); + + for (i = 0; i < 64; i++) + if (a[i] != b[i] + c[i]) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +lastprivate_distribute () +{ + + int a[32], b[32], c[32]; + int aa[8], bb[8], cc[8]; + svint32_t va, vb, vc; + int i, tn; + +#pragma omp parallel for + for (i = 0; i < 32; i++) + { + b[i] = i; + c[i] = i + 1; + } + +#pragma omp teams +#pragma omp distribute lastprivate (va, vb, vc, tn) + for (i = 0; i < 4; i++) + { + vb = svld1_s32 (svptrue_b32 (), b + i * 8); + vc = svld1_s32 (svptrue_b32 (), c + i * 8); + tn = i; + va = foo (&vb, &vc, tn); + svst1_s32 (svptrue_b32 (), a + i * 8, va); + 
} + + svst1_s32 (svptrue_b32 (), aa, va); + svst1_s32 (svptrue_b32 (), bb, vb); + svst1_s32 (svptrue_b32 (), cc, vc); + + for (i = 0; i < 8; i++) + if (aa[i] != bb[i] + cc[i] + tn) + __builtin_abort (); + + for (i = 0; i < 32; i++) + if (a[i] != b[i] + c[i] + i / 8) + __builtin_abort (); +} + +int +main () +{ + lastprivate_for (); + lastprivate_sections (); + lastprivate_simd (); + lastprivate_distribute (); +} diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/private.c b/libgomp/testsuite/libgomp.c-target/aarch64/private.c new file mode 100644 index 0000000..fed5370 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-target/aarch64/private.c @@ -0,0 +1,107 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-msve-vector-bits=256 -fopenmp -O2" } */ + +#pragma GCC target "+sve" + +#include <arm_sve.h> +#include <omp.h> + +static void __attribute__ ((noipa)) +compare_vec (svint32_t *x, svint32_t y) +{ + svbool_t p = svnot_b_z (svptrue_b32 (), svcmpeq_s32 (svptrue_b32 (), *x, y)); + + if (svptest_any (svptrue_b32 (), p)) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +private () +{ + svint32_t a; +#pragma omp parallel private (a) num_threads (10) + { + a = svindex_s32 (omp_get_thread_num (), 0); + +#pragma omp barrier + compare_vec (&a, svindex_s32 (omp_get_thread_num (), 0)); + } +} + +void __attribute__ ((noipa)) +firstprivate () +{ + svint32_t a = svindex_s32 (1,1); + svint32_t b; + +#pragma omp parallel private (b) firstprivate (a) num_threads (12) + { + compare_vec (&a, svindex_s32 (1, 1)); + b = svindex_s32 (omp_get_thread_num (), 0); + +#pragma omp barrier + compare_vec (&a, svindex_s32 (1, 1)); + compare_vec (&b, svindex_s32 (omp_get_thread_num (), 0)); + if (omp_get_thread_num () == 5) + { + a = svindex_s32 (1, 2); + b = svindex_s32 (10, 0); + } + +#pragma omp barrier + if (omp_get_thread_num () == 5) + { + compare_vec (&a, svindex_s32 (1, 2)); + compare_vec (&b, svindex_s32 (10, 0)); + } + else + { + compare_vec (&a, svindex_s32 (1, 1)); + compare_vec (&b, svindex_s32 (omp_get_thread_num (), 0)); + } + } +} + +void __attribute__ ((noipa)) +lastprivate () +{ + svint32_t a = svindex_s32 (1,1); + svint32_t b; + int i; + +#pragma omp parallel for private (a) lastprivate (b) + for (i = 0; i < 16; i++) + { + b = svindex_s32 (i, 0); + + compare_vec (&b, svindex_s32 (i, 0)); + if (i == 5) + { + a = svindex_s32 (1, 2); + b = svindex_s32 (10, 0); + } + else + a = svindex_s32 (1, 1); + + if (i == 5) + { + compare_vec (&a, svindex_s32 (1, 2)); + compare_vec (&b, svindex_s32 (10, 0)); + } + else + { + compare_vec (&a, svindex_s32 (1, 1)); + compare_vec (&b, svindex_s32 (i, 0)); + } + } + + compare_vec (&b, svindex_s32 (15, 0)); +} + +int +main () +{ + private (); + firstprivate (); + lastprivate (); +} diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/shared.c b/libgomp/testsuite/libgomp.c-target/aarch64/shared.c new file mode 100644 index 0000000..340a668 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-target/aarch64/shared.c @@ -0,0 +1,266 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-msve-vector-bits=256 -fopenmp -O2" } */ + +#pragma GCC target "+sve" + +#include <arm_sve.h> +#include <stdlib.h> +#include <omp.h> + +static void __attribute__ ((noipa)) +compare_vec (svint32_t x, svint32_t y) +{ + svbool_t p = svnot_b_z (svptrue_b32 (), svcmpeq_s32 (svptrue_b32 (), x, y)); + + if (svptest_any (svptrue_b32 (), p)) + __builtin_abort (); +} + +static void __attribute__ ((noipa)) +compare_vecb (svbool_t x, svbool_t y) +{ + svbool_t p = 
sveor_b_z (svptrue_b32 (), x, y); + + if (svptest_any (svptrue_b32 (), p)) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +implicit_shared_default (svint32_t a, svint32_t b, svbool_t p) +{ + +#pragma omp parallel default (shared) num_threads (10) + { + /* 'a', 'b' and 'p' are implicitly shared. */ + compare_vec (a, svindex_s32 (0, 1)); + compare_vec (b, svindex_s32 (8, 1)); + compare_vecb (p, svptrue_b32 ()); + +#pragma omp barrier + if (omp_get_thread_num () == 2) + a = svadd_s32_z (p, a, b); + +#pragma omp barrier + if (omp_get_thread_num () == 0) + { + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svindex_s32 (8, 1)); + compare_vecb (p, svptrue_b32 ()); + b = svadd_s32_z (p, a, b); + } + +#pragma omp barrier + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); + +#pragma omp barrier + if (omp_get_thread_num () == 0 || omp_get_thread_num () == 2) + { + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); + } + } +} + +void __attribute__ ((noipa)) +explicit_shared (svint32_t a, svint32_t b, svbool_t p) +{ + +#pragma omp parallel shared (a, b, p) num_threads (12) + { + /* 'a', 'b' and 'p' are explicitly shared. */ + compare_vec (a, svindex_s32 (0, 1)); + compare_vec (b, svindex_s32 (8, 1)); + compare_vecb (p, svptrue_b32 ()); + +#pragma omp barrier + if (omp_get_thread_num () == 2) + a = svadd_s32_z (p, a, b); + +#pragma omp barrier + if (omp_get_thread_num () == 0) + { + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svindex_s32 (8, 1)); + compare_vecb (p, svptrue_b32 ()); + b = svadd_s32_z (p, a, b); + } + +#pragma omp barrier + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); + +#pragma omp barrier + if (omp_get_thread_num () == 0 || omp_get_thread_num () == 2) + { + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); + } + } +} + +void __attribute__ ((noipa)) +implicit_shared_no_default (svint32_t a, svint32_t b, svbool_t p) +{ + +#pragma omp parallel num_threads (16) + { + /* 'a', 'b' and 'p' are implicitly shared without default clause. */ + compare_vec (a, svindex_s32 (0, 1)); + compare_vec (b, svindex_s32 (8, 1)); + compare_vecb (p, svptrue_b32 ()); + +#pragma omp barrier + if (omp_get_thread_num () == 12) + a = svadd_s32_z (p, a, b); + +#pragma omp barrier + if (omp_get_thread_num () == 15) + { + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svindex_s32 (8, 1)); + compare_vecb (p, svptrue_b32 ()); + b = svadd_s32_z (p, a, b); + } + +#pragma omp barrier + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); + +#pragma omp barrier + if (omp_get_thread_num () == 12 || omp_get_thread_num () == 15) + { + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); + } + } + +} + +void __attribute__ ((noipa)) +mix_shared (svint32_t b, svbool_t p) +{ + + svint32_t a = svindex_s32 (0, 0); + int *m = (int *) malloc (8 * sizeof (int)); + int i; + +#pragma omp parallel for + for (i = 0; i < 8; i++) + m[i] = i; + +#pragma omp parallel num_threads (16) + { + compare_vec (a, svindex_s32 (0, 0)); + compare_vec (b, svindex_s32 (8, 1)); + +#pragma omp barrier + /* 'm' is predetermined shared here. 'a' is implicitly shared here. 
*/ + if (omp_get_thread_num () == 10) + a = svld1_s32 (svptrue_b32 (), m); + +#pragma omp barrier + /* 'a', 'b' and 'p' are implicitly shared without default clause. */ + compare_vec (a, svindex_s32 (0, 1)); + compare_vec (b, svindex_s32 (8, 1)); + compare_vecb (p, svptrue_b32 ()); + +#pragma omp barrier + if (omp_get_thread_num () == 12) + a = svadd_s32_z (p, a, b); + +#pragma omp barrier + if (omp_get_thread_num () == 15) + { + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svindex_s32 (8, 1)); + compare_vecb (p, svptrue_b32 ()); + b = svadd_s32_z (p, a, b); + } + +#pragma omp barrier + if (omp_get_thread_num () == 12 || omp_get_thread_num () == 15) + { + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); + } + +#pragma omp barrier + compare_vec (a, svindex_s32 (8, 2)); + compare_vec (b, svadd_s32_z (p, svindex_s32 (8, 2), svindex_s32 (8, 1))); + } +} + +#define N __ARM_FEATURE_SVE_BITS +#define FIXED_ATTR __attribute__((arm_sve_vector_bits (N))) + +typedef svint32_t v8si FIXED_ATTR; + +void __attribute__ ((noipa)) +predetermined_shared_static (int n) +{ + + int *m = (int *) malloc (8 * sizeof (int)); + int i; + +#pragma omp parallel for + /* 'm' is predetermined shared here. */ + for (i = 0; i < 8; i++) + m[i] = i; + + static v8si a = { 0, 1, 2, 3, 4, 5, 6, 7 }; + +#pragma omp parallel num_threads (16) + { + /* 'a' is implicit shared here. */ + if (n == 0) + compare_vec (a, svindex_s32 (0, 1)); + + if (n == 1) + compare_vec (a, svindex_s32 (1, 1)); + +#pragma omp barrier + if (omp_get_thread_num () == 12) + { + if (n == 0) + compare_vec (a, svindex_s32 (0, 1)); + + if (n == 1) + compare_vec (a, svindex_s32 (1, 1)); + + a = svadd_s32_z (svptrue_b32 (), a, svindex_s32 (1, 0)); + } + +#pragma omp barrier + if (n == 0) + compare_vec (a, svindex_s32 (1, 1)); + + if (n == 1) + compare_vec (a, svindex_s32 (2, 1)); + } +} + + +int +main () +{ + svint32_t x = svindex_s32 (0, 1); + svint32_t y = svindex_s32 (8, 1); + svbool_t p = svptrue_b32 (); + + /* Implicit shared. */ + implicit_shared_default (x, y, p); + + /* Explicit shared. */ + explicit_shared (x, y, p); + + /* Implicit shared with no default clause. */ + implicit_shared_no_default (x, y, p); + + /* Mix shared. */ + mix_shared (y, p); + + /* Predetermined and static shared. 
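     Called twice: the static vector inside predetermined_shared_static keeps the increment done by the first call, which the second call then checks.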
*/ + predetermined_shared_static (0); + predetermined_shared_static (1); +} diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/simd-aligned.c b/libgomp/testsuite/libgomp.c-target/aarch64/simd-aligned.c new file mode 100644 index 0000000..14642c9 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-target/aarch64/simd-aligned.c @@ -0,0 +1,51 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-msve-vector-bits=256 -fopenmp -O2" } */ + +#pragma GCC target "+sve" + +#include <arm_sve.h> +#include <stdint.h> + +#define N 256 + +int a[N] __attribute__ ((aligned (64))); +int b[N] __attribute__ ((aligned (64))); + +void __attribute__ ((noipa)) +foo (int *p, int *q, svint32_t *onesp) +{ + svint32_t va, vc; + int i; + uint64_t sz = svcntw (); + +#pragma omp simd aligned(p, q : 64) aligned (onesp : 128) \ + private (va, vc) nontemporal (va, vc) + for (i = 0; i < N; i++) + { + if (i % sz == 0) + { + va = svld1_s32 (svptrue_b32 (), p); + vc = svadd_s32_z (svptrue_b32 (), va, *onesp); + svst1_s32 (svptrue_b32 (), q, vc); + q += sz; + } + } +} + +int +main () +{ + svint32_t ones __attribute__ ((aligned(128))) = svindex_s32 (1, 0); + + for (int i = 0; i < N; i++) + { + a[i] = 1; + b[i] = 0; + } + + foo (a, b, &ones); + + for (int i = 0; i < N; i++) + if (b[i] != 2) + __builtin_abort (); +} diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/simd-nontemporal.c b/libgomp/testsuite/libgomp.c-target/aarch64/simd-nontemporal.c new file mode 100644 index 0000000..6fe4616 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-target/aarch64/simd-nontemporal.c @@ -0,0 +1,51 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-msve-vector-bits=256 -fopenmp -O2" } */ + +#pragma GCC target "+sve" + +#include <arm_sve.h> +#include <stdint.h> + +#define N 256 + +int a[N] __attribute__ ((aligned (64))); +int b[N] __attribute__ ((aligned (64))); + +void __attribute__ ((noipa)) +foo (int *p, int *q) +{ + svint32_t va, vb, vc; + int i; + uint64_t sz = svcntw (); + +#pragma omp simd aligned(p, q : 64) private (va, vb, vc) \ + nontemporal (va, vb, vc) + for (i = 0; i < N; i++) + { + if (i % sz == 0) + { + va = svld1_s32 (svptrue_b32 (), p); + vb = svindex_s32 (1, 0); + vc = svadd_s32_z (svptrue_b32 (), va, vb); + svst1_s32 (svptrue_b32 (), q, vc); + q += sz; + } + } +} + +int +main () +{ + + for (int i = 0; i < N; i++) + { + a[i] = 1; + b[i] = 0; + } + + foo (a, b); + + for (int i = 0; i < N; i++) + if (b[i] != 2) + __builtin_abort (); +} diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/threadprivate.c b/libgomp/testsuite/libgomp.c-target/aarch64/threadprivate.c new file mode 100644 index 0000000..aa7d2f9 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-target/aarch64/threadprivate.c @@ -0,0 +1,47 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-msve-vector-bits=256 -fopenmp -O2" } */ + +#pragma GCC target "+sve" + +#include <arm_sve.h> +#include <stdint.h> + +typedef __SVInt32_t v8si __attribute__ ((arm_sve_vector_bits(256))); + +v8si vec1; +#pragma omp threadprivate (vec1) + +void __attribute__ ((noipa)) +foo () +{ + int64_t res = 0; + + vec1 = svindex_s32 (1, 0); + +#pragma omp parallel copyin (vec1) firstprivate (res) num_threads(10) + { + res = svaddv_s32 (svptrue_b32 (), vec1); + +#pragma omp barrier + if (res != 8LL) + __builtin_abort (); + } +} + +int +main () +{ + int64_t res = 0; + +#pragma omp parallel firstprivate (res) num_threads(10) + { + vec1 = svindex_s32 (1, 0); + res = svaddv_s32 (svptrue_b32 (), vec1); + +#pragma omp barrier + if (res != 
8LL) + __builtin_abort (); + } + + foo (); +} diff --git a/libgomp/testsuite/libgomp.c-target/aarch64/udr-sve.c b/libgomp/testsuite/libgomp.c-target/aarch64/udr-sve.c new file mode 100644 index 0000000..02e02dc --- /dev/null +++ b/libgomp/testsuite/libgomp.c-target/aarch64/udr-sve.c @@ -0,0 +1,134 @@ +/* { dg-do run { target aarch64_sve256_hw } } */ +/* { dg-options "-march=armv8-a+sve -msve-vector-bits=256 -fopenmp -O2" } */ + +#include <arm_sve.h> + +#pragma omp declare reduction (+:svint32_t: omp_out = svadd_s32_z (svptrue_b32(), omp_in, omp_out)) \ + initializer (omp_priv = svindex_s32 (0, 0)) + +void __attribute__ ((noipa)) +parallel_reduction () +{ + int a[8] = {1, 1, 1, 1, 1, 1, 1, 1}; + int b[8] = {0, 0, 0, 0, 0, 0, 0, 0}; + svint32_t va = svld1_s32 (svptrue_b32 (), b); + int i = 0; + int64_t res; + + #pragma omp parallel reduction (+:va, i) + { + va = svld1_s32 (svptrue_b32 (), a); + i++; + } + + res = svaddv_s32 (svptrue_b32 (), va); + + if (res != i * 8) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +for_reduction () +{ + int a[8] = {1, 1, 1, 1, 1, 1, 1, 1}; + int b[8] = {0, 0, 0, 0, 0, 0, 0, 0}; + svint32_t va = svld1_s32 (svptrue_b32 (), b); + int j; + int64_t res; + + #pragma omp parallel for reduction (+:va) + for (j = 0; j < 8; j++) + va += svld1_s32 (svptrue_b32 (), a); + + res = svaddv_s32 (svptrue_b32 (), va); + + if (res != 64) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +simd_reduction () +{ + int a[8]; + svint32_t va = svindex_s32 (0, 0); + int i = 0; + int j; + int64_t res = 0; + + for (j = 0; j < 8; j++) + a[j] = 1; + + #pragma omp simd reduction (+:va) + for (j = 0; j < 16; j++) + va += svld1_s32 (svptrue_b32 (), a); + + res = svaddv_s32 (svptrue_b32 (), va); + + if (res != 128) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +inscan_reduction_incl () +{ + svint32_t va = svindex_s32 (0, 0); + int a[8] = {1, 1, 1, 1, 1, 1, 1, 1}; + int b[64] = { 0 }; + int j; + int64_t res = 0; + + #pragma omp parallel for reduction (inscan, +:va) + for (j = 0; j < 8; j++) + { + va += svld1_s32 (svptrue_b32 (), a); + #pragma omp scan inclusive (va) + svst1_s32 (svptrue_b32 (), b + j * 8, va); + } + + res = svaddv_s32 (svptrue_b32 (), va); + + if (res != 64) + __builtin_abort (); + + for (j = 0; j < 64; j+=8) + if (b[j] != (j / 8 + 1)) + __builtin_abort (); +} + +void __attribute__ ((noipa)) +inscan_reduction_excl () +{ + svint32_t va = svindex_s32 (0, 0); + int a[8] = {1, 1, 1, 1, 1, 1, 1, 1}; + int b[64] = { 0 }; + int j; + int64_t res = 0; + + #pragma omp parallel for reduction (inscan, +:va) + for (j = 0; j < 8; j++) + { + svst1_s32 (svptrue_b32 (), b + j * 8, va); + #pragma omp scan exclusive (va) + va += svld1_s32 (svptrue_b32 (), a); + } + + res = svaddv_s32 (svptrue_b32 (), va); + + if (res != 64) + __builtin_abort (); + + for (j = 0; j < 64; j+=8) + if (b[j] != j / 8) + __builtin_abort (); +} + + +int +main () +{ + parallel_reduction (); + for_reduction (); + simd_reduction (); + inscan_reduction_incl (); + inscan_reduction_excl (); +} diff --git a/libgomp/testsuite/libgomp.c/append-args-fr-1.c b/libgomp/testsuite/libgomp.c/append-args-fr-1.c new file mode 100644 index 0000000..2fd7eda --- /dev/null +++ b/libgomp/testsuite/libgomp.c/append-args-fr-1.c @@ -0,0 +1,232 @@ +/* { dg-do run } */ + +#include "append-args-fr.h" + +enum { host_device, nvptx_device, gcn_device } used_device_type, used_device_type2; +static int used_device_num, used_device_num2; +static omp_interop_fr_t expected_fr, expected_fr2; +static _Bool is_targetsync, 
is_targetsync2; + +void +check_interop (omp_interop_t obj) +{ + if (used_device_type == host_device) + check_host (obj); + else if (used_device_type == nvptx_device) + check_nvptx (obj, used_device_num, expected_fr, is_targetsync); + else if (used_device_type == gcn_device) + check_gcn (obj, used_device_num, expected_fr, is_targetsync); + else + __builtin_unreachable (); + + #pragma omp interop use(obj) +} + +void +check_interop2 (omp_interop_t obj, omp_interop_t obj2) +{ + check_interop (obj); + + #pragma omp interop use(obj2) + + if (used_device_type2 == host_device) + check_host (obj2); + else if (used_device_type2 == nvptx_device) + check_nvptx (obj2, used_device_num2, expected_fr2, is_targetsync2); + else if (used_device_type2 == gcn_device) + check_gcn (obj2, used_device_num2, expected_fr2, is_targetsync2); + else + __builtin_unreachable (); +} + + +/* Check no args + one interop arg - and no prefer_type. */ + +int f0_1_tg_ (omp_interop_t obj) { check_interop (obj); return 4242; } +#pragma omp declare variant(f0_1_tg_) match(construct={dispatch}) append_args(interop(target)) +int f0_1_tg () { assert (false); return 42; } + +void f0_1_tgsy_ (omp_interop_t obj) { check_interop (obj); } +#pragma omp declare variant(f0_1_tgsy_) match(construct={dispatch}) append_args(interop(targetsync)) +void f0_1_tgsy () { assert (false); } + +int f0_1_tgtgsy_ (omp_interop_t obj) { check_interop (obj); return 3333; } +#pragma omp declare variant(f0_1_tgtgsy_) match(construct={dispatch}) append_args(interop(targetsync,target)) +int f0_1_tgtgsy () { assert (false); return 33; } + + +/* And with PREFER_TYPE. */ + +// nv: cuda, gcn: -, -, hip +void f0_1_tgsy_c_cd_hi_hs_ (omp_interop_t obj) { check_interop (obj); } +#pragma omp declare variant(f0_1_tgsy_c_cd_hi_hs_) match(construct={dispatch}) \ + append_args(interop(targetsync, prefer_type("cuda","cuda_driver", "hip", "hsa"))) +void f0_1_tgsy_c_cd_hi_hs () { assert (false); } + +// nv: -, cuda_driver, gcn: hsa +void f0_1_tgsy_hs_cd_c_hi_ (omp_interop_t obj) { check_interop (obj); } +#pragma omp declare variant(f0_1_tgsy_hs_cd_c_hi_) match(construct={dispatch}) \ + append_args(interop(targetsync, prefer_type({attr("ompx_foo")}, {fr("hsa")}, {attr("ompx_bar"), fr("cuda_driver"), attr("ompx_foobar")},{fr("cuda")}, {fr("hip")}))) +void f0_1_tgsy_hs_cd_c_hi () { assert (false); } + +// nv: -, hip, gcn: hsa +void f0_1_tgsy_hs_hi_cd_c_ (omp_interop_t obj) { check_interop (obj); } +#pragma omp declare variant(f0_1_tgsy_hs_hi_cd_c_) match(construct={dispatch}) \ + append_args(interop(targetsync, prefer_type("hsa", "hip", "cuda_driver", "cuda"))) +void f0_1_tgsy_hs_hi_cd_c () { assert (false); } + + +void +check_f0 () +{ + if (used_device_type == nvptx_device) + expected_fr = omp_ifr_cuda; + else if (used_device_type == gcn_device) + expected_fr = omp_ifr_hip; + else /* host; variable shall not be accessed */ + expected_fr = omp_ifr_level_zero; + + int i; + if (used_device_num == DEFAULT_DEVICE) + { + is_targetsync = 0; + #pragma omp dispatch + i = f0_1_tg (); + assert (i == 4242); + + is_targetsync = 1; + #pragma omp dispatch + f0_1_tgsy (); + + #pragma omp dispatch + i = f0_1_tgtgsy (); + assert (i == 3333); + + + if (used_device_type == nvptx_device) + expected_fr = omp_ifr_cuda; + else if (used_device_type == gcn_device) + expected_fr = omp_ifr_hip; + #pragma omp dispatch + f0_1_tgsy_c_cd_hi_hs (); + + if (used_device_type == nvptx_device) + expected_fr = omp_ifr_cuda_driver; + else if (used_device_type == gcn_device) + expected_fr = omp_ifr_hsa; + #pragma omp 
dispatch + f0_1_tgsy_hs_cd_c_hi (); + + if (used_device_type == nvptx_device) + expected_fr = omp_ifr_hip; + else if (used_device_type == gcn_device) + expected_fr = omp_ifr_hsa; + #pragma omp dispatch + f0_1_tgsy_hs_hi_cd_c (); + } + else + { + is_targetsync = 0; + #pragma omp dispatch device(used_device_num) + i = f0_1_tg (); + assert (i == 4242); + + is_targetsync = 1; + #pragma omp dispatch device(used_device_num) + f0_1_tgsy (); + + #pragma omp dispatch device(used_device_num) + i = f0_1_tgtgsy (); + assert (i == 3333); + + + if (used_device_type == nvptx_device) + expected_fr = omp_ifr_cuda; + else if (used_device_type == gcn_device) + expected_fr = omp_ifr_hip; + #pragma omp dispatch device(used_device_num) + f0_1_tgsy_c_cd_hi_hs (); + + if (used_device_type == nvptx_device) + expected_fr = omp_ifr_cuda_driver; + else if (used_device_type == gcn_device) + expected_fr = omp_ifr_hsa; + #pragma omp dispatch device(used_device_num) + f0_1_tgsy_hs_cd_c_hi (); + + if (used_device_type == nvptx_device) + expected_fr = omp_ifr_hip; + else if (used_device_type == gcn_device) + expected_fr = omp_ifr_hsa; + #pragma omp dispatch device(used_device_num) + f0_1_tgsy_hs_hi_cd_c (); + } +} + + + +void +do_check (int dev) +{ + int num_dev = omp_get_num_devices (); + const char *dev_type; + if (dev != DEFAULT_DEVICE) + omp_set_default_device (dev); + int is_nvptx = on_device_arch_nvptx (); + int is_gcn = on_device_arch_gcn (); + int is_host; + + if (dev != DEFAULT_DEVICE) + is_host = dev == -1 || dev == num_dev; + else + { + int def_dev = omp_get_default_device (); + is_host = def_dev == -1 || def_dev == num_dev; + } + + assert (is_nvptx + is_gcn + is_host == 1); + + if (num_dev > 0 && dev != DEFAULT_DEVICE) + { + if (is_host) + omp_set_default_device (0); + else + omp_set_default_device (-1); + } + + used_device_num = dev; + if (is_host) + { + dev_type = "host"; + used_device_type = host_device; + } + else if (is_nvptx) + { + dev_type = "nvptx"; + used_device_type = nvptx_device; + } + else if (is_gcn) + { + dev_type = "gcn"; + used_device_type = gcn_device; + } + + printf ("Running on the %s device (%d)\n", dev_type, dev); + check_f0 (); +} + + + +int +main () +{ + do_check (DEFAULT_DEVICE); + int ndev = omp_get_num_devices (); + for (int dev = -1; dev < ndev; dev++) + do_check (dev); + for (int dev = -1; dev < ndev; dev++) + { + omp_set_default_device (dev); + do_check (DEFAULT_DEVICE); + } +} diff --git a/libgomp/testsuite/libgomp.c/append-args-fr.h b/libgomp/testsuite/libgomp.c/append-args-fr.h new file mode 100644 index 0000000..9f6ca04 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/append-args-fr.h @@ -0,0 +1,305 @@ +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <omp.h> +#include "../libgomp.c-c++-common/on_device_arch.h" + +/* Provides: */ + +#define DEFAULT_DEVICE -99 + +void check_host (omp_interop_t obj); +void check_nvptx (omp_interop_t obj, int dev, omp_interop_fr_t expected_fr, _Bool is_targetsync); +void check_gcn (omp_interop_t obj, int dev, omp_interop_fr_t expected_fr, _Bool is_targetsync); + + +/* The following assumes that when a nvptx device is available, + cuda/cuda_driver/hip are supported. + And that likewise when a gcn device is available that the + plugin also can not only the HSA but also the HIP library + such that hsa/hip are supported. + For the host, omp_interop_none is expected. + + Otherwise, it only does some basic tests without checking + that the returned result really makes sense. 
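   DEFAULT_DEVICE (-99) is a sentinel: with it, the dispatch construct is used without a device() clause, so the current default device applies.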
*/ + +void check_type (omp_interop_t obj) +{ + const char *type; + + type = omp_get_interop_type_desc (obj, omp_ipr_fr_id); + if (obj != omp_interop_none) + assert (strcmp (type, "omp_interop_t") == 0); + else + assert (type == NULL); + + type = omp_get_interop_type_desc (obj, omp_ipr_fr_name); + if (obj != omp_interop_none) + assert (strcmp (type, "const char *") == 0); + else + assert (type == NULL); + + type = omp_get_interop_type_desc (obj, omp_ipr_vendor); + if (obj != omp_interop_none) + assert (strcmp (type, "int") == 0); + else + assert (type == NULL); + + type = omp_get_interop_type_desc (obj, omp_ipr_vendor_name); + if (obj != omp_interop_none) + assert (strcmp (type, "const char *") == 0); + else + assert (type == NULL); + + type = omp_get_interop_type_desc (obj, omp_ipr_device_num); + if (obj != omp_interop_none) + assert (strcmp (type, "int") == 0); + else + assert (type == NULL); + + if (obj != omp_interop_none) + return; + assert (omp_get_interop_type_desc (obj, omp_ipr_platform) == NULL); + assert (omp_get_interop_type_desc (obj, omp_ipr_device) == NULL); + assert (omp_get_interop_type_desc (obj, omp_ipr_device_context) == NULL); + assert (omp_get_interop_type_desc (obj, omp_ipr_targetsync) == NULL); +} + + +void +check_host (omp_interop_t obj) +{ + assert (obj == omp_interop_none); + check_type (obj); +} + + +void +check_nvptx (omp_interop_t obj, int dev, omp_interop_fr_t expected_fr, _Bool is_targetsync) +{ + assert (obj != omp_interop_none && obj != (omp_interop_t) -1L); + + omp_interop_rc_t ret_code = omp_irc_no_value; + omp_interop_fr_t fr = (omp_interop_fr_t) omp_get_interop_int (obj, omp_ipr_fr_id, &ret_code); + + assert (ret_code == omp_irc_success); + assert (fr == expected_fr); + + ret_code = omp_irc_no_value; + const char *fr_name = omp_get_interop_str (obj, omp_ipr_fr_name, &ret_code); + + assert (ret_code == omp_irc_success); + if (fr == omp_ifr_cuda) + assert (strcmp (fr_name, "cuda") == 0); + else if (fr == omp_ifr_cuda_driver) + assert (strcmp (fr_name, "cuda_driver") == 0); + else if (fr == omp_ifr_hip) + assert (strcmp (fr_name, "hip") == 0); + else + assert (0); + + ret_code = omp_irc_no_value; + int vendor = (int) omp_get_interop_int (obj, omp_ipr_vendor, &ret_code); + assert (ret_code == omp_irc_success); + assert (vendor == 11); /* Nvidia */ + + ret_code = omp_irc_no_value; + const char *vendor_name = omp_get_interop_str (obj, omp_ipr_vendor_name, &ret_code); + assert (ret_code == omp_irc_success); + assert (strcmp (vendor_name, "nvidia") == 0); + + ret_code = omp_irc_no_value; + int dev_num = (int) omp_get_interop_int (obj, omp_ipr_device_num, &ret_code); + assert (ret_code == omp_irc_success); + if (dev == DEFAULT_DEVICE) + assert (dev_num == omp_get_default_device ()); + else + assert (dev_num == dev); + + /* Platform: N/A. */ + ret_code = omp_irc_success; + (void) omp_get_interop_int (obj, omp_ipr_platform, &ret_code); + assert (ret_code == omp_irc_no_value); + ret_code = omp_irc_success; + (void) omp_get_interop_ptr (obj, omp_ipr_platform, &ret_code); + assert (ret_code == omp_irc_no_value); + ret_code = omp_irc_success; + (void) omp_get_interop_str (obj, omp_ipr_platform, &ret_code); + assert (ret_code == omp_irc_no_value); + + /* Device: int / CUdevice / hipDevice_t -- all internally an 'int'. */ + ret_code = omp_irc_no_value; + int fr_device = (int) omp_get_interop_int (obj, omp_ipr_device, &ret_code); + + /* CUDA also starts from 0 and goes to < n with cudaGetDeviceCount(&cn). 
*/ + assert (ret_code == omp_irc_success); + assert (fr_device >= 0 && fr_device < omp_get_num_devices ()); + + /* Device context: N/A / CUcontext / hipCtx_t -- a pointer. */ + ret_code = omp_irc_out_of_range; + void *ctx = omp_get_interop_ptr (obj, omp_ipr_device_context, &ret_code); + + if (fr == omp_ifr_cuda) + { + assert (ret_code == omp_irc_no_value); + assert (ctx == NULL); + } + else + { + assert (ret_code == omp_irc_success); + assert (ctx != NULL); + } + + /* Stream/targetsync: cudaStream_t / CUstream / hipStream_t -- a pointer. */ + ret_code = omp_irc_out_of_range; + void *stream = omp_get_interop_ptr (obj, omp_ipr_targetsync, &ret_code); + + if (is_targetsync) /* no targetsync */ + { + assert (ret_code == omp_irc_success); + assert (stream != NULL); + } + else + { + assert (ret_code == omp_irc_no_value); + assert (stream == NULL); + } + + check_type (obj); + if (fr == omp_ifr_cuda) + { + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "int") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "N/A") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "cudaStream_t") == 0); + } + else if (fr == omp_ifr_cuda_driver) + { + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "CUdevice") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "CUcontext") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "CUstream") == 0); + } + else + { + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "hipDevice_t") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "hipCtx_t") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "hipStream_t") == 0); + } +} + + +void +check_gcn (omp_interop_t obj, int dev, omp_interop_fr_t expected_fr, _Bool is_targetsync) +{ + assert (obj != omp_interop_none && obj != (omp_interop_t) -1L); + + omp_interop_rc_t ret_code = omp_irc_no_value; + omp_interop_fr_t fr = (omp_interop_fr_t) omp_get_interop_int (obj, omp_ipr_fr_id, &ret_code); + + assert (ret_code == omp_irc_success); + assert (fr == expected_fr); + + ret_code = omp_irc_no_value; + const char *fr_name = omp_get_interop_str (obj, omp_ipr_fr_name, &ret_code); + + assert (ret_code == omp_irc_success); + if (fr == omp_ifr_hip) + assert (strcmp (fr_name, "hip") == 0); + else if (fr == omp_ifr_hsa) + assert (strcmp (fr_name, "hsa") == 0); + else + assert (0); + + ret_code = omp_irc_no_value; + int vendor = (int) omp_get_interop_int (obj, omp_ipr_vendor, &ret_code); + assert (ret_code == omp_irc_success); + assert (vendor == 1); /* Amd */ + + ret_code = omp_irc_no_value; + const char *vendor_name = omp_get_interop_str (obj, omp_ipr_vendor_name, &ret_code); + assert (ret_code == omp_irc_success); + assert (strcmp (vendor_name, "amd") == 0); + + ret_code = omp_irc_no_value; + int dev_num = (int) omp_get_interop_int (obj, omp_ipr_device_num, &ret_code); + assert (ret_code == omp_irc_success); + if (dev == DEFAULT_DEVICE) + assert (dev_num == omp_get_default_device ()); + else + assert (dev_num == dev); + + /* Platform: N/A. 
*/ + ret_code = omp_irc_success; + (void) omp_get_interop_int (obj, omp_ipr_platform, &ret_code); + assert (ret_code == omp_irc_no_value); + ret_code = omp_irc_success; + (void) omp_get_interop_ptr (obj, omp_ipr_platform, &ret_code); + assert (ret_code == omp_irc_no_value); + ret_code = omp_irc_success; + (void) omp_get_interop_str (obj, omp_ipr_platform, &ret_code); + assert (ret_code == omp_irc_no_value); + + /* Device: hipDevice_t / hsa_agent_t* -- hip is internally an 'int'. */ + ret_code = omp_irc_no_value; + if (fr == omp_ifr_hip) + { + /* HIP also starts from 0 and goes to < n as with cudaGetDeviceCount(&cn). */ + int fr_device = (int) omp_get_interop_int (obj, omp_ipr_device, &ret_code); + assert (ret_code == omp_irc_success); + assert (fr_device >= 0 && fr_device < omp_get_num_devices ()); + } + else + { + void *agent = omp_get_interop_ptr (obj, omp_ipr_device, &ret_code); + assert (ret_code == omp_irc_success); + assert (agent != NULL); + } + + /* Device context: hipCtx_t / N/A -- a pointer. */ + ret_code = omp_irc_out_of_range; + void *ctx = omp_get_interop_ptr (obj, omp_ipr_device_context, &ret_code); + if (fr == omp_ifr_hip) + { + assert (ret_code == omp_irc_success); + assert (ctx != NULL); + } + else + { + assert (ret_code == omp_irc_no_value); + assert (ctx == NULL); + } + + /* Stream/targetsync: cudaStream_t / CUstream / hipStream_t -- a pointer. */ + ret_code = omp_irc_out_of_range; + void *stream = omp_get_interop_ptr (obj, omp_ipr_targetsync, &ret_code); + + if (is_targetsync) + { + assert (ret_code == omp_irc_success); + assert (stream != NULL); + } + else + { + assert (ret_code == omp_irc_no_value); + assert (stream == NULL); + } + + check_type (obj); + if (fr == omp_ifr_hip) + { + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "hipDevice_t") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "hipCtx_t") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "hipStream_t") == 0); + } + else + { + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_platform), "N/A") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device), "hsa_agent_t *") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_device_context), "N/A") == 0); + assert (strcmp (omp_get_interop_type_desc (obj, omp_ipr_targetsync), "hsa_queue_t *") == 0); + } +} diff --git a/libgomp/testsuite/libgomp.c/declare-variant-3-sm61.c b/libgomp/testsuite/libgomp.c/declare-variant-3-sm61.c new file mode 100644 index 0000000..e6941d3 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/declare-variant-3-sm61.c @@ -0,0 +1,8 @@ +/* { dg-do link { target { offload_target_nvptx } } } */ +/* { dg-additional-options -foffload=nvptx-none } */ +/* { dg-additional-options "-foffload=-misa=sm_61 -foffload=-mptx=_" } */ +/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */ + +#include "declare-variant-3.h" + +/* { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump "= f61 \\(\\);" "optimized" } } */ diff --git a/libgomp/testsuite/libgomp.c/declare-variant-3.h b/libgomp/testsuite/libgomp.c/declare-variant-3.h index c9c8f4a..f5695a2 100644 --- a/libgomp/testsuite/libgomp.c/declare-variant-3.h +++ b/libgomp/testsuite/libgomp.c/declare-variant-3.h @@ -37,6 +37,13 @@ f53 (void) __attribute__ ((noipa)) int +f61 (void) +{ + return 61; +} + +__attribute__ ((noipa)) +int f70 (void) { return 70; @@ -68,6 +75,7 @@ f89 (void) #pragma 
omp declare variant (f37) match (device={isa("sm_37")}) #pragma omp declare variant (f52) match (device={isa("sm_52")}) #pragma omp declare variant (f53) match (device={isa("sm_53")}) +#pragma omp declare variant (f61) match (device={isa("sm_61")}) #pragma omp declare variant (f70) match (device={isa("sm_70")}) #pragma omp declare variant (f75) match (device={isa("sm_75")}) #pragma omp declare variant (f80) match (device={isa("sm_80")}) diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4-gfx942.c b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx942.c new file mode 100644 index 0000000..d1df550 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/declare-variant-4-gfx942.c @@ -0,0 +1,8 @@ +/* { dg-do link { target { offload_target_amdgcn } } } */ +/* { dg-additional-options -foffload=amdgcn-amdhsa } */ +/* { dg-additional-options -foffload=-march=gfx942 } */ +/* { dg-additional-options "-foffload=-fdump-tree-optimized" } */ + +#include "declare-variant-4.h" + +/* { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump "= gfx942 \\(\\);" "optimized" } } */ diff --git a/libgomp/testsuite/libgomp.c/declare-variant-4.h b/libgomp/testsuite/libgomp.c/declare-variant-4.h index 53788d2..2257f4c 100644 --- a/libgomp/testsuite/libgomp.c/declare-variant-4.h +++ b/libgomp/testsuite/libgomp.c/declare-variant-4.h @@ -37,6 +37,13 @@ gfx90c (void) __attribute__ ((noipa)) int +gfx942 (void) +{ + return 0x942; +} + +__attribute__ ((noipa)) +int gfx1030 (void) { return 0x1030; @@ -68,6 +75,7 @@ gfx1103 (void) #pragma omp declare variant(gfx908) match(device = {isa("gfx908")}) #pragma omp declare variant(gfx90a) match(device = {isa("gfx90a")}) #pragma omp declare variant(gfx90c) match(device = {isa("gfx90c")}) +#pragma omp declare variant(gfx942) match(device = {isa("gfx942")}) #pragma omp declare variant(gfx1030) match(device = {isa("gfx1030")}) #pragma omp declare variant(gfx1036) match(device = {isa("gfx1036")}) #pragma omp declare variant(gfx1100) match(device = {isa("gfx1100")}) diff --git a/libgomp/testsuite/libgomp.c/interop-cublas-full.c b/libgomp/testsuite/libgomp.c/interop-cublas-full.c new file mode 100644 index 0000000..2df5277 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-cublas-full.c @@ -0,0 +1,176 @@ +/* { dg-require-effective-target openacc_cublas } */ +/* { dg-additional-options "-lcublas" } */ + +/* NOTE: This file is also included by libgomp.c-c++-common/interop-cudablas-libonly.c + to test the fallback version. */ + +/* Check whether cuBlas' daxpy works with an interop object. + daxpy(N, DA, DX, INCX, DY, INCY) + calculates (for DX = DY = 1): + DY(1:N) = DY(1:N) + DA * DX(1:N) + and otherwise N array elements, taking every INCX-th or INCY-th one, repectively. + +Based on the interop example in OpenMP's example document */ + +/* Minimal check whether CUDA works - by checking whether the API routines + seem to work. This includes a fallback if the header is not + available. */ + +#include <assert.h> +#include <omp.h> +#include "../libgomp.c-c++-common/on_device_arch.h" + + +#if __has_include(<cuda.h>) && __has_include(<cudaTypedefs.h>) && __has_include(<cuda_runtime.h>) && __has_include(<cublas_v2.h>) && !defined(USE_CUDA_FALLBACK_HEADER) + #include <cuda.h> + #include <cudaTypedefs.h> + #include <cuda_runtime.h> + #include <cublas_v2.h> + +#else + /* Add a poor man's fallback declaration. */ + #if USE_CUDA_FALLBACK_HEADER + // Don't warn. 
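+    // (interop-cublas-libonly.c defines USE_CUDA_FALLBACK_HEADER before including this file, so falling back is deliberate there.)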
+ #elif !__has_include(<cuda.h>) + #warning "Using GCC's cuda.h as fallback for cuda.h" + #elif !__has_include(<cudaTypedefs.h>) + #warning "Using GCC's cuda.h as fallback for cudaTypedefs.h" + #elif !__has_include(<cuda_runtime.h>) + #warning "Using GCC's cuda.h as fallback for cuda_runtime.h" + #else + #warning "Using GCC's cuda.h as fallback for cublas_v2.h" + #endif + #include "../../../include/cuda/cuda.h" + + typedef enum { + CUBLAS_STATUS_SUCCESS = 0, + } cublasStatus_t; + + typedef CUstream cudaStream_t; + typedef struct cublasContext* cublasHandle_t; + + #define cublasCreate cublasCreate_v2 + cublasStatus_t cublasCreate_v2 (cublasHandle_t *); + + #define cublasSetStream cublasSetStream_v2 + cublasStatus_t cublasSetStream_v2 (cublasHandle_t, cudaStream_t); + + #define cublasDaxpy cublasDaxpy_v2 + cublasStatus_t cublasDaxpy_v2(cublasHandle_t, int, const double*, const double*, int, double*, int); +#endif + +static int used_variant = 0; + +void +run_cuBlasdaxpy (int n, double da, const double *dx, int incx, double *dy, int incy, omp_interop_t obj) +{ + used_variant = 1; + + omp_interop_rc_t res; + cublasStatus_t stat; + + omp_intptr_t fr = omp_get_interop_int(obj, omp_ipr_fr_id, &res); + assert (res == omp_irc_success && fr == omp_ifr_cuda); + + cudaStream_t stream = (cudaStream_t) omp_get_interop_ptr (obj, omp_ipr_targetsync, &res); + assert (res == omp_irc_success); + + cublasHandle_t handle; + stat = cublasCreate (&handle); + assert (stat == CUBLAS_STATUS_SUCCESS); + + stat = cublasSetStream (handle, stream); + assert (stat == CUBLAS_STATUS_SUCCESS); + + /* 'da' can be in host or device space, 'dx' and 'dy' must be in device space. */ + stat = cublasDaxpy (handle, n, &da, dx, 1, dy, 1) ; + assert (stat == CUBLAS_STATUS_SUCCESS); +} + + +#pragma omp declare variant(run_cuBlasdaxpy) \ + match(construct={dispatch}, target_device={kind(nohost), arch("nvptx")}) \ + adjust_args(need_device_ptr : dx, dy) \ + append_args(interop(targetsync, prefer_type("cuda"))) + +void +run_daxpy (int n, double da, const double *dx, int incx, double *dy, int incy) +{ + used_variant = 2; + + if (incx == 1 && incy == 1) + #pragma omp simd + for (int i = 0; i < n; i++) + dy[i] += da * dx[i]; + else + { + int ix = 0; + int iy = 0; + for (int i = 0; i < n; i++) + { + dy[iy] += da * dx[ix]; + ix += incx; + iy += incy; + } + } +} + + +void +run_test (int dev) +{ + constexpr int N = 1024; + + // A = {1,2,...,N} + // B = {-1, -2, ..., N} + // B' = daxpy (N, 3, A, incx=1, B, incy=1) + // = B + 3*A + // -> B' = {0, 2, 4, 6, ... 
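  //    i.e. B'[i] = -i + 3*i = 2*i, which is what the final assert checks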
} + + double A[N], B[N]; + double factor = 3.0; + for (int i = 0; i < N; i++) + { + A[i] = i; + B[i] = -i; + } + + if (dev != omp_initial_device && dev != omp_get_num_devices ()) + { + #pragma omp target enter data device(dev) map(A, B) + } + + used_variant = 99; + #pragma omp dispatch device(dev) + run_daxpy (N, factor, A, 1, B, 1); + + if (dev != omp_initial_device && dev != omp_get_num_devices ()) + { + #pragma omp target exit data device(dev) map(release: A) map(from: B) + + int tmp = omp_get_default_device (); + omp_set_default_device (dev); + if (on_device_arch_nvptx ()) + assert (used_variant == 1); + else + assert (used_variant == 2); + omp_set_default_device (tmp); + } + else + assert (used_variant == 2); + + for (int i = 0; i < N; i++) + assert (B[i] == 2*i); +} + +int +main () +{ + int ndev = omp_get_num_devices (); + + for (int dev = 0; dev <= ndev; dev++) + run_test (dev); + run_test (omp_initial_device); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/interop-cublas-libonly.c b/libgomp/testsuite/libgomp.c/interop-cublas-libonly.c new file mode 100644 index 0000000..89c0652 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-cublas-libonly.c @@ -0,0 +1,7 @@ +/* { dg-require-effective-target openacc_libcublas } */ +/* { dg-additional-options "-lcublas" } */ + +/* Same as interop-cudablas-full.c, but also works if the header is not available. */ + +#define USE_CUDA_FALLBACK_HEADER 1 +#include "interop-cublas-full.c" diff --git a/libgomp/testsuite/libgomp.c/interop-cuda-full.c b/libgomp/testsuite/libgomp.c/interop-cuda-full.c new file mode 100644 index 0000000..c48a934 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-cuda-full.c @@ -0,0 +1,162 @@ +/* { dg-do run { target { offload_device_nvptx } } } */ +/* { dg-do link { target { ! offload_device_nvptx } } } */ + +/* { dg-require-effective-target openacc_cuda } */ +/* { dg-require-effective-target openacc_cudart } */ +/* { dg-additional-options "-lcuda -lcudart" } */ + +/* NOTE: This file is also included by libgomp.c-c++-common/interop-cuda-libonly.c + to test the fallback version, which defines USE_CUDA_FALLBACK_HEADER. */ + +/* Minimal check whether CUDA works - by checking whether the API routines + seem to work. This includes a fallback if the header is not + available. */ + +#include <assert.h> +#include <omp.h> + +#if __has_include(<cuda.h>) && __has_include(<cudaTypedefs.h>) && __has_include(<cuda_runtime.h>) && !defined(USE_CUDA_FALLBACK_HEADER) + #include <cuda.h> + #include <cudaTypedefs.h> + #include <cuda_runtime.h> + +#else + /* Add a poor man's fallback declaration. */ + #if USE_CUDA_FALLBACK_HEADER + // Don't warn. 
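+    // (interop-cuda-libonly.c defines USE_CUDA_FALLBACK_HEADER before including this file, so falling back is deliberate there.)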
+ #elif !__has_include(<cuda.h>) + #warning "Using GCC's cuda.h as fallback for cuda.h" + #elif !__has_include(<cudaTypedefs.h>) + #warning "Using GCC's cuda.h as fallback for cudaTypedefs.h" + #else + #warning "Using GCC's cuda.h as fallback for cuda_runtime.h" + #endif + #include "../../../include/cuda/cuda.h" + + typedef int cudaError_t; + typedef CUstream cudaStream_t; + enum { + cudaSuccess = 0 + }; + + enum cudaDeviceAttr { + cudaDevAttrClockRate = 13, + cudaDevAttrMaxGridDimX = 5 + }; + + cudaError_t cudaDeviceGetAttribute (int *, enum cudaDeviceAttr, int); + cudaError_t cudaStreamQuery(cudaStream_t); + CUresult cuCtxGetApiVersion(CUcontext, unsigned int *); + CUresult cuStreamGetCtx (CUstream, CUcontext *); +#endif + +int +main () +{ + int ivar; + unsigned uvar; + omp_interop_rc_t res; + omp_interop_t obj_cuda = omp_interop_none; + omp_interop_t obj_cuda_driver = omp_interop_none; + cudaError_t cuda_err; + CUresult cu_err; + + #pragma omp interop init(target, targetsync, prefer_type("cuda") : obj_cuda) \ + init(target, targetsync, prefer_type("cuda_driver") : obj_cuda_driver) \ + + omp_interop_fr_t fr = (omp_interop_fr_t) omp_get_interop_int (obj_cuda, omp_ipr_fr_id, &res); + assert (res == omp_irc_success); + assert (fr == omp_ifr_cuda); + + fr = (omp_interop_fr_t) omp_get_interop_int (obj_cuda_driver, omp_ipr_fr_id, &res); + assert (res == omp_irc_success); + assert (fr == omp_ifr_cuda_driver); + + ivar = (int) omp_get_interop_int (obj_cuda, omp_ipr_vendor, &res); + assert (res == omp_irc_success); + assert (ivar == 11); + + ivar = (int) omp_get_interop_int (obj_cuda_driver, omp_ipr_vendor, &res); + assert (res == omp_irc_success); + assert (ivar == 11); + + + /* Check whether the omp_ipr_device -> cudaDevice_t yields a valid device. */ + + CUdevice cu_dev = (int) omp_get_interop_int (obj_cuda_driver, omp_ipr_device, &res); + assert (res == omp_irc_success); + + /* Assume a clock size is available and > 1 GHz; value is in kHz. */ + cu_err = cuDeviceGetAttribute (&ivar, cudaDevAttrClockRate, cu_dev); + assert (cu_err == CUDA_SUCCESS); + assert (ivar > 1000000 /* kHz */); + + /* Assume that the MaxGridDimX is available and > 1024. */ + cu_err = cuDeviceGetAttribute (&ivar, cudaDevAttrMaxGridDimX, cu_dev); + assert (cu_err == CUDA_SUCCESS); + assert (ivar > 1024); + + int cuda_dev = (int) omp_get_interop_int (obj_cuda, omp_ipr_device, &res); + assert (res == omp_irc_success); + assert (cuda_dev == (CUdevice) cu_dev); // Assume they are the same ... + + /* Assume a clock size is available and > 1 GHz; value is in kHz. */ + cuda_err = cudaDeviceGetAttribute (&ivar, cudaDevAttrClockRate, cuda_dev); + assert (cuda_err == cudaSuccess); + assert (ivar > 1000000 /* kHz */); + + /* Assume that the MaxGridDimX is available and > 1024. */ + cuda_err = cudaDeviceGetAttribute (&ivar, cudaDevAttrMaxGridDimX, cuda_dev); + assert (cuda_err == cudaSuccess); + assert (ivar > 1024); + + + + + /* Check whether the omp_ipr_device_context -> CUcontext yields a context. */ + + CUcontext cu_ctx = (CUcontext) omp_get_interop_ptr (obj_cuda_driver, omp_ipr_device_context, &res); + assert (res == omp_irc_success); + + /* Assume API Version > 0 for Nvidia, cudaErrorNotSupported for AMD. */ + uvar = 99; + cu_err = cuCtxGetApiVersion (cu_ctx, &uvar); + assert (cu_err == CUDA_SUCCESS); + assert (uvar > 0); + + + /* Check whether the omp_ipr_targetsync -> cudaStream_t yields a stream. 
*/ + + cudaStream_t cuda_sm = (cudaStream_t) omp_get_interop_ptr (obj_cuda, omp_ipr_targetsync, &res); + assert (res == omp_irc_success); + + CUstream cu_sm = (cudaStream_t) omp_get_interop_ptr (obj_cuda_driver, omp_ipr_targetsync, &res); + assert (res == omp_irc_success); + + assert ((void*) cu_sm != (void*) cuda_sm); // Type compatible but should have created two streams + + int dev_stream = 99; +#if CUDA_VERSION >= 12080 + cuda_err = cudaStreamGetDevice (cuda_sm, &dev_stream); + assert (cuda_err == cudaSuccess); +#else + cu_err = cuStreamGetCtx (cu_sm, &cu_ctx) != CUDA_SUCCESS; + if (cu_err == CUDA_SUCCESS) + cuda_err = cuCtxPushCurrent (cu_ctx) != CUDA_SUCCESS; + if (cu_err == CUDA_SUCCESS) + cuda_err = cuCtxGetDevice (&dev_stream) != CUDA_SUCCESS; + if (cu_err == CUDA_SUCCESS) + cu_err = cuCtxPopCurrent (&cu_ctx) != CUDA_SUCCESS; + assert (cu_err == CUDA_SUCCESS); +#endif + assert (dev_stream == cuda_dev); + + /* All jobs should have been completed (as there were none none) */ + cuda_err = cudaStreamQuery (cuda_sm); + assert (cuda_err == cudaSuccess); + + cu_err = cuStreamQuery (cu_sm); + assert (cu_err == CUDA_SUCCESS); + + #pragma omp interop destroy(obj_cuda, obj_cuda_driver) +} diff --git a/libgomp/testsuite/libgomp.c/interop-cuda-libonly.c b/libgomp/testsuite/libgomp.c/interop-cuda-libonly.c new file mode 100644 index 0000000..bc257a2 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-cuda-libonly.c @@ -0,0 +1,11 @@ +/* { dg-do run { target { offload_device_nvptx } } } */ +/* { dg-do link { target { ! offload_device_nvptx } } } */ + +/* { dg-require-effective-target openacc_libcudart } */ +/* { dg-require-effective-target openacc_libcuda } */ +/* { dg-additional-options "-lcuda -lcudart" } */ + +/* Same as interop-cuda-full.c, but also works if the header is not available. */ + +#define USE_CUDA_FALLBACK_HEADER 1 +#include "interop-cuda-full.c" diff --git a/libgomp/testsuite/libgomp.c/interop-hip-amd-full.c b/libgomp/testsuite/libgomp.c/interop-hip-amd-full.c new file mode 100644 index 0000000..bd44f44 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hip-amd-full.c @@ -0,0 +1,10 @@ +/* { dg-do run { target { offload_device_gcn } } } */ +/* { dg-do link { target { ! offload_device_gcn } } } */ + +/* { dg-require-effective-target gomp_hip_header_amd } */ +/* { dg-require-effective-target gomp_libamdhip64 } */ +/* { dg-additional-options "-lamdhip64" } */ + +#define __HIP_PLATFORM_AMD__ 1 + +#include "interop-hip.h" diff --git a/libgomp/testsuite/libgomp.c/interop-hip-amd-no-hip-header.c b/libgomp/testsuite/libgomp.c/interop-hip-amd-no-hip-header.c new file mode 100644 index 0000000..91ad987 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hip-amd-no-hip-header.c @@ -0,0 +1,11 @@ +/* { dg-do run { target { offload_device_gcn } } } */ +/* { dg-do link { target { ! offload_device_gcn } } } */ + +/* { dg-require-effective-target gomp_libamdhip64 } */ +/* { dg-additional-options "-lamdhip64" } */ + +#define __HIP_PLATFORM_AMD__ 1 + +#define USE_HIP_FALLBACK_HEADER 1 + +#include "interop-hip.h" diff --git a/libgomp/testsuite/libgomp.c/interop-hip-nvidia-full.c b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-full.c new file mode 100644 index 0000000..d5dc236 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-full.c @@ -0,0 +1,11 @@ +/* { dg-do run { target { offload_device_nvptx } } } */ +/* { dg-do link { target { ! 
offload_device_nvptx } } } */ + +/* { dg-require-effective-target openacc_cudart } */ +/* { dg-require-effective-target openacc_cuda } */ +/* { dg-require-effective-target gomp_hip_header_nvidia } */ +/* { dg-additional-options "-lcuda -lcudart -Wno-deprecated-declarations" } */ + +#define __HIP_PLATFORM_NVIDIA__ 1 + +#include "interop-hip.h" diff --git a/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-headers.c b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-headers.c new file mode 100644 index 0000000..7cff2cb --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-headers.c @@ -0,0 +1,13 @@ +/* { dg-do run { target { offload_device_nvptx } } } */ +/* { dg-do link { target { ! offload_device_nvptx } } } */ + +/* { dg-require-effective-target openacc_libcudart } */ +/* { dg-require-effective-target openacc_libcuda } */ +/* { dg-additional-options "-lcuda -lcudart" } */ + +#define __HIP_PLATFORM_NVIDIA__ 1 + +#define USE_HIP_FALLBACK_HEADER 1 +#define USE_CUDA_FALLBACK_HEADER 1 + +#include "interop-hip.h" diff --git a/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c new file mode 100644 index 0000000..7b7dc74 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c @@ -0,0 +1,12 @@ +/* { dg-do run { target { offload_device_nvptx } } } */ +/* { dg-do link { target { ! offload_device_nvptx } } } */ + +/* { dg-require-effective-target openacc_cudart } */ +/* { dg-require-effective-target openacc_cuda } */ +/* { dg-additional-options "-lcuda -lcudart" } */ + +#define __HIP_PLATFORM_NVIDIA__ 1 + +#define USE_HIP_FALLBACK_HEADER 1 + +#include "interop-hip.h" diff --git a/libgomp/testsuite/libgomp.c/interop-hip.h b/libgomp/testsuite/libgomp.c/interop-hip.h new file mode 100644 index 0000000..20a1ccb --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hip.h @@ -0,0 +1,234 @@ +/* Minimal check whether HIP works - by checking whether the API routines + seem to work. This includes various fallbacks if the header is not + available. */ + +#include <assert.h> +#include <omp.h> + +#if !defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) + #error "Either __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__ must be defined" +#endif + +#if defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) + #error "Either __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__ must be defined" +#endif + +#if __has_include(<hip/hip_runtime_api.h>) && !defined(USE_HIP_FALLBACK_HEADER) + #include <hip/hip_runtime_api.h> + +#elif defined(__HIP_PLATFORM_AMD__) + /* Add a poor man's fallback declaration. */ + #if !defined(USE_HIP_FALLBACK_HEADER) + #warning "Using fallback declaration for <hip/hip_runtime_api.h> for __HIP_PLATFORM_AMD__" + #endif + + typedef struct ihipStream_t* hipStream_t; + typedef struct ihipCtx_t* hipCtx_t; + typedef int hipError_t; + typedef int hipDevice_t; + enum { + hipSuccess = 0, + hipErrorNotSupported = 801 + }; + + typedef enum hipDeviceAttribute_t { + hipDeviceAttributeClockRate = 5, + hipDeviceAttributeMaxGridDimX = 29 + } hipDeviceAttribute_t; + + hipError_t hipDeviceGetAttribute (int *, hipDeviceAttribute_t, hipDevice_t); + hipError_t hipCtxGetApiVersion (hipCtx_t, int *); + hipError_t hipStreamGetDevice (hipStream_t, hipDevice_t *); + hipError_t hipStreamQuery (hipStream_t); + +#elif defined(__HIP_PLATFORM_NVIDIA__) + /* Add a poor man's fallback declaration. 
*/ + #if !defined(USE_HIP_FALLBACK_HEADER) + #warning "Using fallback declaration for <hip/hip_runtime_api.h> for __HIP_PLATFORM_NVIDIA__" + #endif + + #if __has_include(<cuda.h>) && __has_include(<cudaTypedefs.h>) && __has_include(<cuda_runtime.h>) && !defined(USE_CUDA_FALLBACK_HEADER) + #include <cuda.h> + #include <cudaTypedefs.h> + #include <cuda_runtime.h> + #else + #if defined(USE_CUDA_FALLBACK_HEADER) + // no warning + #elif !__has_include(<cuda.h>) + #warning "Using GCC's cuda.h as fallback for cuda.h" + #elif !__has_include(<cudaTypedefs.h>) + #warning "Using GCC's cuda.h as fallback for cudaTypedefs.h" + #else + #warning "Using GCC's cuda.h as fallback for cuda_runtime.h" + #endif + + #include "../../../include/cuda/cuda.h" + + typedef int cudaError_t; + enum { + cudaSuccess = 0 + }; + + enum cudaDeviceAttr { + cudaDevAttrClockRate = 13, + cudaDevAttrMaxGridDimX = 5 + }; + + cudaError_t cudaDeviceGetAttribute (int *, enum cudaDeviceAttr, int); + CUresult cuCtxGetApiVersion(CUcontext, unsigned int *); + CUresult cuStreamGetCtx (CUstream, CUcontext *); + #endif + + typedef CUstream hipStream_t; + typedef CUcontext hipCtx_t; + typedef CUdevice hipDevice_t; + + typedef int hipError_t; + typedef int hipDevice_t; + enum { + hipSuccess = 0, + hipErrorNotSupported = 801 + }; + + + typedef enum hipDeviceAttribute_t { + hipDeviceAttributeClockRate = 5, + hipDeviceAttributeMaxGridDimX = 29 + } hipDeviceAttribute_t; + + inline static hipError_t + hipDeviceGetAttribute (int *ival, hipDeviceAttribute_t attr, hipDevice_t dev) + { + enum cudaDeviceAttr cuattr; + switch (attr) + { + case hipDeviceAttributeClockRate: + cuattr = cudaDevAttrClockRate; + break; + case hipDeviceAttributeMaxGridDimX: + cuattr = cudaDevAttrMaxGridDimX; + break; + default: + assert (0); + } + return cudaDeviceGetAttribute (ival, cuattr, dev) != cudaSuccess; + } + + inline static hipError_t + hipCtxGetApiVersion (hipCtx_t ctx, int *ver) + { + unsigned uver; + hipError_t err; + err = cuCtxGetApiVersion (ctx, &uver) != CUDA_SUCCESS; + *ver = (int) uver; + return err; + } + + inline static hipError_t + hipStreamGetDevice (hipStream_t stream, hipDevice_t *dev) + { +#if CUDA_VERSION >= 12080 + return cudaStreamGetDevice (stream, dev); +#else + hipError_t err; + CUcontext ctx; + err = cuStreamGetCtx (stream, &ctx) != CUDA_SUCCESS; + if (err == hipSuccess) + err = cuCtxPushCurrent (ctx) != CUDA_SUCCESS; + if (err == hipSuccess) + err = cuCtxGetDevice (dev) != CUDA_SUCCESS; + if (err == hipSuccess) + err = cuCtxPopCurrent (&ctx) != CUDA_SUCCESS; + return err; +#endif + } + + inline static hipError_t + hipStreamQuery (hipStream_t stream) + { + return cuStreamQuery (stream) != CUDA_SUCCESS; + } + +#else + #error "should be unreachable" +#endif + +int +main () +{ + int ivar; + omp_interop_rc_t res; + omp_interop_t obj = omp_interop_none; + hipError_t hip_err; + + #pragma omp interop init(target, targetsync, prefer_type("hip") : obj) + + omp_interop_fr_t fr = (omp_interop_fr_t) omp_get_interop_int (obj, omp_ipr_fr_id, &res); + assert (res == omp_irc_success); + assert (fr == omp_ifr_hip); + + ivar = (int) omp_get_interop_int (obj, omp_ipr_vendor, &res); + assert (res == omp_irc_success); + int vendor_is_amd = ivar == 1; + #if defined(__HIP_PLATFORM_AMD__) + assert (ivar == 1); + #elif defined(__HIP_PLATFORM_NVIDIA__) + assert (ivar == 11); + #else + assert (0); + #endif + + + /* Check whether the omp_ipr_device -> hipDevice_t yields a valid device. 
*/ + + hipDevice_t hip_dev = (int) omp_get_interop_int (obj, omp_ipr_device, &res); + assert (res == omp_irc_success); + + /* Assume a clock size is available and > 1 GHz; value is in kHz. */ + hip_err = hipDeviceGetAttribute (&ivar, hipDeviceAttributeClockRate, hip_dev); + assert (hip_err == hipSuccess); + assert (ivar > 1000000 /* kHz */); + + /* Assume that the MaxGridDimX is available and > 1024. */ + hip_err = hipDeviceGetAttribute (&ivar, hipDeviceAttributeMaxGridDimX, hip_dev); + assert (hip_err == hipSuccess); + assert (ivar > 1024); + + + /* Check whether the omp_ipr_device_context -> hipCtx_t yields a context. */ + + hipCtx_t hip_ctx = (hipCtx_t) omp_get_interop_ptr (obj, omp_ipr_device_context, &res); + assert (res == omp_irc_success); + + /* Assume API Version > 0 for Nvidia, hipErrorNotSupported for AMD. */ + ivar = -99; + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wdeprecated-declarations" + hip_err = hipCtxGetApiVersion (hip_ctx, &ivar); + #pragma GCC diagnostic pop + + if (vendor_is_amd) + assert (hip_err == hipErrorNotSupported && ivar == -99); + else + { + assert (hip_err == hipSuccess); + assert (ivar > 0); + } + + + /* Check whether the omp_ipr_targetsync -> hipStream_t yields a stream. */ + + hipStream_t hip_sm = (hipStream_t) omp_get_interop_ptr (obj, omp_ipr_targetsync, &res); + assert (res == omp_irc_success); + + hipDevice_t dev_stream = 99; + hip_err = hipStreamGetDevice (hip_sm, &dev_stream); + assert (hip_err == hipSuccess); + assert (dev_stream == hip_dev); + + /* All jobs should have been completed (as there were none none) */ + hip_err = hipStreamQuery (hip_sm); + assert (hip_err == hipSuccess); + + #pragma omp interop destroy(obj) +} diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas-amd-full.c b/libgomp/testsuite/libgomp.c/interop-hipblas-amd-full.c new file mode 100644 index 0000000..53c05bd --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hipblas-amd-full.c @@ -0,0 +1,7 @@ +/* { dg-require-effective-target gomp_hip_header_amd } */ +/* { dg-require-effective-target gomp_libhipblas } */ +/* { dg-additional-options "-lhipblas" } */ + +#define __HIP_PLATFORM_AMD__ 1 + +#include "interop-hipblas.h" diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas-amd-no-hip-header.c b/libgomp/testsuite/libgomp.c/interop-hipblas-amd-no-hip-header.c new file mode 100644 index 0000000..0ea3133 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hipblas-amd-no-hip-header.c @@ -0,0 +1,8 @@ +/* { dg-require-effective-target gomp_libhipblas } */ +/* { dg-additional-options "-lhipblas" } */ + +#define __HIP_PLATFORM_AMD__ 1 + +#define USE_HIP_FALLBACK_HEADER 1 + +#include "interop-hipblas.h" diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-full.c b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-full.c new file mode 100644 index 0000000..ed428c6 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-full.c @@ -0,0 +1,7 @@ +/* { dg-require-effective-target openacc_cublas } */ +/* { dg-require-effective-target gomp_hip_header_nvidia } */ +/* { dg-additional-options "-lcublas -Wno-deprecated-declarations" } */ + +#define __HIP_PLATFORM_NVIDIA__ 1 + +#include "interop-hipblas.h" diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-headers.c b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-headers.c new file mode 100644 index 0000000..1a31b30 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-headers.c @@ -0,0 +1,9 @@ +/* { dg-require-effective-target openacc_libcublas 
} */ +/* { dg-additional-options "-lcublas" } */ + +#define __HIP_PLATFORM_NVIDIA__ 1 + +#define USE_HIP_FALLBACK_HEADER 1 +#define USE_CUDA_FALLBACK_HEADER 1 + +#include "interop-hipblas.h" diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-hip-header.c b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-hip-header.c new file mode 100644 index 0000000..f85c13b --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hipblas-nvidia-no-hip-header.c @@ -0,0 +1,8 @@ +/* { dg-require-effective-target openacc_cublas } */ +/* { dg-additional-options "-lcublas" } */ + +#define __HIP_PLATFORM_NVIDIA__ 1 + +#define USE_HIP_FALLBACK_HEADER 1 + +#include "interop-hipblas.h" diff --git a/libgomp/testsuite/libgomp.c/interop-hipblas.h b/libgomp/testsuite/libgomp.c/interop-hipblas.h new file mode 100644 index 0000000..d7cb174 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hipblas.h @@ -0,0 +1,240 @@ +/* Check whether hipBlas' daxpy works with an interop object. + daxpy(N, DA, DX, INCX, DY, INCY) + calculates (for DX = DY = 1): + DY(1:N) = DY(1:N) + DA * DX(1:N) + and otherwise N array elements, taking every INCX-th or INCY-th one, repectively. + +Based on the interop example in OpenMP's example document */ + +/* Minimal check whether HIP works - by checking whether the API routines + seem to work. This includes a fallback if the header is not + available. */ + +#if !defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) + #error "Either __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__ must be defined" +#endif + +#if defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) + #error "Either __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__ must be defined" +#endif + + +#include <assert.h> +#include <omp.h> +#include "../libgomp.c-c++-common/on_device_arch.h" + + +#if __has_include(<hipblas/hipblas.h>) && (__has_include(<library_types.h>) || !defined(__HIP_PLATFORM_NVIDIA__)) && !defined(USE_HIP_FALLBACK_HEADER) + #ifdef __HIP_PLATFORM_NVIDIA__ + /* There seems to be an issue with hip/library_types.h including + CUDA's "library_types.h". Include CUDA's one explicitly here. + Could possibly worked around by using -isystem vs. -I. */ + #include <library_types.h> + + /* For some reasons, the following symbols do not seem to get + mapped from HIP to CUDA, causing link errors. */ + #define hipblasSetStream cublasSetStream_v2 + #define hipblasDaxpy cublasDaxpy_v2 + #define hipblasCreate cublasCreate_v2 + #endif + #include <hipblas/hipblas.h> + +#elif defined(__HIP_PLATFORM_AMD__) + /* Add a poor man's fallback declaration. */ + #if !defined(USE_HIP_FALLBACK_HEADER) + #warning "Using fallback declaration for <hipblas/hipblas.h> for __HIP_PLATFORM_AMD__" + #endif + + typedef enum + { + HIPBLAS_STATUS_SUCCESS = 0 + + } hipblasStatus_t; + + typedef struct ihipStream_t* hipStream_t; + typedef void* hipblasHandle_t; + + hipblasStatus_t hipblasCreate (hipblasHandle_t*); + hipblasStatus_t hipblasSetStream (hipblasHandle_t, hipStream_t); + hipblasStatus_t hipblasDaxpy (hipblasHandle_t, int, const double*, const double*, int, double*, int); + +#else + /* Add a poor man's fallback declaration. 
*/ + #if !defined(USE_HIP_FALLBACK_HEADER) + #warning "Using fallback declaration for <hipblas/hipblas.h> for __HIP_PLATFORM_NVIDA__" + #endif + + #if __has_include(<cuda.h>) && __has_include(<cudaTypedefs.h>) && __has_include(<cuda_runtime.h>) && __has_include(<cublas_v2.h>) && !defined(USE_CUDA_FALLBACK_HEADER) + #include <cuda.h> + #include <cudaTypedefs.h> + #include <cuda_runtime.h> + #include <cublas_v2.h> + + #else + /* Add a poor man's fallback declaration. */ + #if defined(USE_CUDA_FALLBACK_HEADER) + // no warning + #elif !__has_include(<cuda.h>) + #warning "Using GCC's cuda.h as fallback for cuda.h" + #elif !__has_include(<cudaTypedefs.h>) + #warning "Using GCC's cuda.h as fallback for cudaTypedefs.h" + #elif !__has_include(<cuda_runtime.h>) + #warning "Using GCC's cuda.h as fallback for cuda_runtime.h" + #else + #warning "Using GCC's cuda.h as fallback for cublas_v2.h" + #endif + #include "../../../include/cuda/cuda.h" + + typedef enum { + CUBLAS_STATUS_SUCCESS = 0, + } cublasStatus_t; + + typedef CUstream cudaStream_t; + typedef struct cublasContext* cublasHandle_t; + + #define cublasCreate cublasCreate_v2 + cublasStatus_t cublasCreate_v2 (cublasHandle_t *); + + #define cublasSetStream cublasSetStream_v2 + cublasStatus_t cublasSetStream_v2 (cublasHandle_t, cudaStream_t); + + #define cublasDaxpy cublasDaxpy_v2 + cublasStatus_t cublasDaxpy_v2(cublasHandle_t, int, const double*, const double*, int, double*, int); + #endif + + #define HIPBLAS_STATUS_SUCCESS CUBLAS_STATUS_SUCCESS + #define hipblasStatus_t cublasStatus_t + #define hipStream_t cudaStream_t + #define hipblasHandle_t cublasHandle_t + #define hipblasCreate cublasCreate + #define hipblasSetStream cublasSetStream + #define hipblasDaxpy cublasDaxpy +#endif + +static int used_variant = 0; + +void +run_hipBlasdaxpy (int n, double da, const double *dx, int incx, double *dy, int incy, omp_interop_t obj) +{ + used_variant = 1; + + omp_interop_rc_t res; + hipblasStatus_t stat; + + omp_intptr_t fr = omp_get_interop_int(obj, omp_ipr_fr_id, &res); + assert (res == omp_irc_success && fr == omp_ifr_hip); + + hipStream_t stream = (hipStream_t) omp_get_interop_ptr (obj, omp_ipr_targetsync, &res); + assert (res == omp_irc_success); + + hipblasHandle_t handle; + stat = hipblasCreate (&handle); + assert (stat == HIPBLAS_STATUS_SUCCESS); + + stat = hipblasSetStream (handle, stream); + assert (stat == HIPBLAS_STATUS_SUCCESS); + + /* 'da' can be in host or device space, 'dx' and 'dy' must be in device space. 
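     That is why the declare variant directive below requests adjust_args (need_device_ptr : dx, dy).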
*/ + stat = hipblasDaxpy (handle, n, &da, dx, 1, dy, 1) ; + assert (stat == HIPBLAS_STATUS_SUCCESS); +} + +#if defined(__HIP_PLATFORM_AMD__) +#pragma omp declare variant(run_hipBlasdaxpy) \ + match(construct={dispatch}, target_device={kind(nohost), arch("amdgcn")}) \ + adjust_args(need_device_ptr : dx, dy) \ + append_args(interop(targetsync, prefer_type("hip"))) +#elif defined(__HIP_PLATFORM_NVIDIA__) +#pragma omp declare variant(run_hipBlasdaxpy) \ + match(construct={dispatch}, target_device={kind(nohost), arch("nvptx")}) \ + adjust_args(need_device_ptr : dx, dy) \ + append_args(interop(targetsync, prefer_type("hip"))) +#else + #error "wrong platform" +#endif + +void +run_daxpy (int n, double da, const double *dx, int incx, double *dy, int incy) +{ + used_variant = 2; + + if (incx == 1 && incy == 1) + #pragma omp simd + for (int i = 0; i < n; i++) + dy[i] += da * dx[i]; + else + { + int ix = 0; + int iy = 0; + for (int i = 0; i < n; i++) + { + dy[iy] += da * dx[ix]; + ix += incx; + iy += incy; + } + } +} + + +void +run_test (int dev) +{ + constexpr int N = 1024; + + // A = {1,2,...,N} + // B = {-1, -2, ..., N} + // B' = daxpy (N, 3, A, incx=1, B, incy=1) + // = B + 3*A + // -> B' = {0, 2, 4, 6, ... } + + double A[N], B[N]; + double factor = 3.0; + for (int i = 0; i < N; i++) + { + A[i] = i; + B[i] = -i; + } + + if (dev != omp_initial_device && dev != omp_get_num_devices ()) + { + #pragma omp target enter data device(dev) map(A, B) + } + + used_variant = 99; + #pragma omp dispatch device(dev) + run_daxpy (N, factor, A, 1, B, 1); + + if (dev != omp_initial_device && dev != omp_get_num_devices ()) + { + #pragma omp target exit data device(dev) map(release: A) map(from: B) + + int tmp = omp_get_default_device (); + omp_set_default_device (dev); +#if defined(__HIP_PLATFORM_AMD__) + if (on_device_arch_gcn ()) +#else + if (on_device_arch_nvptx ()) +#endif + assert (used_variant == 1); + else + assert (used_variant == 2); + omp_set_default_device (tmp); + } + else + assert (used_variant == 2); + + for (int i = 0; i < N; i++) + assert (B[i] == 2*i); +} + +int +main () +{ + int ndev = omp_get_num_devices (); + + for (int dev = 0; dev <= ndev; dev++) + run_test (dev); + run_test (omp_initial_device); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/interop-hsa.c b/libgomp/testsuite/libgomp.c/interop-hsa.c new file mode 100644 index 0000000..21ac91c --- /dev/null +++ b/libgomp/testsuite/libgomp.c/interop-hsa.c @@ -0,0 +1,205 @@ +/* { dg-additional-options "-ldl" } */ +/* { dg-require-effective-target offload_device_gcn } + The 'asm' insert is valid for GCN only: + { dg-additional-options -foffload=amdgcn-amdhsa } */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <omp.h> +#include <assert.h> +#include <dlfcn.h> +#include "../../../include/hsa.h" +#include "../../config/gcn/libgomp-gcn.h" + +#define STACKSIZE (100 * 1024) +#define HEAPSIZE (10 * 1024 * 1024) +#define ARENASIZE HEAPSIZE + +/* This code fragment must be optimized or else the host-fallback kernel has + * invalid ASM inserts. The rest of the file can be compiled safely at -O0. */ +#pragma omp declare target +uintptr_t __attribute__((optimize("O1"))) +get_kernel_ptr () +{ + uintptr_t val; + if (!omp_is_initial_device ()) + /* "main._omp_fn.0" is the name GCC gives the first OpenMP target + * region in the "main" function. + * The ".kd" suffix is added by the LLVM assembler when it creates the + * kernel meta-data, and this is what we need to launch a kernel. 
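+   * The asm below fetches the current PC with s_getpc_b64 and adds the
+   * rel32 offset of main._omp_fn.0.kd, i.e. it computes the address of
+   * that kernel descriptor at run time without touching libgomp's tables.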
*/ + asm ("s_getpc_b64 %0\n\t" + "s_add_u32 %L0, %L0, main._omp_fn.0.kd@rel32@lo+4\n\t" + "s_addc_u32 %H0, %H0, main._omp_fn.0.kd@rel32@hi+4" + : "=Sg"(val)); + return val; +} +#pragma omp end declare target + +int +main(int argc, char** argv) +{ + + /* Load the HSA runtime DLL. */ + void *hsalib = dlopen ("libhsa-runtime64.so.1", RTLD_LAZY); + assert (hsalib); + + hsa_status_t (*hsa_signal_create) (hsa_signal_value_t initial_value, + uint32_t num_consumers, + const hsa_agent_t *consumers, + hsa_signal_t *signal) + = dlsym (hsalib, "hsa_signal_create"); + assert (hsa_signal_create); + + uint64_t (*hsa_queue_load_write_index_relaxed) (const hsa_queue_t *queue) + = dlsym (hsalib, "hsa_queue_load_write_index_relaxed"); + assert (hsa_queue_load_write_index_relaxed); + + void (*hsa_signal_store_relaxed) (hsa_signal_t signal, + hsa_signal_value_t value) + = dlsym (hsalib, "hsa_signal_store_relaxed"); + assert (hsa_signal_store_relaxed); + + hsa_signal_value_t (*hsa_signal_wait_relaxed) (hsa_signal_t signal, + hsa_signal_condition_t condition, + hsa_signal_value_t compare_value, + uint64_t timeout_hint, + hsa_wait_state_t wait_state_hint) + = dlsym (hsalib, "hsa_signal_wait_relaxed"); + assert (hsa_signal_wait_relaxed); + + void (*hsa_queue_store_write_index_relaxed) (const hsa_queue_t *queue, + uint64_t value) + = dlsym (hsalib, "hsa_queue_store_write_index_relaxed"); + assert (hsa_queue_store_write_index_relaxed); + + hsa_status_t (*hsa_signal_destroy) (hsa_signal_t signal) + = dlsym (hsalib, "hsa_signal_destroy"); + assert (hsa_signal_destroy); + + /* Set up the device data environment. */ + int test_data_value = 0; +#pragma omp target enter data map(test_data_value) + + /* Get the interop details. */ + int device_num = omp_get_default_device(); + hsa_agent_t *gpu_agent; + hsa_queue_t *hsa_queue = NULL; + + omp_interop_t interop = omp_interop_none; +#pragma omp interop init(target, targetsync, prefer_type("hsa"): interop) device(device_num) + assert (interop != omp_interop_none); + + omp_interop_rc_t retcode; + omp_interop_fr_t fr = omp_get_interop_int (interop, omp_ipr_fr_id, &retcode); + assert (retcode == omp_irc_success); + assert (fr == omp_ifr_hsa); + + gpu_agent = omp_get_interop_ptr(interop, omp_ipr_device, &retcode); + assert (retcode == omp_irc_success); + + hsa_queue = omp_get_interop_ptr(interop, omp_ipr_targetsync, &retcode); + assert (retcode == omp_irc_success); + assert (hsa_queue); + + /* Call an offload kernel via OpenMP/libgomp. + * + * This kernel serves two purposes: + * 1) Lookup the device-side load-address of itself (thus avoiding the + * need to access the libgomp internals). + * 2) Count how many times it is called. + * We then call it once using OpenMP, and once manually, and check + * the counter reads "2". */ + uint64_t kernel_object = 0; +#pragma omp target map(from:kernel_object) map(present,alloc:test_data_value) + { + kernel_object = get_kernel_ptr (); + ++test_data_value; + } + + assert (kernel_object != 0); + + /* Configure the same kernel to run again, using HSA manually this time. */ + hsa_status_t status; + hsa_signal_t signal; + status = hsa_signal_create(1, 0, NULL, &signal); + assert (status == HSA_STATUS_SUCCESS); + + /* The kernel is built by GCC for OpenMP, so we need to pass the same + * data pointers that libgomp would pass in. 
*/ + struct { + uintptr_t test_data_value; + uintptr_t kernel_object; + } tgtaddrs; + +#pragma omp target data use_device_addr(test_data_value) + { + tgtaddrs.test_data_value = (uintptr_t)&test_data_value; + tgtaddrs.kernel_object = (uintptr_t)omp_target_alloc (8, device_num); + } + + /* We also need to duplicate the launch ABI used by plugin-gcn.c. */ + struct kernargs_abi args; /* From libgomp-gcn.h. */ + args.dummy1 = (int64_t)&tgtaddrs; + args.out_ptr = (int64_t)malloc (sizeof (struct output)); /* Host side. */ + args.heap_ptr = (int64_t)omp_target_alloc (HEAPSIZE, device_num); + args.arena_ptr = (int64_t)omp_target_alloc (ARENASIZE, device_num); + args.stack_ptr = (int64_t)omp_target_alloc (STACKSIZE, device_num); + args.arena_size_per_team = ARENASIZE; + args.stack_size_per_thread = STACKSIZE; + + /* Build the HSA dispatch packet, and insert it into the queue. */ + uint64_t packet_id = hsa_queue_load_write_index_relaxed (hsa_queue); + const uint32_t queueMask = hsa_queue->size - 1; + hsa_kernel_dispatch_packet_t *dispatch_packet = + &(((hsa_kernel_dispatch_packet_t *) + (hsa_queue->base_address))[packet_id & queueMask]); + + dispatch_packet->setup = 3 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + dispatch_packet->workgroup_size_x = 1; + dispatch_packet->workgroup_size_y = 64; + dispatch_packet->workgroup_size_z = 1; + dispatch_packet->grid_size_x = 1; + dispatch_packet->grid_size_y = 64; + dispatch_packet->grid_size_z = 1; + dispatch_packet->completion_signal = signal; + dispatch_packet->kernel_object = kernel_object; + dispatch_packet->kernarg_address = &args; + dispatch_packet->private_segment_size = 0; + dispatch_packet->group_segment_size = 1536; + + uint16_t header = 0; + header |= HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; + header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; + header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; + + /* Finish writing the packet header with an atomic release. */ + __atomic_store_n((uint16_t*)dispatch_packet, header, __ATOMIC_RELEASE); + + hsa_queue_store_write_index_relaxed (hsa_queue, packet_id + 1); + + ;/* Run the kernel and wait for it to complete. */ + hsa_signal_store_relaxed(hsa_queue->doorbell_signal, packet_id); + while (hsa_signal_wait_relaxed(signal, HSA_SIGNAL_CONDITION_LT, 1, + UINT64_MAX, HSA_WAIT_STATE_ACTIVE) != 0) + ; + + /* Clean up HSA. */ + hsa_signal_destroy(signal); + free ((void*)args.out_ptr); + omp_target_free ((void*)args.heap_ptr, device_num); + omp_target_free ((void*)args.arena_ptr, device_num); + omp_target_free ((void*)args.stack_ptr, device_num); + omp_target_free ((void*)tgtaddrs.kernel_object, device_num); + + /* Clean up OpenMP. */ + #pragma omp interop destroy(interop) + + /* Bring the data back from the device. */ +#pragma omp target exit data map(test_data_value) + + /* Ensure the kernel was called twice. Once by OpenMP, once by HSA. 
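+   * The kernel increments test_data_value once per invocation, and the
+   * exit data map above copies the final value back to the host.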
*/ + assert (test_data_value == 2); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/target-map-zero-sized-2.c b/libgomp/testsuite/libgomp.c/target-map-zero-sized-2.c new file mode 100644 index 0000000..3220828 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-map-zero-sized-2.c @@ -0,0 +1,74 @@ +int +main () +{ + int i, n; + int data[] = {1,2}; + struct S { int **ptrset; }; + +// ----------------------------------- + +/* The produced mapping for sptr1->ptrset[i][:n] + + GOMP_MAP_STRUCT (size = 1) + GOMP_MAP_ZERO_LEN_ARRAY_SECTION + GOMP_MAP_ZERO_LEN_ARRAY_SECTION + GOMP_MAP_ATTACH + GOMP_MAP_ATTACH -> attaching to 2nd GOMP_MAP_ZERO_LEN_ARRAY_SECTION + +which get split into 3 separate map_vars call; in particular, +the latter is separate and points to an unmpapped variable. + +Thus, it failed with: + libgomp: pointer target not mapped for attach */ + + struct S s1, *sptr1; + s1.ptrset = (int **) __builtin_malloc (sizeof(void*) * 3); + s1.ptrset[0] = data; + s1.ptrset[1] = data; + s1.ptrset[2] = data; + sptr1 = &s1; + + i = 1; + n = 0; + #pragma omp target enter data map(sptr1[:1], sptr1->ptrset[:3]) + #pragma omp target enter data map(sptr1->ptrset[i][:n]) + + #pragma omp target exit data map(sptr1->ptrset[i][:n]) + #pragma omp target exit data map(sptr1[:1], sptr1->ptrset[:3]) + + __builtin_free (s1.ptrset); + +// ----------------------------------- + +/* The produced mapping for sptr2->ptrset[i][:n] is similar: + + GOMP_MAP_STRUCT (size = 1) + GOMP_MAP_ZERO_LEN_ARRAY_SECTION + GOMP_MAP_TO ! this one has now a finite size + GOMP_MAP_ATTACH + GOMP_MAP_ATTACH -> attach to the GOMP_MAP_TO + +As the latter GOMP_MAP_ATTACH has now a pointer target, +the attachment worked. */ + + struct S s2, *sptr2; + s2.ptrset = (int **) __builtin_malloc (sizeof(void*) * 3); + s2.ptrset[0] = data; + s2.ptrset[1] = data; + s2.ptrset[2] = data; + sptr2 = &s2; + + i = 1; + n = 2; + #pragma omp target enter data map(sptr2[:1], sptr2->ptrset[:3]) + #pragma omp target enter data map(sptr2->ptrset[i][:n]) + + #pragma omp target + if (sptr2->ptrset[1][0] != 1 || sptr2->ptrset[1][1] != 2) + __builtin_abort (); + + #pragma omp target exit data map(sptr2->ptrset[i][:n]) + #pragma omp target exit data map(sptr2[:1], sptr2->ptrset[:3]) + + __builtin_free (s2.ptrset); +} diff --git a/libgomp/testsuite/libgomp.c/target-map-zero-sized-3.c b/libgomp/testsuite/libgomp.c/target-map-zero-sized-3.c new file mode 100644 index 0000000..580c6ad --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-map-zero-sized-3.c @@ -0,0 +1,50 @@ +int +main () +{ + int i, n; + int data[] = {1,2}; + struct S { + int **ptrset; + int **ptrset2; + }; + + /* This is the same as target-map-zero-sized-3.c, but by mixing + mapped and non-mapped items, the mapping before the ATTACH + might (or here: is) not actually associated with the the + pointer used for attaching. Thus, if one does a simple + + if (openmp_p + && (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) + && mapnum == 1) + check in target.c's gomp_map_vars_internal will fail + as mapnum > 1 but still the map associated with this + ATTACH is in a different set. 
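+     In other words: the single 'enter data' directive with the two
+     zero-sized sections below already has mapnum > 1, while the pointer
+     targets needed by its ATTACH entries were mapped by the earlier
+     directives.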
*/ + + struct S s1, *sptr1; + s1.ptrset = (int **) __builtin_malloc (sizeof(void*) * 3); + s1.ptrset2 = (int **) __builtin_malloc (sizeof(void*) * 3); + s1.ptrset[0] = data; + s1.ptrset[1] = data; + s1.ptrset[2] = data; + s1.ptrset2[0] = data; + s1.ptrset2[1] = data; + s1.ptrset2[2] = data; + sptr1 = &s1; + + i = 1; + n = 0; + #pragma omp target enter data map(data) + #pragma omp target enter data map(sptr1[:1], sptr1->ptrset[:3], sptr1->ptrset2[:3]) + #pragma omp target enter data map(sptr1->ptrset[i][:n], sptr1->ptrset2[i][:n]) + + #pragma omp target map(sptr1->ptrset[i][:n], sptr1->ptrset2[i][:n]) + if (sptr1->ptrset2[1][0] != 1 || sptr1->ptrset2[1][1] != 2) + __builtin_abort (); + + #pragma omp target exit data map(sptr1->ptrset[i][:n], sptr1->ptrset2[i][:n]) + #pragma omp target exit data map(sptr1[:1], sptr1->ptrset[:3], sptr1->ptrset2[:3]) + #pragma omp target exit data map(data) + + __builtin_free (s1.ptrset); + __builtin_free (s1.ptrset2); +} diff --git a/libgomp/testsuite/libgomp.c/target-map-zero-sized.c b/libgomp/testsuite/libgomp.c/target-map-zero-sized.c new file mode 100644 index 0000000..7c4ab80 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/target-map-zero-sized.c @@ -0,0 +1,107 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O0" } */ + +/* Issue showed up in the real world when large data was distributed + over multiple MPI progresses - such that for one process n == 0 + happend at run time. + + Before map(var[:0]) and map(var[:n]) with n > 0 was handled, + this patch now also handles map(var[:n]) with n == 0. + + Failed before with "libgomp: pointer target not mapped for attach". */ + +/* Here, the base address is shifted - which should have no effect, + but must work as well. */ +void +with_offset () +{ + struct S { + int *ptr1, *ptr2; + }; + struct S s1, s2; + int *a, *b, *c, *d; + s1.ptr1 = (int *) 0L; + s1.ptr2 = (int *) 0xdeedbeef; + s2.ptr1 = (int *) 0L; + s2.ptr2 = (int *) 0xdeedbeef; + a = (int *) 0L; + b = (int *) 0xdeedbeef; + c = (int *) 0L; + d = (int *) 0xdeedbeef; + + int n1, n2, n3, n4; + n1 = n2 = n3 = n4 = 0; + + #pragma omp target enter data map(s1.ptr1[4:n1], s1.ptr2[6:n2], a[3:n3], b[2:n4]) + + #pragma omp target map(s2.ptr1[4:n1], s2.ptr2[2:n2], c[6:n3], d[9:n4]) + { + if (s2.ptr1 != (void *) 0L || s2.ptr2 != (void *) 0xdeedbeef + || c != (void *) 0L || d != (void *) 0xdeedbeef) + __builtin_abort (); + } + + #pragma omp target map(s1.ptr1[4:n1], s1.ptr2[6:n2], a[3:n3], b[2:n4]) + { + if (s1.ptr1 != (void *) 0L || s1.ptr2 != (void *) 0xdeedbeef + || a != (void *) 0L || b != (void *) 0xdeedbeef) + __builtin_abort (); + } + + #pragma omp target + { + if (s1.ptr1 != (void *) 0L || s1.ptr2 != (void *) 0xdeedbeef + || a != (void *) 0L || b != (void *) 0xdeedbeef) + __builtin_abort (); + } + + #pragma omp target exit data map(s1.ptr1[4:n1], s1.ptr2[6:n2], a[3:n3], b[2:n4]) +} + +int +main () +{ + struct S { + int *ptr1, *ptr2; + }; + struct S s1, s2; + int *a, *b, *c, *d; + s1.ptr1 = (int *) 0L; + s1.ptr2 = (int *) 0xdeedbeef; + s2.ptr1 = (int *) 0L; + s2.ptr2 = (int *) 0xdeedbeef; + a = (int *) 0L; + b = (int *) 0xdeedbeef; + c = (int *) 0L; + d = (int *) 0xdeedbeef; + + int n1, n2, n3, n4; + n1 = n2 = n3 = n4 = 0; + + #pragma omp target enter data map(s1.ptr1[:n1], s1.ptr2[:n2], a[:n3], b[:n4]) + + #pragma omp target map(s2.ptr1[:n1], s2.ptr2[:n2], c[:n3], d[:n4]) + { + if (s2.ptr1 != (void *) 0L || s2.ptr2 != (void *) 0xdeedbeef + || c != (void *) 0L || d != (void *) 0xdeedbeef) + __builtin_abort (); + } + + #pragma omp target map(s1.ptr1[:n1], 
s1.ptr2[:n2], a[:n3], b[:n4]) + { + if (s1.ptr1 != (void *) 0L || s1.ptr2 != (void *) 0xdeedbeef + || a != (void *) 0L || b != (void *) 0xdeedbeef) + __builtin_abort (); + } + + #pragma omp target + { + if (s1.ptr1 != (void *) 0L || s1.ptr2 != (void *) 0xdeedbeef + || a != (void *) 0L || b != (void *) 0xdeedbeef) + __builtin_abort (); + } + + #pragma omp target exit data map(s1.ptr1[:n1], s1.ptr2[:n2], a[:n3], b[:n4]) + + with_offset (); +} diff --git a/libgomp/testsuite/libgomp.fortran/alloc-comp-4.f90 b/libgomp/testsuite/libgomp.fortran/alloc-comp-4.f90 new file mode 100644 index 0000000..d5e982b --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/alloc-comp-4.f90 @@ -0,0 +1,75 @@ +! +! Check that mapping with map(var%tiles(1)) works. +! +! This uses deep mapping to handle the allocatable +! derived-type components +! +! The tricky part is that GCC generates intermittently +! an SSA_NAME that needs to be resolved. +! +module m +type t + integer, allocatable :: den1(:,:), den2(:,:) +end type t + +type t2 + type(t), allocatable :: tiles(:) +end type t2 +end + +use m +use iso_c_binding +implicit none (type, external) +type(t2), target :: var +logical :: is_self_map +type(C_ptr) :: pden1, pden2, ptiles, ptiles1 + +allocate(var%tiles(1)) +var%tiles(1)%den1 = reshape([1,2,3,4],[2,2]) +var%tiles(1)%den2 = reshape([11,22,33,44],[2,2]) + +ptiles = c_loc(var%tiles) +ptiles1 = c_loc(var%tiles(1)) +pden1 = c_loc(var%tiles(1)%den1) +pden2 = c_loc(var%tiles(1)%den2) + + +is_self_map = .false. +!$omp target map(to: is_self_map) + is_self_map = .true. +!$omp end target + +!$omp target enter data map(var%tiles(1)) + +!$omp target firstprivate(ptiles, ptiles1, pden1, pden2) + if (any (var%tiles(1)%den1 /= reshape([1,2,3,4],[2,2]))) stop 1 + if (any (var%tiles(1)%den2 /= reshape([11,22,33,44],[2,2]))) stop 2 + var%tiles(1)%den1 = var%tiles(1)%den1 + 5 + var%tiles(1)%den2 = var%tiles(1)%den2 + 7 + + if (is_self_map) then + if (.not. c_associated (ptiles, c_loc(var%tiles))) stop 3 + if (.not. c_associated (ptiles1, c_loc(var%tiles(1)))) stop 4 + if (.not. c_associated (pden1, c_loc(var%tiles(1)%den1))) stop 5 + if (.not. c_associated (pden2, c_loc(var%tiles(1)%den2))) stop 6 + else + if (c_associated (ptiles, c_loc(var%tiles))) stop 3 + if (c_associated (ptiles1, c_loc(var%tiles(1)))) stop 4 + if (c_associated (pden1, c_loc(var%tiles(1)%den1))) stop 5 + if (c_associated (pden2, c_loc(var%tiles(1)%den2))) stop 6 + endif +!$omp end target + +if (is_self_map) then + if (any (var%tiles(1)%den1 /= 5 + reshape([1,2,3,4],[2,2]))) stop 7 + if (any (var%tiles(1)%den2 /= 7 + reshape([11,22,33,44],[2,2]))) stop 8 +else + if (any (var%tiles(1)%den1 /= reshape([1,2,3,4],[2,2]))) stop 7 + if (any (var%tiles(1)%den2 /= reshape([11,22,33,44],[2,2]))) stop 8 +endif + +!$omp target exit data map(var%tiles(1)) + +if (any (var%tiles(1)%den1 /= 5 + reshape([1,2,3,4],[2,2]))) stop 7 +if (any (var%tiles(1)%den2 /= 7 + reshape([11,22,33,44],[2,2]))) stop 8 +end diff --git a/libgomp/testsuite/libgomp.fortran/allocatable-comp.f90 b/libgomp/testsuite/libgomp.fortran/allocatable-comp.f90 new file mode 100644 index 0000000..383ecba --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/allocatable-comp.f90 @@ -0,0 +1,53 @@ +implicit none +type t + integer, allocatable :: a, b(:) +end type t +type(t) :: x, y, z +integer :: i + +!$omp target map(to: x) + if (allocated(x%a)) stop 1 + if (allocated(x%b)) stop 2 +!$omp end target + +allocate(x%a, x%b(-4:6)) +x%b(:) = [(i, i=-4,6)] + +!$omp target map(to: x) + if (.not. 
allocated(x%a)) stop 3 + if (.not. allocated(x%b)) stop 4 + if (lbound(x%b,1) /= -4) stop 5 + if (ubound(x%b,1) /= 6) stop 6 + if (any (x%b /= [(i, i=-4,6)])) stop 7 +!$omp end target + + +! The following only works with arrays due to +! PR fortran/96668 + +!$omp target enter data map(to: y, z) + +!$omp target map(to: y, z) + if (allocated(y%b)) stop 8 + if (allocated(z%b)) stop 9 +!$omp end target + +allocate(y%b(5), z%b(3)) +y%b = 42 +z%b = 99 + +! (implicitly) 'tofrom' mapped +! Planned for OpenMP 6.0 (but common extension) +! OpenMP <= 5.0 unclear +!$omp target map(to: y) + if (.not.allocated(y%b)) stop 10 + if (any (y%b /= 42)) stop 11 +!$omp end target + +! always map: OpenMP 5.1 (clarified) +!$omp target map(always, tofrom: z) + if (.not.allocated(z%b)) stop 12 + if (any (z%b /= 99)) stop 13 +!$omp end target + +end diff --git a/libgomp/testsuite/libgomp.fortran/allocate-8a.f90 b/libgomp/testsuite/libgomp.fortran/allocate-8a.f90 new file mode 100644 index 0000000..5f6c8c1 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/allocate-8a.f90 @@ -0,0 +1,45 @@ +! { dg-additional-options "-fopenmp-allocators" } +! { dg-additional-options "-fdump-tree-omplower" } +program main + use iso_c_binding + use omp_lib + implicit none (type, external) + integer(omp_allocator_handle_kind):: alloc_h + integer :: i, N + integer(c_intptr_t) :: intptr + integer, allocatable :: A(:) + type(omp_alloctrait):: traits(1) = [omp_alloctrait(omp_atk_alignment, 128)] + + N = 10 + alloc_h = omp_init_allocator(omp_default_mem_space, 1, traits) + + !$omp allocate(A) allocator(alloc_h) + allocate(A(N)) + a(:) = [(i, i=1,N)] + if (mod (transfer (loc(a), intptr),128) /= 0) & + stop 1 + if (any (a /= [(i, i=1,N)])) & + stop 2 + deallocate(A) + !$omp allocate(A) allocator(alloc_h) align(512) + allocate(A(N)) + block + integer, allocatable :: B(:) + !$omp allocators allocate(allocator(alloc_h), align(256) : B) + allocate(B(N)) + B(:) = [(2*i, i=1,N)] + A(:) = B + if (mod (transfer (loc(B), intptr), 256) /= 0) & + stop 1 + ! end of scope deallocation + end block + if (mod (transfer (loc(a), intptr),512) /= 0) & + stop 1 + if (any (a /= [(2*i, i=1,N)])) & + stop 2 + deallocate(A) ! Must deallocate here - before deallocator is destroyed + call omp_destroy_allocator(alloc_h) + ! No auto dealloc of A because it is SAVE +end +! { dg-final { scan-tree-dump-times "__builtin_GOMP_alloc \\(" 3 "omplower" } } +! { dg-final { scan-tree-dump-times "__builtin_GOMP_free \\(" 3 "omplower" } } diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip-amd-full.F90 b/libgomp/testsuite/libgomp.fortran/interop-hip-amd-full.F90 new file mode 100644 index 0000000..eb2f437 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/interop-hip-amd-full.F90 @@ -0,0 +1,10 @@ +! { dg-do run { target { offload_device_gcn } } } +! { dg-do link { target { ! offload_device_gcn } } } + +! { dg-require-effective-target gomp_hipfort_module } +! { dg-require-effective-target gomp_libamdhip64 } +! { dg-additional-options "-lamdhip64" } + +#define HAVE_HIPFORT 1 + +#include "interop-hip.h" diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip-amd-no-module.F90 b/libgomp/testsuite/libgomp.fortran/interop-hip-amd-no-module.F90 new file mode 100644 index 0000000..0ebbe80 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/interop-hip-amd-no-module.F90 @@ -0,0 +1,9 @@ +! { dg-do run { target { offload_device_gcn } } } +! { dg-do link { target { ! offload_device_gcn } } } + +! { dg-require-effective-target gomp_libamdhip64 } +! 
{ dg-additional-options "-lamdhip64" } + +#define USE_HIP_FALLBACK_MODULE 1 + +#include "interop-hip.h" diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-full.F90 b/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-full.F90 new file mode 100644 index 0000000..d29a689 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-full.F90 @@ -0,0 +1,12 @@ +! { dg-do run { target { offload_device_nvptx } } } +! { dg-do link { target { ! offload_device_nvptx } } } + +! { dg-require-effective-target gomp_hipfort_module } +! { dg-require-effective-target openacc_cudart } +! { dg-require-effective-target openacc_cuda } +! { dg-additional-options "-lcuda -lcudart" } + +#define HAVE_HIPFORT 1 +#define USE_CUDA_NAMES 1 + +#include "interop-hip.h" diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90 b/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90 new file mode 100644 index 0000000..2063610 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90 @@ -0,0 +1,11 @@ +! { dg-do run { target { offload_device_nvptx } } } +! { dg-do link { target { ! offload_device_nvptx } } } + +! { dg-require-effective-target openacc_libcudart } +! { dg-require-effective-target openacc_libcuda } +! { dg-additional-options "-lcuda -lcudart" } + +#define USE_CUDA_NAMES 1 +#define USE_HIP_FALLBACK_MODULE 1 + +#include "interop-hip.h" diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip.h b/libgomp/testsuite/libgomp.fortran/interop-hip.h new file mode 100644 index 0000000..753ccce --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/interop-hip.h @@ -0,0 +1,214 @@ +! Minimal check whether HIP works - by checking whether the API routines +! seem to work. This includes a fallback if hipfort is not available + +#ifndef HAVE_HIPFORT +#ifndef USE_HIP_FALLBACK_MODULE +#if USE_CUDA_NAMES +#warning "Using fallback implementation for module hipfort as HAVE_HIPFORT is undefined (for NVIDA/CUDA)" +#else +#warning "Using fallback implementation for module hipfort as HAVE_HIPFORT is undefined - assume AMD as USE_CUDA_NAMES is unset" +#endif +#endif +module hipfort ! 
Minimal implementation for the testsuite + implicit none + + enum, bind(c) + enumerator :: hipSuccess = 0 + enumerator :: hipErrorNotSupported = 801 + end enum + + enum, bind(c) + enumerator :: hipDeviceAttributeClockRate = 5 + enumerator :: hipDeviceAttributeMaxGridDimX = 29 + end enum + + interface + integer(kind(hipSuccess)) function hipDeviceGetAttribute (ip, attr, dev) & +#if USE_CUDA_NAMES + bind(c, name="cudaDeviceGetAttribute") +#else + bind(c, name="hipDeviceGetAttribute") +#endif + use iso_c_binding, only: c_ptr, c_int + import + implicit none + type(c_ptr), value :: ip + integer(kind(hipDeviceAttributeClockRate)), value :: attr + integer(c_int), value :: dev + end + + integer(kind(hipSuccess)) function hipCtxGetApiVersion (ctx, ip) & +#if USE_CUDA_NAMES + bind(c, name="cudaCtxGetApiVersion") +#else + bind(c, name="hipCtxGetApiVersion") +#endif + use iso_c_binding, only: c_ptr + import + implicit none + type(c_ptr), value :: ctx, ip + end + + integer(kind(hipSuccess)) function hipStreamQuery (stream) & +#if USE_CUDA_NAMES + bind(c, name="cudaStreamQuery") +#else + bind(c, name="hipStreamQuery") +#endif + use iso_c_binding, only: c_ptr + import + implicit none + type(c_ptr), value :: stream + end + + integer(kind(hipSuccess)) function hipStreamGetFlags (stream, flags) & +#if USE_CUDA_NAMES + bind(c, name="cudaStreamGetFlags") +#else + bind(c, name="hipStreamGetFlags") +#endif + use iso_c_binding, only: c_ptr + import + implicit none + type(c_ptr), value :: stream + type(c_ptr), value :: flags + end + end interface +end module +#endif + +program main + use iso_c_binding, only: c_ptr, c_int, c_loc + use omp_lib + use hipfort + implicit none (type, external) + +! Only supported since CUDA 12.8 - skip for better compatibility +! ! Manally implement hipStreamGetDevice as hipfort misses it +! ! -> https://github.com/ROCm/hipfort/issues/238 +! interface +! integer(kind(hipSuccess)) function my_hipStreamGetDevice(stream, dev) & +!#if USE_CUDA_NAMES +! bind(c, name="cudaStreamGetDevice") +!#else +! bind(c, name="hipStreamGetDevice") +!#endif +! use iso_c_binding, only: c_ptr, c_int +! import +! implicit none +! type(c_ptr), value :: stream +! integer(c_int) :: dev +! end +! end interface + + integer(c_int), target :: ivar + integer(omp_interop_rc_kind) :: res + integer(omp_interop_kind) :: obj + integer(omp_interop_fr_kind) :: fr + integer(kind(hipSuccess)) :: hip_err + integer(c_int) :: hip_dev, dev_stream + type(c_ptr) :: hip_ctx, hip_sm + + logical :: vendor_is_amd + + obj = omp_interop_none + + !$omp interop init(target, targetsync, prefer_type("hip") : obj) + + fr = omp_get_interop_int (obj, omp_ipr_fr_id, res) + if (res /= omp_irc_success) error stop 1 + if (fr /= omp_ifr_hip) error stop 1 + + ivar = omp_get_interop_int (obj, omp_ipr_vendor, res) + if (ivar == 1) then ! AMD + vendor_is_amd = .true. + else if (ivar == 11) then ! Nvidia + vendor_is_amd = .false. + else + error stop 1 ! Unknown + endif +#if USE_CUDA_NAMES + if (vendor_is_amd) error stop 1 +#else + if (.not. vendor_is_amd) error stop 1 +#endif + + ! Check whether the omp_ipr_device -> hipDevice_t yields a valid device. + + hip_dev = omp_get_interop_int (obj, omp_ipr_device, res) + if (res /= omp_irc_success) error stop 1 + +! AMD messed up in Fortran with the attribute handling, missing the +! translation table it has for C. +block + enum, bind(c) + enumerator :: cudaDevAttrClockRate = 13 + enumerator :: cudaDevAttrMaxGridDimX = 5 + end enum + + ! Assume a clock size is available and > 1 GHz; value is in kHz. + ! 
c_loc is completely bogus, but as AMD messed up the interface ... + ! Cf. https://github.com/ROCm/hipfort/issues/239 +if (vendor_is_amd) then + hip_err = hipDeviceGetAttribute (c_loc(ivar), hipDeviceAttributeClockRate, hip_dev) +else + hip_err = hipDeviceGetAttribute (c_loc(ivar), cudaDevAttrClockRate, hip_dev) +endif + if (hip_err /= hipSuccess) error stop 1 + if (ivar <= 1000000) error stop 1 ! in kHz + + ! Assume that the MaxGridDimX is available and > 1024 + ! c_loc is completely bogus, but as AMD messed up the interface ... + ! Cf. https://github.com/ROCm/hipfort/issues/239 +if (vendor_is_amd) then + hip_err = hipDeviceGetAttribute (c_loc(ivar), hipDeviceAttributeMaxGridDimX, hip_dev) +else + hip_err = hipDeviceGetAttribute (c_loc(ivar), cudaDevAttrMaxGridDimX, hip_dev) +endif + if (hip_err /= hipSuccess) error stop 1 + if (ivar <= 1024) error stop 1 +end block + + + ! Check whether the omp_ipr_device_context -> hipCtx_t yields a context. + + hip_ctx = omp_get_interop_ptr (obj, omp_ipr_device_context, res) + if (res /= omp_irc_success) error stop 1 + +! ! Assume API Version > 0 for Nvidia, hipErrorNotSupported for AMD. */ +! ivar = -99 +! ! AMD deprectated hipCtxGetApiVersion (in C/C++) +! hip_err = hipCtxGetApiVersion (hip_ctx, c_loc(ivar)) +! +! if (vendor_is_amd) then +! if (hip_err /= hipErrorNotSupported .or. ivar /= -99) error stop 1 +! else +! if (hip_err /= hipSuccess) error stop 1 +! if (ivar <= 0) error stop 1 +! end if + + + ! Check whether the omp_ipr_targetsync -> hipStream_t yields a stream. + + hip_sm = omp_get_interop_ptr (obj, omp_ipr_targetsync, res) + if (res /= omp_irc_success) error stop 1 + +! Skip as this is only in CUDA 12.8 +! dev_stream = 99 +! ! Not (yet) implemented: https://github.com/ROCm/hipfort/issues/238 +! ! hip_err = hipStreamGetDevice (hip_sm, dev_stream) +! hip_err = my_hipStreamGetDevice (hip_sm, dev_stream) +! if (hip_err /= hipSuccess) error stop 1 +! if (dev_stream /= hip_dev) error stop 1 + + ! Get flags of the stream + hip_err = hipStreamGetFlags (hip_sm, c_loc (ivar)) + if (hip_err /= hipSuccess) error stop 1 + ! Accept any value + + ! All jobs should have been completed (as there were none none) + hip_err = hipStreamQuery (hip_sm) + if (hip_err /= hipSuccess) error stop 1 + + !$omp interop destroy(obj) +end diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-3.f90 b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-3.f90 new file mode 100644 index 0000000..9d48c7c --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-3.f90 @@ -0,0 +1,121 @@ +type t2 + integer x, y, z +end type t2 +type t + integer, allocatable :: A + integer, allocatable :: B(:) + type(t2), allocatable :: C + type(t2), allocatable :: D(:,:) +end type t + +type t3 + type(t) :: Q + type(t) :: R(5) +end type + +type(t) :: var, var2 +type(t3) :: var3, var4 + +! -------------------------------------- +! Assign + allocate +var%A = 45 +var%B = [1,2,3] +var%C = t2(6,5,4) +var%D = reshape([t2(1,2,3), t2(4,5,6), t2(11,12,13), t2(14,15,16)], [2,2]) + +! 
Assign + allocate +var2%A = 145 +var2%B = [991,992,993] +var2%C = t2(996,995,994) +var2%D = reshape([t2(199,299,399), t2(499,599,699), t2(1199,1299,1399), t2(1499,1599,1699)], [2,2]) + + +!$omp target map(to: var) map(tofrom: var2) + call foo(var, var2) +!$omp end target + +if (var2%A /= 45) stop 9 +if (any (var2%B /= [1,2,3])) stop 10 +if (var2%C%x /= 6) stop 11 +if (var2%C%y /= 5) stop 11 +if (var2%C%z /= 4) stop 11 +if (any (var2%D(:,:)%x /= reshape([1, 4, 11, 14], [2,2]))) stop 12 +if (any (var2%D(:,:)%y /= reshape([2, 5, 12, 15], [2,2]))) stop 12 +if (any (var2%D(:,:)%z /= reshape([3, 6, 13, 16], [2,2]))) stop 12 + +! -------------------------------------- +! Assign + allocate +var3%Q%A = 45 +var3%Q%B = [1,2,3] +var3%Q%C = t2(6,5,4) +var3%Q%D = reshape([t2(1,2,3), t2(4,5,6), t2(11,12,13), t2(14,15,16)], [2,2]) + +var3%R(2)%A = 45 +var3%R(2)%B = [1,2,3] +var3%R(2)%C = t2(6,5,4) +var3%R(2)%D = reshape([t2(1,2,3), t2(4,5,6), t2(11,12,13), t2(14,15,16)], [2,2]) + +! Assign + allocate +var4%Q%A = 145 +var4%Q%B = [991,992,993] +var4%Q%C = t2(996,995,994) +var4%Q%D = reshape([t2(199,299,399), t2(499,599,699), t2(1199,1299,1399), t2(1499,1599,1699)], [2,2]) + +var4%R(3)%A = 145 +var4%R(3)%B = [991,992,993] +var4%R(3)%C = t2(996,995,994) +var4%R(3)%D = reshape([t2(199,299,399), t2(499,599,699), t2(1199,1299,1399), t2(1499,1599,1699)], [2,2]) + +!$omp target map(to: var3%Q) map(tofrom: var4%Q) + call foo(var3%Q, var4%Q) +!$omp end target + +!$omp target map(to: var3%R(2)) map(tofrom: var4%R(3)) + call foo(var3%R(2), var4%R(3)) +!$omp end target + +if (var4%Q%A /= 45) stop 13 +if (any (var4%Q%B /= [1,2,3])) stop 14 +if (var4%Q%C%x /= 6) stop 15 +if (var4%Q%C%y /= 5) stop 15 +if (var4%Q%C%z /= 4) stop 15 +if (any (var4%Q%D(:,:)%x /= reshape([1, 4, 11, 14], [2,2]))) stop 16 +if (any (var4%Q%D(:,:)%y /= reshape([2, 5, 12, 15], [2,2]))) stop 16 +if (any (var4%Q%D(:,:)%z /= reshape([3, 6, 13, 16], [2,2]))) stop 16 + +if (var4%R(3)%A /= 45) stop 17 +if (any (var4%R(3)%B /= [1,2,3])) stop 18 +if (var4%R(3)%C%x /= 6) stop 19 +if (var4%R(3)%C%y /= 5) stop 19 +if (var4%R(3)%C%z /= 4) stop 19 +if (any (var4%R(3)%D(:,:)%x /= reshape([1, 4, 11, 14], [2,2]))) stop 20 +if (any (var4%R(3)%D(:,:)%y /= reshape([2, 5, 12, 15], [2,2]))) stop 20 +if (any (var4%R(3)%D(:,:)%z /= reshape([3, 6, 13, 16], [2,2]))) stop 20 + +contains + subroutine foo(x, y) + type(t) :: x, y + if (x%A /= 45) stop 1 + if (any (x%B /= [1,2,3])) stop 2 + if (x%C%x /= 6) stop 3 + if (x%C%y /= 5) stop 3 + if (x%C%z /= 4) stop 3 + if (any (x%D(:,:)%x /= reshape([1, 4, 11, 14], [2,2]))) stop 4 + if (any (x%D(:,:)%y /= reshape([2, 5, 12, 15], [2,2]))) stop 4 + if (any (x%D(:,:)%z /= reshape([3, 6, 13, 16], [2,2]))) stop 4 + + if (y%A /= 145) stop 5 + if (any (y%B /= [991,992,993])) stop 6 + if (y%C%x /= 996) stop 7 + if (y%C%y /= 995) stop 7 + if (y%C%z /= 994) stop 7 + if (any (y%D(:,:)%x /= reshape([199, 499, 1199, 1499], [2,2]))) stop 8 + if (any (y%D(:,:)%y /= reshape([299, 599, 1299, 1599], [2,2]))) stop 8 + if (any (y%D(:,:)%z /= reshape([399, 699, 1399, 1699], [2,2]))) stop 8 + + y%A = x%A + y%B(:) = x%B + y%C = x%C + y%D(:,:) = x%D(:,:) + end +end diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-4.f90 b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-4.f90 new file mode 100644 index 0000000..fb9859d --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-4.f90 @@ -0,0 +1,124 @@ +type t2 + integer x, y, z +end type t2 +type t + integer, allocatable :: A + integer, allocatable :: B(:) + type(t2), allocatable :: C 
+ type(t2), allocatable :: D(:,:) +end type t + +type t3 + type(t) :: Q + type(t) :: R(5) +end type + +type(t) :: var, var2 +type(t3) :: var3, var4 + +! -------------------------------------- +! Assign + allocate +var%A = 45 +var%B = [1,2,3] +var%C = t2(6,5,4) +var%D = reshape([t2(1,2,3), t2(4,5,6), t2(11,12,13), t2(14,15,16)], [2,2]) + +! Assign + allocate +var2%A = 145 +var2%B = [991,992,993] +var2%C = t2(996,995,994) +var2%D = reshape([t2(199,299,399), t2(499,599,699), t2(1199,1299,1399), t2(1499,1599,1699)], [2,2]) + + +!$omp target map(to: var%A, var%B, var%C, var%D) & +!$omp& map(tofrom: var2%A, var2%B, var2%C, var2%D) + call foo(var, var2) +!$omp end target + +if (var2%A /= 45) stop 9 +if (any (var2%B /= [1,2,3])) stop 10 +if (var2%C%x /= 6) stop 11 +if (var2%C%y /= 5) stop 11 +if (var2%C%z /= 4) stop 11 +if (any (var2%D(:,:)%x /= reshape([1, 4, 11, 14], [2,2]))) stop 12 +if (any (var2%D(:,:)%y /= reshape([2, 5, 12, 15], [2,2]))) stop 12 +if (any (var2%D(:,:)%z /= reshape([3, 6, 13, 16], [2,2]))) stop 12 + +! -------------------------------------- +! Assign + allocate +var3%Q%A = 45 +var3%Q%B = [1,2,3] +var3%Q%C = t2(6,5,4) +var3%Q%D = reshape([t2(1,2,3), t2(4,5,6), t2(11,12,13), t2(14,15,16)], [2,2]) + +var3%R(2)%A = 45 +var3%R(2)%B = [1,2,3] +var3%R(2)%C = t2(6,5,4) +var3%R(2)%D = reshape([t2(1,2,3), t2(4,5,6), t2(11,12,13), t2(14,15,16)], [2,2]) + +! Assign + allocate +var4%Q%A = 145 +var4%Q%B = [991,992,993] +var4%Q%C = t2(996,995,994) +var4%Q%D = reshape([t2(199,299,399), t2(499,599,699), t2(1199,1299,1399), t2(1499,1599,1699)], [2,2]) + +var4%R(3)%A = 145 +var4%R(3)%B = [991,992,993] +var4%R(3)%C = t2(996,995,994) +var4%R(3)%D = reshape([t2(199,299,399), t2(499,599,699), t2(1199,1299,1399), t2(1499,1599,1699)], [2,2]) + +!$omp target map(to: var3%Q%A, var3%Q%B, var3%Q%C, var3%Q%D) & +!$omp& map(tofrom: var4%Q%A, var4%Q%B, var4%Q%C, var4%Q%D) + call foo(var3%Q, var4%Q) +!$omp end target + +if (var4%Q%A /= 45) stop 13 +if (any (var4%Q%B /= [1,2,3])) stop 14 +if (var4%Q%C%x /= 6) stop 15 +if (var4%Q%C%y /= 5) stop 15 +if (var4%Q%C%z /= 4) stop 15 +if (any (var4%Q%D(:,:)%x /= reshape([1, 4, 11, 14], [2,2]))) stop 16 +if (any (var4%Q%D(:,:)%y /= reshape([2, 5, 12, 15], [2,2]))) stop 16 +if (any (var4%Q%D(:,:)%z /= reshape([3, 6, 13, 16], [2,2]))) stop 16 + +!$omp target map(to: var3%R(2)%A, var3%R(2)%B, var3%R(2)%C, var3%R(2)%D) & +!$omp& map(tofrom: var4%R(3)%A, var4%R(3)%B, var4%R(3)%C, var4%R(3)%D) + call foo(var3%R(2), var4%R(3)) +!$omp end target + +if (var4%R(3)%A /= 45) stop 17 +if (any (var4%R(3)%B /= [1,2,3])) stop 18 +if (var4%R(3)%C%x /= 6) stop 19 +if (var4%R(3)%C%y /= 5) stop 19 +if (var4%R(3)%C%z /= 4) stop 19 +if (any (var4%R(3)%D(:,:)%x /= reshape([1, 4, 11, 14], [2,2]))) stop 20 +if (any (var4%R(3)%D(:,:)%y /= reshape([2, 5, 12, 15], [2,2]))) stop 20 +if (any (var4%R(3)%D(:,:)%z /= reshape([3, 6, 13, 16], [2,2]))) stop 20 + +contains + subroutine foo(x, y) + type(t) :: x, y + if (x%A /= 45) stop 1 + if (any (x%B /= [1,2,3])) stop 2 + if (x%C%x /= 6) stop 3 + if (x%C%y /= 5) stop 3 + if (x%C%z /= 4) stop 3 + if (any (x%D(:,:)%x /= reshape([1, 4, 11, 14], [2,2]))) stop 4 + if (any (x%D(:,:)%y /= reshape([2, 5, 12, 15], [2,2]))) stop 4 + if (any (x%D(:,:)%z /= reshape([3, 6, 13, 16], [2,2]))) stop 4 + + if (y%A /= 145) stop 5 + if (any (y%B /= [991,992,993])) stop 6 + if (y%C%x /= 996) stop 7 + if (y%C%y /= 995) stop 7 + if (y%C%z /= 994) stop 7 + if (any (y%D(:,:)%x /= reshape([199, 499, 1199, 1499], [2,2]))) stop 8 + if (any (y%D(:,:)%y /= reshape([299, 599, 1299, 
1599], [2,2]))) stop 8 + if (any (y%D(:,:)%z /= reshape([399, 699, 1399, 1699], [2,2]))) stop 8 + + y%A = x%A + y%B(:) = x%B + y%C = x%C + y%D(:,:) = x%D(:,:) + end +end diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-5.f90 b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-5.f90 new file mode 100644 index 0000000..b2e36b2 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-5.f90 @@ -0,0 +1,53 @@ +implicit none +type t + integer, allocatable :: a, b(:) +end type t +type(t) :: x, y, z +integer :: i + +!$omp target + if (allocated(x%a)) stop 1 + if (allocated(x%b)) stop 2 +!$omp end target + +allocate(x%a, x%b(-4:6)) +x%b(:) = [(i, i=-4,6)] + +!$omp target + if (.not. allocated(x%a)) stop 3 + if (.not. allocated(x%b)) stop 4 + if (lbound(x%b,1) /= -4) stop 5 + if (ubound(x%b,1) /= 6) stop 6 + if (any (x%b /= [(i, i=-4,6)])) stop 7 +!$omp end target + + +! The following only works with arrays due to +! PR fortran/96668 + +!$omp target enter data map(to: y, z) + +!$omp target + if (allocated(y%b)) stop 8 + if (allocated(z%b)) stop 9 +!$omp end target + +allocate(y%b(5), z%b(3)) +y%b = 42 +z%b = 99 + +! (implicitly) 'tofrom' mapped +! Planned for OpenMP 6.0 (but common extension) +! OpenMP <= 5.0 unclear +!$omp target + if (.not.allocated(y%b)) stop 10 + if (any (y%b /= 42)) stop 11 +!$omp end target + +! always map: OpenMP 5.1 (clarified) +!$omp target map(always, tofrom: z) + if (.not.allocated(z%b)) stop 12 + if (any (z%b /= 99)) stop 13 +!$omp end target + +end diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-6.f90 b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-6.f90 new file mode 100644 index 0000000..48d4aea --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-6.f90 @@ -0,0 +1,308 @@ +! NOTE: This code uses POINTER. +! While map(p, var%p) etc. maps the ptr/ptr comp p / var%p (incl. allocatable comps), +! map(var) does not map var%p. + +use iso_c_binding +implicit none +type t2 + integer, allocatable :: x, y, z +end type t2 +type t + integer, pointer :: A => null() + integer, pointer :: B(:) => null() + type(t2), pointer :: C => null() + type(t2), pointer :: D(:,:) => null() +end type t + +type t3 + type(t) :: Q + type(t) :: R(5) +end type + +type(t) :: var, var2 +type(t3) :: var3, var4 +integer(c_intptr_t) :: iptr + +! -------------------------------------- +! Assign + allocate +allocate (var%A, source=45) +allocate (var%B(3), source=[1,2,3]) +allocate (var%C) +var%C%x = 6; var%C%y = 5; var%C%z = 4 +allocate (var%D(2,2)) +var%D(1,1)%x = 1 +var%D(1,1)%y = 2 +var%D(1,1)%z = 3 +var%D(2,1)%x = 4 +var%D(2,1)%y = 5 +var%D(2,1)%z = 6 +var%D(1,2)%x = 11 +var%D(1,2)%y = 12 +var%D(1,2)%z = 13 +var%D(2,2)%x = 14 +var%D(2,2)%y = 15 +var%D(2,2)%z = 16 + +! Assign + allocate +allocate (var2%A, source=145) +allocate (var2%B, source=[991,992,993]) +allocate (var2%C) +var2%C%x = 996; var2%C%y = 995; var2%C%z = 994 +allocate (var2%D(2,2)) +var2%D(1,1)%x = 199 +var2%D(1,1)%y = 299 +var2%D(1,1)%z = 399 +var2%D(2,1)%x = 499 +var2%D(2,1)%y = 599 +var2%D(2,1)%z = 699 +var2%D(1,2)%x = 1199 +var2%D(1,2)%y = 1299 +var2%D(1,2)%z = 1399 +var2%D(2,2)%x = 1499 +var2%D(2,2)%y = 1599 +var2%D(2,2)%z = 1699 + +block + integer(c_intptr_t) :: loc_a, loc_b, loc_c, loc_d, loc2_a, loc2_b, loc2_c, loc2_d + loc_a = loc (var%a) + loc_b = loc (var%b) + loc_c = loc (var%d) + loc_d = loc (var%d) + loc2_a = loc (var2%a) + loc2_b = loc (var2%b) + loc2_c = loc (var2%c) + loc2_d = loc (var2%d) + ! 
var/var2 are mapped, but the pointer components aren't + !$omp target map(to: var) map(tofrom: var2) + if (loc_a /= loc (var%a)) stop 31 + if (loc_b /= loc (var%b)) stop 32 + if (loc_c /= loc (var%d)) stop 33 + if (loc_d /= loc (var%d)) stop 34 + if (loc2_a /= loc (var2%a)) stop 35 + if (loc2_b /= loc (var2%b)) stop 36 + if (loc2_c /= loc (var2%c)) stop 37 + if (loc2_d /= loc (var2%d)) stop 38 + !$omp end target + if (loc_a /= loc (var%a)) stop 41 + if (loc_b /= loc (var%b)) stop 42 + if (loc_c /= loc (var%d)) stop 43 + if (loc_d /= loc (var%d)) stop 44 + if (loc2_a /= loc (var2%a)) stop 45 + if (loc2_b /= loc (var2%b)) stop 46 + if (loc2_c /= loc (var2%c)) stop 47 + if (loc2_d /= loc (var2%d)) stop 48 +end block + +block + ! Map only (all) components, but this maps also the alloc comps + !$omp target map(to: var%a, var%b, var%c, var%d) map(tofrom: var2%a, var2%b, var2%c, var2%d) + call foo (var,var2) + !$omp end target +end block + +if (var2%A /= 45) stop 9 +if (any (var2%B /= [1,2,3])) stop 10 +if (var2%C%x /= 6) stop 11 +if (var2%C%y /= 5) stop 11 +if (var2%C%z /= 4) stop 11 +block + integer :: tmp_x(2,2), tmp_y(2,2), tmp_z(2,2), i, j + tmp_x = reshape([1, 4, 11, 14], [2,2]) + tmp_y = reshape([2, 5, 12, 15], [2,2]) + tmp_z = reshape([3, 6, 13, 16], [2,2]) + do j = 1, 2 + do i = 1, 2 + if (var2%D(i,j)%x /= tmp_x(i,j)) stop 12 + if (var2%D(i,j)%y /= tmp_y(i,j)) stop 12 + if (var2%D(i,j)%z /= tmp_z(i,j)) stop 12 + end do + end do +end block + +! Extra deallocates due to PR fortran/104697 +deallocate(var%C%x, var%C%y, var%C%z) +deallocate(var%D(1,1)%x, var%D(1,1)%y, var%D(1,1)%z) +deallocate(var%D(2,1)%x, var%D(2,1)%y, var%D(2,1)%z) +deallocate(var%D(1,2)%x, var%D(1,2)%y, var%D(1,2)%z) +deallocate(var%D(2,2)%x, var%D(2,2)%y, var%D(2,2)%z) +deallocate(var%A, var%B, var%C, var%D) + +deallocate(var2%C%x, var2%C%y, var2%C%z) +deallocate(var2%D(1,1)%x, var2%D(1,1)%y, var2%D(1,1)%z) +deallocate(var2%D(2,1)%x, var2%D(2,1)%y, var2%D(2,1)%z) +deallocate(var2%D(1,2)%x, var2%D(1,2)%y, var2%D(1,2)%z) +deallocate(var2%D(2,2)%x, var2%D(2,2)%y, var2%D(2,2)%z) +deallocate(var2%A, var2%B, var2%C, var2%D) + +! -------------------------------------- +! Assign + allocate +allocate (var3%Q%A, source=45) +allocate (var3%Q%B, source=[1,2,3]) +allocate (var3%Q%C, source=t2(6,5,4)) +allocate (var3%Q%D(2,2)) +var3%Q%D(1,1) = t2(1,2,3) +var3%Q%D(2,1) = t2(4,5,6) +var3%Q%D(1,2) = t2(11,12,13) +var3%Q%D(2,2) = t2(14,15,16) + +allocate (var3%R(2)%A, source=45) +allocate (var3%R(2)%B, source=[1,2,3]) +allocate (var3%R(2)%C, source=t2(6,5,4)) +allocate (var3%R(2)%D(2,2)) +var3%R(2)%D(1,1) = t2(1,2,3) +var3%R(2)%D(2,1) = t2(4,5,6) +var3%R(2)%D(1,2) = t2(11,12,13) +var3%R(2)%D(2,2) = t2(14,15,16) + +! 
Assign + allocate +allocate (var4%Q%A, source=145) +allocate (var4%Q%B, source=[991,992,993]) +allocate (var4%Q%C, source=t2(996,995,994)) +allocate (var4%Q%D(2,2)) +var4%Q%D(1,1) = t2(199,299,399) +var4%Q%D(2,1) = t2(499,599,699) +var4%Q%D(1,2) = t2(1199,1299,1399) +var4%Q%D(2,2) = t2(1499,1599,1699) + +allocate (var4%R(3)%A, source=145) +allocate (var4%R(3)%B, source=[991,992,993]) +allocate (var4%R(3)%C, source=t2(996,995,994)) +allocate (var4%R(3)%D(2,2)) +var4%R(3)%D(1,1) = t2(199,299,399) +var4%R(3)%D(2,1) = t2(499,599,699) +var4%R(3)%D(1,2) = t2(1199,1299,1399) +var4%R(3)%D(2,2) = t2(1499,1599,1699) + +!$omp target map(to: var3%Q%A, var3%Q%B, var3%Q%C, var3%Q%D) & +!$omp& map(tofrom: var4%Q%A, var4%Q%B, var4%Q%C, var4%Q%D) + call foo(var3%Q, var4%Q) +!$omp end target + +iptr = loc(var3%R(2)%A) + +!$omp target map(to: var3%R(2)%A, var3%R(2)%B, var3%R(2)%C, var3%R(2)%D) & +!$omp& map(tofrom: var4%R(3)%A, var4%R(3)%B, var4%R(3)%C, var4%R(3)%D) + call foo(var3%R(2), var4%R(3)) +!$omp end target + +if (var4%Q%A /= 45) stop 13 +if (any (var4%Q%B /= [1,2,3])) stop 14 +if (var4%Q%C%x /= 6) stop 15 +if (var4%Q%C%y /= 5) stop 15 +if (var4%Q%C%z /= 4) stop 15 +block + integer :: tmp_x(2,2), tmp_y(2,2), tmp_z(2,2), i, j + tmp_x = reshape([1, 4, 11, 14], [2,2]) + tmp_y = reshape([2, 5, 12, 15], [2,2]) + tmp_z = reshape([3, 6, 13, 16], [2,2]) + do j = 1, 2 + do i = 1, 2 + if (var4%Q%D(i,j)%x /= tmp_x(i,j)) stop 16 + if (var4%Q%D(i,j)%y /= tmp_y(i,j)) stop 16 + if (var4%Q%D(i,j)%z /= tmp_z(i,j)) stop 16 + end do + end do +end block + +! Cf. PR fortran/104696 +! { dg-output "valid mapping, OK" { xfail { offload_device_nonshared_as } } } +if (iptr /= loc(var3%R(2)%A)) then + print *, "invalid mapping, cf. PR fortran/104696" +else + +if (var4%R(3)%A /= 45) stop 17 +if (any (var4%R(3)%B /= [1,2,3])) stop 18 +if (var4%R(3)%C%x /= 6) stop 19 +if (var4%R(3)%C%y /= 5) stop 19 +if (var4%R(3)%C%z /= 4) stop 19 +block + integer :: tmp_x(2,2), tmp_y(2,2), tmp_z(2,2), i, j + tmp_x = reshape([1, 4, 11, 14], [2,2]) + tmp_y = reshape([2, 5, 12, 15], [2,2]) + tmp_z = reshape([3, 6, 13, 16], [2,2]) + do j = 1, 2 + do i = 1, 2 + if (var4%R(3)%D(i,j)%x /= tmp_x(i,j)) stop 20 + if (var4%R(3)%D(i,j)%y /= tmp_y(i,j)) stop 20 + if (var4%R(3)%D(i,j)%z /= tmp_z(i,j)) stop 20 + end do + end do +end block + +! 
Extra deallocates due to PR fortran/104697 +deallocate(var3%Q%C%x, var3%Q%D(1,1)%x, var3%Q%D(2,1)%x, var3%Q%D(1,2)%x, var3%Q%D(2,2)%x) +deallocate(var3%Q%C%y, var3%Q%D(1,1)%y, var3%Q%D(2,1)%y, var3%Q%D(1,2)%y, var3%Q%D(2,2)%y) +deallocate(var3%Q%C%z, var3%Q%D(1,1)%z, var3%Q%D(2,1)%z, var3%Q%D(1,2)%z, var3%Q%D(2,2)%z) +deallocate(var3%Q%A, var3%Q%B, var3%Q%C, var3%Q%D) + +deallocate(var4%Q%C%x, var4%Q%D(1,1)%x, var4%Q%D(2,1)%x, var4%Q%D(1,2)%x, var4%Q%D(2,2)%x) +deallocate(var4%Q%C%y, var4%Q%D(1,1)%y, var4%Q%D(2,1)%y, var4%Q%D(1,2)%y, var4%Q%D(2,2)%y) +deallocate(var4%Q%C%z, var4%Q%D(1,1)%z, var4%Q%D(2,1)%z, var4%Q%D(1,2)%z, var4%Q%D(2,2)%z) +deallocate(var4%Q%A, var4%Q%B, var4%Q%C, var4%Q%D) + +deallocate(var3%R(2)%C%x, var3%R(2)%D(1,1)%x, var3%R(2)%D(2,1)%x, var3%R(2)%D(1,2)%x, var3%R(2)%D(2,2)%x) +deallocate(var3%R(2)%C%y, var3%R(2)%D(1,1)%y, var3%R(2)%D(2,1)%y, var3%R(2)%D(1,2)%y, var3%R(2)%D(2,2)%y) +deallocate(var3%R(2)%C%z, var3%R(2)%D(1,1)%z, var3%R(2)%D(2,1)%z, var3%R(2)%D(1,2)%z, var3%R(2)%D(2,2)%z) +deallocate(var3%R(2)%A, var3%R(2)%B, var3%R(2)%C, var3%R(2)%D) + +deallocate(var4%R(3)%C%x, var4%R(3)%D(1,1)%x, var4%R(3)%D(2,1)%x, var4%R(3)%D(1,2)%x, var4%R(3)%D(2,2)%x) +deallocate(var4%R(3)%C%y, var4%R(3)%D(1,1)%y, var4%R(3)%D(2,1)%y, var4%R(3)%D(1,2)%y, var4%R(3)%D(2,2)%y) +deallocate(var4%R(3)%C%z, var4%R(3)%D(1,1)%z, var4%R(3)%D(2,1)%z, var4%R(3)%D(1,2)%z, var4%R(3)%D(2,2)%z) +deallocate(var4%R(3)%A, var4%R(3)%B, var4%R(3)%C, var4%R(3)%D) + + print *, "valid mapping, OK" +endif + +contains + subroutine foo(x, y) + type(t) :: x, y + intent(in) :: x + intent(inout) :: y + integer :: tmp_x(2,2), tmp_y(2,2), tmp_z(2,2), i, j + if (x%A /= 45) stop 1 + if (any (x%B /= [1,2,3])) stop 2 + if (x%C%x /= 6) stop 3 + if (x%C%y /= 5) stop 3 + if (x%C%z /= 4) stop 3 + + tmp_x = reshape([1, 4, 11, 14], [2,2]) + tmp_y = reshape([2, 5, 12, 15], [2,2]) + tmp_z = reshape([3, 6, 13, 16], [2,2]) + do j = 1, 2 + do i = 1, 2 + if (x%D(i,j)%x /= tmp_x(i,j)) stop 4 + if (x%D(i,j)%y /= tmp_y(i,j)) stop 4 + if (x%D(i,j)%z /= tmp_z(i,j)) stop 4 + end do + end do + + if (y%A /= 145) stop 5 + if (any (y%B /= [991,992,993])) stop 6 + if (y%C%x /= 996) stop 7 + if (y%C%y /= 995) stop 7 + if (y%C%z /= 994) stop 7 + tmp_x = reshape([199, 499, 1199, 1499], [2,2]) + tmp_y = reshape([299, 599, 1299, 1599], [2,2]) + tmp_z = reshape([399, 699, 1399, 1699], [2,2]) + do j = 1, 2 + do i = 1, 2 + if (y%D(i,j)%x /= tmp_x(i,j)) stop 8 + if (y%D(i,j)%y /= tmp_y(i,j)) stop 8 + if (y%D(i,j)%z /= tmp_z(i,j)) stop 8 + end do + end do + + y%A = x%A + y%B(:) = x%B + y%C%x = x%C%x + y%C%y = x%C%y + y%C%z = x%C%z + do j = 1, 2 + do i = 1, 2 + y%D(i,j)%x = x%D(i,j)%x + y%D(i,j)%y = x%D(i,j)%y + y%D(i,j)%z = x%D(i,j)%z + end do + end do + end +end diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-7.f90 b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-7.f90 new file mode 100644 index 0000000..1493c5f --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-7.f90 @@ -0,0 +1,672 @@ +module m + implicit none (type, external) + type t + integer, allocatable :: arr(:,:) + integer :: var + integer, allocatable :: slr + end type t + +contains + + subroutine check_it (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array, & + opt_scalar, opt_array, a_opt_scalar, a_opt_array) + type(t), intent(inout) :: & + scalar, array(:,:), opt_scalar, opt_array(:,:), a_scalar, a_array(:,:), & + a_opt_scalar, a_opt_array(:,:), & + l_scalar, l_array(:,:), la_scalar, 
la_array(:,:) + optional :: opt_scalar, opt_array, a_opt_scalar, a_opt_array + allocatable :: a_scalar, a_array, a_opt_scalar, a_opt_array, la_scalar, la_array + logical, value :: is_present, dummy_alloced, inner_alloc + integer :: i, j, k, l + + ! CHECK VALUE + if (scalar%var /= 42) stop 1 + if (l_scalar%var /= 42) stop 1 + if (is_present) then + if (opt_scalar%var /= 42) stop 2 + end if + if (any (shape(array) /= [3,2])) stop 1 + if (any (shape(l_array) /= [3,2])) stop 1 + if (is_present) then + if (any (shape(opt_array) /= [3,2])) stop 1 + end if + do j = 1, 2 + do i = 1, 3 + if (array(i,j)%var /= i*97 + 100*41*j) stop 3 + if (l_array(i,j)%var /= i*97 + 100*41*j) stop 3 + if (is_present) then + if (opt_array(i,j)%var /= i*97 + 100*41*j) stop 4 + end if + end do + end do + + if (dummy_alloced) then + if (a_scalar%var /= 42) stop 1 + if (la_scalar%var /= 42) stop 1 + if (is_present) then + if (a_opt_scalar%var /= 42) stop 1 + end if + if (any (shape(a_array) /= [3,2])) stop 1 + if (any (shape(la_array) /= [3,2])) stop 1 + if (is_present) then + if (any (shape(a_opt_array) /= [3,2])) stop 1 + end if + do j = 1, 2 + do i = 1, 3 + if (a_array(i,j)%var /= i*97 + 100*41*j) stop 1 + if (la_array(i,j)%var /= i*97 + 100*41*j) stop 1 + if (is_present) then + if (a_opt_array(i,j)%var /= i*97 + 100*41*j) stop 1 + end if + end do + end do + else + if (allocated (a_scalar)) stop 1 + if (allocated (la_scalar)) stop 1 + if (allocated (a_array)) stop 1 + if (allocated (la_array)) stop 1 + if (is_present) then + if (allocated (a_opt_scalar)) stop 1 + if (allocated (a_opt_array)) stop 1 + end if + end if + + if (inner_alloc) then + if (scalar%slr /= 467) stop 5 + if (l_scalar%slr /= 467) stop 5 + if (a_scalar%slr /= 467) stop 6 + if (la_scalar%slr /= 467) stop 6 + if (is_present) then + if (opt_scalar%slr /= 467) stop 7 + if (a_opt_scalar%slr /= 467) stop 8 + end if + do j = 1, 2 + do i = 1, 3 + if (array(i,j)%slr /= (i*97 + 100*41*j) + 467) stop 9 + if (l_array(i,j)%slr /= (i*97 + 100*41*j) + 467) stop 9 + if (a_array(i,j)%slr /= (i*97 + 100*41*j) + 467) stop 10 + if (la_array(i,j)%slr /= (i*97 + 100*41*j) + 467) stop 10 + if (is_present) then + if (opt_array(i,j)%slr /= (i*97 + 100*41*j) + 467) stop 11 + if (a_opt_array(i,j)%slr /= (i*97 + 100*41*j) + 467) stop 12 + end if + end do + end do + + do l = 1, 5 + do k = 1, 4 + if (any (shape(scalar%arr) /= [4,5])) stop 1 + if (any (shape(l_scalar%arr) /= [4,5])) stop 1 + if (any (shape(a_scalar%arr) /= [4,5])) stop 1 + if (any (shape(la_scalar%arr) /= [4,5])) stop 1 + if (scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467) stop 13 + if (l_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467) stop 13 + if (a_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467) stop 14 + if (la_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467) stop 14 + if (is_present) then + if (any (shape(opt_scalar%arr) /= [4,5])) stop 1 + if (any (shape(a_opt_scalar%arr) /= [4,5])) stop 1 + if (opt_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467) stop 15 + if (a_opt_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467) stop 16 + end if + end do + end do + do j = 1, 2 + do i = 1, 3 + if (any (shape(array(i,j)%arr) /= [i,j])) stop 1 + if (any (shape(l_array(i,j)%arr) /= [i,j])) stop 1 + if (any (shape(a_array(i,j)%arr) /= [i,j])) stop 1 + if (any (shape(la_array(i,j)%arr) /= [i,j])) stop 1 + if (is_present) then + if (any (shape(opt_array(i,j)%arr) /= [i,j])) stop 1 + if (any (shape(a_opt_array(i,j)%arr) /= [i,j])) stop 1 + endif + do l = 1, j + do k = 1, i + if (array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l) stop 17 + if 
(l_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l) stop 17 + if (a_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l) stop 18 + if (la_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l) stop 18 + if (is_present) then + if (opt_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l) stop 19 + if (a_opt_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l) stop 20 + end if + end do + end do + end do + end do + else if (dummy_alloced) then + if (allocated (scalar%slr)) stop 1 + if (allocated (l_scalar%slr)) stop 1 + if (allocated (a_scalar%slr)) stop 1 + if (allocated (la_scalar%slr)) stop 1 + if (is_present) then + if (allocated (opt_scalar%slr)) stop 1 + if (allocated (a_opt_scalar%slr)) stop 1 + endif + if (allocated (scalar%arr)) stop 1 + if (allocated (l_scalar%arr)) stop 1 + if (allocated (a_scalar%arr)) stop 1 + if (allocated (la_scalar%arr)) stop 1 + if (is_present) then + if (allocated (opt_scalar%arr)) stop 1 + if (allocated (a_opt_scalar%arr)) stop 1 + endif + end if + + ! SET VALUE + scalar%var = 42 + 13 + l_scalar%var = 42 + 13 + if (is_present) then + opt_scalar%var = 42 + 13 + endif + do j = 1, 2 + do i = 1, 3 + array(i,j)%var = i*97 + 100*41*j + 13 + l_array(i,j)%var = i*97 + 100*41*j + 13 + if (is_present) then + opt_array(i,j)%var = i*97 + 100*41*j + 13 + end if + end do + end do + + if (dummy_alloced) then + a_scalar%var = 42 + 13 + la_scalar%var = 42 + 13 + if (is_present) then + a_opt_scalar%var = 42 + 13 + endif + do j = 1, 2 + do i = 1, 3 + a_array(i,j)%var = i*97 + 100*41*j + 13 + la_array(i,j)%var = i*97 + 100*41*j + 13 + if (is_present) then + a_opt_array(i,j)%var = i*97 + 100*41*j + 13 + endif + end do + end do + end if + + if (inner_alloc) then + scalar%slr = 467 + 13 + l_scalar%slr = 467 + 13 + a_scalar%slr = 467 + 13 + la_scalar%slr = 467 + 13 + if (is_present) then + opt_scalar%slr = 467 + 13 + a_opt_scalar%slr = 467 + 13 + end if + do j = 1, 2 + do i = 1, 3 + array(i,j)%slr = (i*97 + 100*41*j) + 467 + 13 + l_array(i,j)%slr = (i*97 + 100*41*j) + 467 + 13 + a_array(i,j)%slr = (i*97 + 100*41*j) + 467 + 13 + la_array(i,j)%slr = (i*97 + 100*41*j) + 467 + 13 + if (is_present) then + opt_array(i,j)%slr = (i*97 + 100*41*j) + 467 + 13 + a_opt_array(i,j)%slr = (i*97 + 100*41*j) + 467 + 13 + end if + end do + end do + + do l = 1, 5 + do k = 1, 4 + scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + 13 + l_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + 13 + a_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + 13 + la_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + 13 + if (is_present) then + opt_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + 13 + a_opt_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + 13 + end if + end do + end do + do j = 1, 2 + do i = 1, 3 + do l = 1, j + do k = 1, i + array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + 13 + l_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + 13 + a_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + 13 + la_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + 13 + if (is_present) then + opt_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + 13 + a_opt_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + 13 + end if + end do + end do + end do + end do + end if + + end subroutine + subroutine check_reset (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array, & + opt_scalar, opt_array, a_opt_scalar, a_opt_array) + type(t), intent(inout) :: & + scalar, array(:,:), opt_scalar, opt_array(:,:), a_scalar, 
a_array(:,:), & + a_opt_scalar, a_opt_array(:,:), & + l_scalar, l_array(:,:), la_scalar, la_array(:,:) + optional :: opt_scalar, opt_array, a_opt_scalar, a_opt_array + allocatable :: a_scalar, a_array, a_opt_scalar, a_opt_array, la_scalar, la_array + logical, value :: is_present, dummy_alloced, inner_alloc + integer :: i, j, k, l + + ! CHECK VALUE + if (scalar%var /= 42 + 13) stop 1 + if (l_scalar%var /= 42 + 13) stop 1 + if (is_present) then + if (opt_scalar%var /= 42 + 13) stop 2 + end if + if (any (shape(array) /= [3,2])) stop 1 + if (any (shape(l_array) /= [3,2])) stop 1 + if (is_present) then + if (any (shape(opt_array) /= [3,2])) stop 1 + end if + do j = 1, 2 + do i = 1, 3 + if (array(i,j)%var /= i*97 + 100*41*j + 13) stop 3 + if (l_array(i,j)%var /= i*97 + 100*41*j + 13) stop 3 + if (is_present) then + if (opt_array(i,j)%var /= i*97 + 100*41*j + 13) stop 4 + end if + end do + end do + + if (dummy_alloced) then + if (a_scalar%var /= 42 + 13) stop 1 + if (la_scalar%var /= 42 + 13) stop 1 + if (is_present) then + if (a_opt_scalar%var /= 42 + 13) stop 1 + end if + if (any (shape(a_array) /= [3,2])) stop 1 + if (any (shape(la_array) /= [3,2])) stop 1 + if (is_present) then + if (any (shape(a_opt_array) /= [3,2])) stop 1 + end if + do j = 1, 2 + do i = 1, 3 + if (a_array(i,j)%var /= i*97 + 100*41*j + 13) stop 1 + if (la_array(i,j)%var /= i*97 + 100*41*j + 13) stop 1 + if (is_present) then + if (a_opt_array(i,j)%var /= i*97 + 100*41*j + 13) stop 1 + end if + end do + end do + else + if (allocated (a_scalar)) stop 1 + if (allocated (la_scalar)) stop 1 + if (allocated (a_array)) stop 1 + if (allocated (la_array)) stop 1 + if (is_present) then + if (allocated (a_opt_scalar)) stop 1 + if (allocated (a_opt_array)) stop 1 + end if + end if + + if (inner_alloc) then + if (scalar%slr /= 467 + 13) stop 5 + if (l_scalar%slr /= 467 + 13) stop 5 + if (a_scalar%slr /= 467 + 13) stop 6 + if (la_scalar%slr /= 467 + 13) stop 6 + if (is_present) then + if (opt_scalar%slr /= 467 + 13) stop 7 + if (a_opt_scalar%slr /= 467 + 13) stop 8 + end if + do j = 1, 2 + do i = 1, 3 + if (array(i,j)%slr /= (i*97 + 100*41*j) + 467 + 13) stop 9 + if (l_array(i,j)%slr /= (i*97 + 100*41*j) + 467 + 13) stop 9 + if (a_array(i,j)%slr /= (i*97 + 100*41*j) + 467 + 13) stop 10 + if (la_array(i,j)%slr /= (i*97 + 100*41*j) + 467 + 13) stop 10 + if (is_present) then + if (opt_array(i,j)%slr /= (i*97 + 100*41*j) + 467 + 13) stop 11 + if (a_opt_array(i,j)%slr /= (i*97 + 100*41*j) + 467 + 13) stop 12 + end if + end do + end do + + do l = 1, 5 + do k = 1, 4 + if (any (shape(scalar%arr) /= [4,5])) stop 1 + if (any (shape(l_scalar%arr) /= [4,5])) stop 1 + if (any (shape(a_scalar%arr) /= [4,5])) stop 1 + if (any (shape(la_scalar%arr) /= [4,5])) stop 1 + if (scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467 + 13) stop 13 + if (l_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467 + 13) stop 13 + if (a_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467 + 13) stop 14 + if (la_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467 + 13) stop 14 + if (is_present) then + if (any (shape(opt_scalar%arr) /= [4,5])) stop 1 + if (any (shape(a_opt_scalar%arr) /= [4,5])) stop 1 + if (opt_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467 + 13) stop 15 + if (a_opt_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467 + 13) stop 16 + end if + end do + end do + do j = 1, 2 + do i = 1, 3 + if (any (shape(array(i,j)%arr) /= [i,j])) stop 1 + if (any (shape(l_array(i,j)%arr) /= [i,j])) stop 1 + if (any (shape(a_array(i,j)%arr) /= [i,j])) stop 1 + if (any (shape(la_array(i,j)%arr) /= [i,j])) stop 1 + if 
(is_present) then + if (any (shape(opt_array(i,j)%arr) /= [i,j])) stop 1 + if (any (shape(a_opt_array(i,j)%arr) /= [i,j])) stop 1 + endif + do l = 1, j + do k = 1, i + if (array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l + 13) stop 17 + if (l_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l + 13) stop 17 + if (a_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l + 13) stop 18 + if (la_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l + 13) stop 18 + if (is_present) then + if (opt_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l + 13) stop 19 + if (a_opt_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l + 13) stop 20 + end if + end do + end do + end do + end do + else if (dummy_alloced) then + if (allocated (scalar%slr)) stop 1 + if (allocated (l_scalar%slr)) stop 1 + if (allocated (a_scalar%slr)) stop 1 + if (allocated (la_scalar%slr)) stop 1 + if (is_present) then + if (allocated (opt_scalar%slr)) stop 1 + if (allocated (a_opt_scalar%slr)) stop 1 + endif + if (allocated (scalar%arr)) stop 1 + if (allocated (l_scalar%arr)) stop 1 + if (allocated (a_scalar%arr)) stop 1 + if (allocated (la_scalar%arr)) stop 1 + if (is_present) then + if (allocated (opt_scalar%arr)) stop 1 + if (allocated (a_opt_scalar%arr)) stop 1 + endif + end if + + ! (RE)SET VALUE + scalar%var = 42 + l_scalar%var = 42 + if (is_present) then + opt_scalar%var = 42 + endif + do j = 1, 2 + do i = 1, 3 + array(i,j)%var = i*97 + 100*41*j + l_array(i,j)%var = i*97 + 100*41*j + if (is_present) then + opt_array(i,j)%var = i*97 + 100*41*j + end if + end do + end do + + if (dummy_alloced) then + a_scalar%var = 42 + la_scalar%var = 42 + if (is_present) then + a_opt_scalar%var = 42 + endif + do j = 1, 2 + do i = 1, 3 + a_array(i,j)%var = i*97 + 100*41*j + la_array(i,j)%var = i*97 + 100*41*j + if (is_present) then + a_opt_array(i,j)%var = i*97 + 100*41*j + endif + end do + end do + end if + + if (inner_alloc) then + scalar%slr = 467 + l_scalar%slr = 467 + a_scalar%slr = 467 + la_scalar%slr = 467 + if (is_present) then + opt_scalar%slr = 467 + a_opt_scalar%slr = 467 + end if + do j = 1, 2 + do i = 1, 3 + array(i,j)%slr = (i*97 + 100*41*j) + 467 + l_array(i,j)%slr = (i*97 + 100*41*j) + 467 + a_array(i,j)%slr = (i*97 + 100*41*j) + 467 + la_array(i,j)%slr = (i*97 + 100*41*j) + 467 + if (is_present) then + opt_array(i,j)%slr = (i*97 + 100*41*j) + 467 + a_opt_array(i,j)%slr = (i*97 + 100*41*j) + 467 + end if + end do + end do + + do l = 1, 5 + do k = 1, 4 + scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + l_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + a_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + la_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + if (is_present) then + opt_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + a_opt_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + end if + end do + end do + do j = 1, 2 + do i = 1, 3 + do l = 1, j + do k = 1, i + array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + l_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + a_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + la_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + if (is_present) then + opt_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + a_opt_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + end if + end do + end do + end do + end do + end if + end subroutine + + subroutine test(scalar, array, a_scalar, a_array, opt_scalar, opt_array, & + a_opt_scalar, a_opt_array) + type(t) :: scalar, array(:,:), opt_scalar, opt_array(:,:), a_scalar, a_array(:,:) + 
type(t) :: a_opt_scalar, a_opt_array(:,:) + type(t) :: l_scalar, l_array(3,2), la_scalar, la_array(:,:) + allocatable :: a_scalar, a_array, a_opt_scalar, a_opt_array, la_scalar, la_array + optional :: opt_scalar, opt_array, a_opt_scalar, a_opt_array + + integer :: i, j, k, l + logical :: is_present, dummy_alloced, local_alloced, inner_alloc + is_present = present(opt_scalar) + dummy_alloced = allocated(a_scalar) + inner_alloc = allocated(scalar%slr) + + l_scalar%var = 42 + do j = 1, 2 + do i = 1, 3 + l_array(i,j)%var = i*97 + 100*41*j + end do + end do + + if (dummy_alloced) then + allocate(la_scalar, la_array(3,2)) + a_scalar%var = 42 + la_scalar%var = 42 + do j = 1, 2 + do i = 1, 3 + l_array(i,j)%var = i*97 + 100*41*j + la_array(i,j)%var = i*97 + 100*41*j + end do + end do + end if + + if (inner_alloc) then + l_scalar%slr = 467 + la_scalar%slr = 467 + do j = 1, 2 + do i = 1, 3 + l_array(i,j)%slr = (i*97 + 100*41*j) + 467 + la_array(i,j)%slr = (i*97 + 100*41*j) + 467 + end do + end do + + allocate(l_scalar%arr(4,5), la_scalar%arr(4,5)) + do l = 1, 5 + do k = 1, 4 + l_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + la_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + end do + end do + do j = 1, 2 + do i = 1, 3 + allocate(l_array(i,j)%arr(i,j), la_array(i,j)%arr(i,j)) + do l = 1, j + do k = 1, i + l_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + la_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + end do + end do + end do + end do + end if + + ! implicit mapping + !$omp target + if (is_present) then + call check_it (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array, & + opt_scalar, opt_array, a_opt_scalar, a_opt_array) + else + call check_it (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array) + end if + !$omp end target + + if (is_present) then + call check_reset (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array, & + opt_scalar, opt_array, a_opt_scalar, a_opt_array) + else + call check_reset (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array) + endif + + ! 
explicit mapping + !$omp target map(scalar, array, opt_scalar, opt_array, a_scalar, a_array) & + !$omp& map(a_opt_scalar, a_opt_array) & + !$omp& map(l_scalar, l_array, la_scalar, la_array) + if (is_present) then + call check_it (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array, & + opt_scalar, opt_array, a_opt_scalar, a_opt_array) + else + call check_it (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array) + endif + !$omp end target + + if (is_present) then + call check_reset (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array, & + opt_scalar, opt_array, a_opt_scalar, a_opt_array) + else + call check_reset (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array) + endif + end subroutine +end module + +program main + use m + implicit none (type, external) + type(t) :: scalar, array(3,2), opt_scalar, opt_array(3,2), a_scalar, a_array(:,:) + type(t) :: a_opt_scalar, a_opt_array(:,:) + allocatable :: a_scalar, a_array, a_opt_scalar, a_opt_array + integer :: i, j, k, l, n + + scalar%var = 42 + opt_scalar%var = 42 + do j = 1, 2 + do i = 1, 3 + array(i,j)%var = i*97 + 100*41*j + opt_array(i,j)%var = i*97 + 100*41*j + end do + end do + + ! unallocated + call test (scalar, array, a_scalar, a_array) + call test (scalar, array, a_scalar, a_array, opt_scalar, opt_array, a_opt_scalar, a_opt_array) + + ! allocated + allocate(a_scalar, a_opt_scalar, a_array(3,2), a_opt_array(3,2)) + a_scalar%var = 42 + a_opt_scalar%var = 42 + do j = 1, 2 + do i = 1, 3 + a_array(i,j)%var = i*97 + 100*41*j + a_opt_array(i,j)%var = i*97 + 100*41*j + end do + end do + + call test (scalar, array, a_scalar, a_array) + call test (scalar, array, a_scalar, a_array, opt_scalar, opt_array, a_opt_scalar, a_opt_array) + + ! 
comps allocated + scalar%slr = 467 + a_scalar%slr = 467 + opt_scalar%slr = 467 + a_opt_scalar%slr = 467 + do j = 1, 2 + do i = 1, 3 + array(i,j)%slr = (i*97 + 100*41*j) + 467 + a_array(i,j)%slr = (i*97 + 100*41*j) + 467 + opt_array(i,j)%slr = (i*97 + 100*41*j) + 467 + a_opt_array(i,j)%slr = (i*97 + 100*41*j) + 467 + end do + end do + + allocate(scalar%arr(4,5), a_scalar%arr(4,5), opt_scalar%arr(4,5), a_opt_scalar%arr(4,5)) + do l = 1, 5 + do k = 1, 4 + scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + a_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + opt_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + a_opt_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + end do + end do + do j = 1, 2 + do i = 1, 3 + allocate(array(i,j)%arr(i,j), a_array(i,j)%arr(i,j), opt_array(i,j)%arr(i,j), a_opt_array(i,j)%arr(i,j)) + do l = 1, j + do k = 1, i + array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + a_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + opt_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + a_opt_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + end do + end do + end do + end do + + call test (scalar, array, a_scalar, a_array) + call test (scalar, array, a_scalar, a_array, opt_scalar, opt_array, a_opt_scalar, a_opt_array) + + deallocate(a_scalar, a_opt_scalar, a_array, a_opt_array) +end diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-8.f90 b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-8.f90 new file mode 100644 index 0000000..f5a286e --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-8.f90 @@ -0,0 +1,268 @@ +module m + implicit none (type, external) + type t + integer, allocatable :: A(:) + end type t + type t2 + type(t), allocatable :: vT + integer, allocatable :: x + end type t2 + +contains + + subroutine test_alloc() + type(t) :: var + type(t), allocatable :: var2 + + allocate(var2) + allocate(var%A(4), var2%A(5)) + + !$omp target enter data map(alloc: var, var2) + !$omp target + if (.not. allocated(Var2)) stop 1 + if (.not. allocated(Var%A)) stop 2 + if (.not. allocated(Var2%A)) stop 3 + if (lbound(var%A, 1) /= 1 .or. ubound(var%A, 1) /= 4) stop 4 + if (lbound(var2%A, 1) /= 1 .or. ubound(var2%A, 1) /= 5) stop 5 + var%A = [1,2,3,4] + var2%A = [11,22,33,44,55] + !$omp end target + !$omp target exit data map(from: var, var2) + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%A)) error stop + if (.not. allocated(Var2%A)) error stop + if (lbound(var%A, 1) /= 1 .or. ubound(var%A, 1) /= 4) error stop + if (lbound(var2%A, 1) /= 1 .or. ubound(var2%A, 1) /= 5) error stop + if (any(var%A /= [1,2,3,4])) error stop + if (any(var2%A /= [11,22,33,44,55])) error stop + end subroutine test_alloc + + subroutine test2_alloc() + type(t2) :: var + type(t2), allocatable :: var2 + + allocate(var2) + allocate(var%x, var2%x) + + !$omp target enter data map(alloc: var, var2) + !$omp target + if (.not. allocated(Var2)) stop 6 + if (.not. allocated(Var%x)) stop 7 + if (.not. allocated(Var2%x)) stop 8 + var%x = 42 + var2%x = 43 + !$omp end target + !$omp target exit data map(from: var, var2) + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%x)) error stop + if (.not. allocated(Var2%x)) error stop + if (var%x /= 42) error stop + if (var2%x /= 43) error stop + + allocate(var%vt, var2%vt) + allocate(var%vt%A(-1:3), var2%vt%A(0:4)) + + !$omp target enter data map(alloc: var, var2) + !$omp target + if (.not. allocated(Var2)) stop 11 + if (.not. allocated(Var%x)) stop 12 + if (.not. allocated(Var2%x)) stop 13 + if (.not. 
allocated(Var%vt)) stop 14 + if (.not. allocated(Var2%vt)) stop 15 + if (.not. allocated(Var%vt%a)) stop 16 + if (.not. allocated(Var2%vt%a)) stop 17 + var%x = 42 + var2%x = 43 + if (lbound(var%vt%A, 1) /= -1 .or. ubound(var%vt%A, 1) /= 3) stop 4 + if (lbound(var2%vt%A, 1) /= 0 .or. ubound(var2%vt%A, 1) /= 4) stop 5 + var%vt%A = [1,2,3,4,5] + var2%vt%A = [11,22,33,44,55] + !$omp end target + !$omp target exit data map(from: var, var2) + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%x)) error stop + if (.not. allocated(Var2%x)) error stop + if (.not. allocated(Var%vt)) error stop + if (.not. allocated(Var2%vt)) error stop + if (.not. allocated(Var%vt%a)) error stop + if (.not. allocated(Var2%vt%a)) error stop + if (var%x /= 42) error stop + if (var2%x /= 43) error stop + if (lbound(var%vt%A, 1) /= -1 .or. ubound(var%vt%A, 1) /= 3) error stop + if (lbound(var2%vt%A, 1) /= 0 .or. ubound(var2%vt%A, 1) /= 4) error stop + if (any(var%vt%A /= [1,2,3,4,5])) error stop + if (any(var2%vt%A /= [11,22,33,44,55])) error stop + end subroutine test2_alloc + + + subroutine test_alloc_target() + type(t) :: var + type(t), allocatable :: var2 + + allocate(var2) + allocate(var%A(4), var2%A(5)) + + !$omp target map(alloc: var, var2) + if (.not. allocated(Var2)) stop 1 + if (.not. allocated(Var%A)) stop 2 + if (.not. allocated(Var2%A)) stop 3 + if (lbound(var%A, 1) /= 1 .or. ubound(var%A, 1) /= 4) stop 4 + if (lbound(var2%A, 1) /= 1 .or. ubound(var2%A, 1) /= 5) stop 5 + var%A = [1,2,3,4] + var2%A = [11,22,33,44,55] + !$omp end target + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%A)) error stop + if (.not. allocated(Var2%A)) error stop + if (lbound(var%A, 1) /= 1 .or. ubound(var%A, 1) /= 4) error stop + if (lbound(var2%A, 1) /= 1 .or. ubound(var2%A, 1) /= 5) error stop + end subroutine test_alloc_target + + subroutine test2_alloc_target() + type(t2) :: var + type(t2), allocatable :: var2 + + allocate(var2) + allocate(var%x, var2%x) + + !$omp target map(alloc: var, var2) + if (.not. allocated(Var2)) stop 6 + if (.not. allocated(Var%x)) stop 7 + if (.not. allocated(Var2%x)) stop 8 + var%x = 42 + var2%x = 43 + !$omp end target + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%x)) error stop + if (.not. allocated(Var2%x)) error stop + + allocate(var%vt, var2%vt) + allocate(var%vt%A(-1:3), var2%vt%A(0:4)) + + !$omp target map(alloc: var, var2) + if (.not. allocated(Var2)) stop 11 + if (.not. allocated(Var%x)) stop 12 + if (.not. allocated(Var2%x)) stop 13 + if (.not. allocated(Var%vt)) stop 14 + if (.not. allocated(Var2%vt)) stop 15 + if (.not. allocated(Var%vt%a)) stop 16 + if (.not. allocated(Var2%vt%a)) stop 17 + var%x = 42 + var2%x = 43 + if (lbound(var%vt%A, 1) /= -1 .or. ubound(var%vt%A, 1) /= 3) stop 4 + if (lbound(var2%vt%A, 1) /= 0 .or. ubound(var2%vt%A, 1) /= 4) stop 5 + var%vt%A = [1,2,3,4,5] + var2%vt%A = [11,22,33,44,55] + !$omp end target + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%x)) error stop + if (.not. allocated(Var2%x)) error stop + if (.not. allocated(Var%vt)) error stop + if (.not. allocated(Var2%vt)) error stop + if (.not. allocated(Var%vt%a)) error stop + if (.not. allocated(Var2%vt%a)) error stop + if (lbound(var%vt%A, 1) /= -1 .or. ubound(var%vt%A, 1) /= 3) error stop + if (lbound(var2%vt%A, 1) /= 0 .or. 
ubound(var2%vt%A, 1) /= 4) error stop + end subroutine test2_alloc_target + + + + subroutine test_from() + type(t) :: var + type(t), allocatable :: var2 + + allocate(var2) + allocate(var%A(4), var2%A(5)) + + !$omp target map(from: var, var2) + if (.not. allocated(Var2)) stop 1 + if (.not. allocated(Var%A)) stop 2 + if (.not. allocated(Var2%A)) stop 3 + if (lbound(var%A, 1) /= 1 .or. ubound(var%A, 1) /= 4) stop 4 + if (lbound(var2%A, 1) /= 1 .or. ubound(var2%A, 1) /= 5) stop 5 + var%A = [1,2,3,4] + var2%A = [11,22,33,44,55] + !$omp end target + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%A)) error stop + if (.not. allocated(Var2%A)) error stop + if (lbound(var%A, 1) /= 1 .or. ubound(var%A, 1) /= 4) error stop + if (lbound(var2%A, 1) /= 1 .or. ubound(var2%A, 1) /= 5) error stop + if (any(var%A /= [1,2,3,4])) error stop + if (any(var2%A /= [11,22,33,44,55])) error stop + end subroutine test_from + + subroutine test2_from() + type(t2) :: var + type(t2), allocatable :: var2 + + allocate(var2) + allocate(var%x, var2%x) + + !$omp target map(from: var, var2) + if (.not. allocated(Var2)) stop 6 + if (.not. allocated(Var%x)) stop 7 + if (.not. allocated(Var2%x)) stop 8 + var%x = 42 + var2%x = 43 + !$omp end target + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%x)) error stop + if (.not. allocated(Var2%x)) error stop + if (var%x /= 42) error stop + if (var2%x /= 43) error stop + + allocate(var%vt, var2%vt) + allocate(var%vt%A(-1:3), var2%vt%A(0:4)) + + !$omp target map(from: var, var2) + if (.not. allocated(Var2)) stop 11 + if (.not. allocated(Var%x)) stop 12 + if (.not. allocated(Var2%x)) stop 13 + if (.not. allocated(Var%vt)) stop 14 + if (.not. allocated(Var2%vt)) stop 15 + if (.not. allocated(Var%vt%a)) stop 16 + if (.not. allocated(Var2%vt%a)) stop 17 + var%x = 42 + var2%x = 43 + if (lbound(var%vt%A, 1) /= -1 .or. ubound(var%vt%A, 1) /= 3) stop 4 + if (lbound(var2%vt%A, 1) /= 0 .or. ubound(var2%vt%A, 1) /= 4) stop 5 + var%vt%A = [1,2,3,4,5] + var2%vt%A = [11,22,33,44,55] + !$omp end target + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%x)) error stop + if (.not. allocated(Var2%x)) error stop + if (.not. allocated(Var%vt)) error stop + if (.not. allocated(Var2%vt)) error stop + if (.not. allocated(Var%vt%a)) error stop + if (.not. allocated(Var2%vt%a)) error stop + if (var%x /= 42) error stop + if (var2%x /= 43) error stop + if (lbound(var%vt%A, 1) /= -1 .or. ubound(var%vt%A, 1) /= 3) error stop + if (lbound(var2%vt%A, 1) /= 0 .or. ubound(var2%vt%A, 1) /= 4) error stop + if (any(var%vt%A /= [1,2,3,4,5])) error stop + if (any(var2%vt%A /= [11,22,33,44,55])) error stop + end subroutine test2_from + +end module m + +use m + implicit none (type, external) + call test_alloc + call test2_alloc + call test_alloc_target + call test2_alloc_target + + call test_from + call test2_from +end diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9-usm.f90 b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9-usm.f90 new file mode 100644 index 0000000..90378c0 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9-usm.f90 @@ -0,0 +1,11 @@ +! { dg-additional-options "-cpp -DUSE_USM_REQUIREMENT=1 -Wno-openmp" } +! +! We silence the warning: +! Mapping of polymorphic list item '...' is unspecified behavior [-Wopenmp] +! +! Ensure that polymorphic mapping is diagnosed as undefined behavior +! Ensure that static access to polymorphic variables works + +! 
Run map-alloc-comp-9.f90 in unified-shared-memory mode + +#include "map-alloc-comp-9.f90" diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9.f90 b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9.f90 new file mode 100644 index 0000000..26c73d7 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9.f90 @@ -0,0 +1,578 @@ +! { dg-additional-options "-cpp" } +! +! Ensure that polymorphic mapping is diagnosed as undefined behavior +! Ensure that static access to polymorphic variables works + +! Some extended tests are only run with shared memory +! To enforce this (where possible) on the device side: +! #define USE_USM_REQUIREMENT +! which is done in map-alloc-comp-9-usm.f90 + +subroutine test(case) +implicit none(type, external) +#ifdef USE_USM_REQUIREMENT + !$omp requires unified_shared_memory +#endif + +type t + integer :: x(4) +end type t + +type ta + integer, allocatable :: x(:) +end type ta + +type t2 + class(t), allocatable :: x + class(t), allocatable :: x2(:) +end type t2 + +type t3 + type(t2) :: y + type(t2) :: y2(2) +end type t3 + +type t4 + type(t3), allocatable :: y + type(t3), allocatable :: y2(:) +end type t4 + +integer, value :: case + +logical :: is_shared_mem + +! Mangle stack addresses +integer, volatile :: case_var(100*case) + +type(t), allocatable :: var1 +type(ta), allocatable :: var1a +class(t), allocatable :: var2 +type(t2), allocatable :: var3 +type(t4), allocatable :: var4 + +case_var(100) = 0 +!print *, 'case', case + +var1 = t([1,2,3,4]) +var1a = ta([-1,-2,-3,-4,-5]) + +var2 = t([11,22,33,44]) + +allocate(t2 :: var3) +allocate(t :: var3%x) +allocate(t :: var3%x2(2)) +var3%x%x = [111,222,333,444] +var3%x2(1)%x = 2*[111,222,333,444] +var3%x2(2)%x = 3*[111,222,333,444] + +allocate(t4 :: var4) +allocate(t3 :: var4%y) +allocate(t3 :: var4%y2(2)) +allocate(t :: var4%y%y%x) +allocate(t :: var4%y%y%x2(2)) +allocate(t :: var4%y2(1)%y%x) +allocate(t :: var4%y2(1)%y%x2(2)) +allocate(t :: var4%y2(2)%y%x) +allocate(t :: var4%y2(2)%y%x2(2)) +var4%y%y%x%x = -1 * [1111,2222,3333,4444] +var4%y%y%x2(1)%x = -2 * [1111,2222,3333,4444] +var4%y%y%x2(2)%x = -3 * [1111,2222,3333,4444] +var4%y2(1)%y%x%x = -4 * [1111,2222,3333,4444] +var4%y2(1)%y%x2(1)%x = -5 * [1111,2222,3333,4444] +var4%y2(1)%y%x2(2)%x = -6 * [1111,2222,3333,4444] +var4%y2(2)%y%x%x = -7 * [1111,2222,3333,4444] +var4%y2(2)%y%x2(1)%x = -8 * [1111,2222,3333,4444] +var4%y2(2)%y%x2(2)%x = -9 * [1111,2222,3333,4444] + +#ifdef USE_USM_REQUIREMENT +is_shared_mem = .true. +#else +is_shared_mem = .false. +!$omp target map(to: is_shared_mem) + is_shared_mem = .true. +!$omp end target +#endif + +if (case == 1) then + ! implicit mapping + !$omp target + if (any (var1%x /= [1,2,3,4])) stop 1 + var1%x = 2 * var1%x + !$omp end target + + !$omp target + if (any (var1a%x /= [-1,-2,-3,-4])) stop 2 + var1a%x = 3 * var1a%x + !$omp end target + + !$omp target ! { dg-warning "Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var2%x /= [11,22,33,44])) stop 3 + var2%x = 4 * var2%x + !$omp end target + + !$omp target ! { dg-warning "Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var3%x%x /= [111,222,333,444])) stop 4 + var3%x%x = 5 * var3%x%x + if (is_shared_mem) then ! 
For stride data, this accesses the host's _vtab + if (any (var3%x2(1)%x /= 2*[111,222,333,444])) stop 4 + if (any (var3%x2(2)%x /= 3*[111,222,333,444])) stop 4 + var3%x2(1)%x = 5 * var3%x2(1)%x + var3%x2(2)%x = 5 * var3%x2(2)%x + end if + !$omp end target + + !$omp target ! { dg-warning "Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var4%y%y%x%x /= -1 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y%y%x2(1)%x /= -2 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y%y%x2(2)%x /= -3 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(1)%y%x%x /= -4 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(1)%y%x2(1)%x /= -5 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(1)%y%x2(2)%x /= -6 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(2)%y%x%x /= -7 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(2)%y%x2(1)%x /= -8 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(2)%y%x2(2)%x /= -9 * [1111,2222,3333,4444])) stop 5 + end if + var4%y%y%x%x = 6 * var4%y%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y%y%x2(1)%x = 6 * var4%y%y%x2(1)%x + var4%y%y%x2(2)%x = 6 * var4%y%y%x2(2)%x + endif + var4%y2(1)%y%x%x = 6 * var4%y2(1)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(1)%y%x2(1)%x = 6 * var4%y2(1)%y%x2(1)%x + var4%y2(1)%y%x2(2)%x = 6 * var4%y2(1)%y%x2(2)%x + endif + var4%y2(2)%y%x%x = 6 * var4%y2(2)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(2)%y%x2(1)%x = 6 * var4%y2(2)%y%x2(1)%x + var4%y2(2)%y%x2(2)%x = 6 * var4%y2(2)%y%x2(2)%x + endif + !$omp end target + +else if (case == 2) then + ! Use target with defaultmap(TO) + + !$omp target defaultmap(to : all) + if (any (var1%x /= [1,2,3,4])) stop 1 + var1%x = 2 * var1%x + !$omp end target + + !$omp target defaultmap(to : all) + if (any (var1a%x /= [-1,-2,-3,-4])) stop 2 + var1a%x = 3 * var1a%x + !$omp end target + + !$omp target defaultmap(to : all) ! { dg-warning "Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var2%x /= [11,22,33,44])) stop 3 + var2%x = 4 * var2%x + !$omp end target + + !$omp target defaultmap(to : all) ! { dg-warning "Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var3%x%x /= [111,222,333,444])) stop 4 + var3%x%x = 5 * var3%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var3%x2(1)%x /= 2*[111,222,333,444])) stop 4 + if (any (var3%x2(2)%x /= 3*[111,222,333,444])) stop 4 + var3%x2(1)%x = 5 * var3%x2(1)%x + var3%x2(2)%x = 5 * var3%x2(2)%x + endif + !$omp end target + + !$omp target defaultmap(to : all) firstprivate(is_shared_mem) ! { dg-warning "Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var4%y%y%x%x /= -1 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y%y%x2(1)%x /= -2 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y%y%x2(2)%x /= -3 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(1)%y%x%x /= -4 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! 
For stride data, this accesses the host's _vtab + if (any (var4%y2(1)%y%x2(1)%x /= -5 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(1)%y%x2(2)%x /= -6 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(2)%y%x%x /= -7 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(2)%y%x2(1)%x /= -8 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(2)%y%x2(2)%x /= -9 * [1111,2222,3333,4444])) stop 5 + endif + var4%y%y%x%x = 6 * var4%y%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y%y%x2(1)%x = 6 * var4%y%y%x2(1)%x + var4%y%y%x2(2)%x = 6 * var4%y%y%x2(2)%x + endif + var4%y2(1)%y%x%x = 6 * var4%y2(1)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(1)%y%x2(1)%x = 6 * var4%y2(1)%y%x2(1)%x + var4%y2(1)%y%x2(2)%x = 6 * var4%y2(1)%y%x2(2)%x + endif + var4%y2(2)%y%x%x = 6 * var4%y2(2)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(2)%y%x2(1)%x = 6 * var4%y2(2)%y%x2(1)%x + var4%y2(2)%y%x2(2)%x = 6 * var4%y2(2)%y%x2(2)%x + endif + !$omp end target + +else if (case == 3) then + ! Use target with map clause + + !$omp target map(tofrom: var1) + if (any (var1%x /= [1,2,3,4])) stop 1 + var1%x = 2 * var1%x + !$omp end target + + !$omp target map(tofrom: var1a) + if (any (var1a%x /= [-1,-2,-3,-4])) stop 2 + var1a%x = 3 * var1a%x + !$omp end target + + !$omp target map(tofrom: var2) ! { dg-warning "28: Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var2%x /= [11,22,33,44])) stop 3 + var2%x = 4 * var2%x + !$omp end target + + !$omp target map(tofrom: var3) ! { dg-warning "28: Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var3%x%x /= [111,222,333,444])) stop 4 + var3%x%x = 5 * var3%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var3%x2(1)%x /= 2*[111,222,333,444])) stop 4 + if (any (var3%x2(2)%x /= 3*[111,222,333,444])) stop 4 + var3%x2(1)%x = 5 * var3%x2(1)%x + var3%x2(2)%x = 5 * var3%x2(2)%x + endif + !$omp end target + + !$omp target map(tofrom: var4) ! { dg-warning "28: Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var4%y%y%x%x /= -1 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y%y%x2(1)%x /= -2 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y%y%x2(2)%x /= -3 * [1111,2222,3333,4444])) stop 5 + end if + if (any (var4%y2(1)%y%x%x /= -4 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(1)%y%x2(1)%x /= -5 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(1)%y%x2(2)%x /= -6 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(2)%y%x%x /= -7 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(2)%y%x2(1)%x /= -8 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(2)%y%x2(2)%x /= -9 * [1111,2222,3333,4444])) stop 5 + endif + var4%y%y%x%x = 6 * var4%y%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y%y%x2(1)%x = 6 * var4%y%y%x2(1)%x + var4%y%y%x2(2)%x = 6 * var4%y%y%x2(2)%x + endif + var4%y2(1)%y%x%x = 6 * var4%y2(1)%y%x%x + if (is_shared_mem) then ! 
For stride data, this accesses the host's _vtab + var4%y2(1)%y%x2(1)%x = 6 * var4%y2(1)%y%x2(1)%x + var4%y2(1)%y%x2(2)%x = 6 * var4%y2(1)%y%x2(2)%x + endif + var4%y2(2)%y%x%x = 6 * var4%y2(2)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(2)%y%x2(1)%x = 6 * var4%y2(2)%y%x2(1)%x + var4%y2(2)%y%x2(2)%x = 6 * var4%y2(2)%y%x2(2)%x + endif + !$omp end target + +else if (case == 4) then + ! Use target with map clause -- NOTE: This uses TO not TOFROM + + !$omp target map(to: var1) + if (any (var1%x /= [1,2,3,4])) stop 1 + var1%x = 2 * var1%x + !$omp end target + + !$omp target map(to: var1a) + if (any (var1a%x /= [-1,-2,-3,-4])) stop 2 + var1a%x = 3 * var1a%x + !$omp end target + + !$omp target map(to: var2) ! { dg-warning "24: Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var2%x /= [11,22,33,44])) stop 3 + var2%x = 4 * var2%x + !$omp end target + + !$omp target map(to: var3) ! { dg-warning "24: Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var3%x%x /= [111,222,333,444])) stop 4 + var3%x%x = 5 * var3%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var3%x2(1)%x /= 2*[111,222,333,444])) stop 4 + if (any (var3%x2(2)%x /= 3*[111,222,333,444])) stop 4 + var3%x2(1)%x = 5 * var3%x2(1)%x + var3%x2(2)%x = 5 * var3%x2(2)%x + endif + !$omp end target + + !$omp target map(to: var4) ! { dg-warning "24: Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var4%y%y%x%x /= -1 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y%y%x2(1)%x /= -2 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y%y%x2(2)%x /= -3 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(1)%y%x%x /= -4 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(1)%y%x2(1)%x /= -5 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(1)%y%x2(2)%x /= -6 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(2)%y%x%x /= -7 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(2)%y%x2(1)%x /= -8 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(2)%y%x2(2)%x /= -9 * [1111,2222,3333,4444])) stop 5 + endif + var4%y%y%x%x = 6 * var4%y%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y%y%x2(1)%x = 6 * var4%y%y%x2(1)%x + var4%y%y%x2(2)%x = 6 * var4%y%y%x2(2)%x + endif + var4%y2(1)%y%x%x = 6 * var4%y2(1)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(1)%y%x2(1)%x = 6 * var4%y2(1)%y%x2(1)%x + var4%y2(1)%y%x2(2)%x = 6 * var4%y2(1)%y%x2(2)%x + endif + var4%y2(2)%y%x%x = 6 * var4%y2(2)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(2)%y%x2(1)%x = 6 * var4%y2(2)%y%x2(1)%x + var4%y2(2)%y%x2(2)%x = 6 * var4%y2(2)%y%x2(2)%x + endif + !$omp end target + +else if (case == 5) then + ! Use target enter/exit data + target with explicit map + !$omp target enter data map(to: var1) + !$omp target enter data map(to: var1a) + !$omp target enter data map(to: var2) ! { dg-warning "35: Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + !$omp target enter data map(to: var3) ! 
{ dg-warning "35: Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + !$omp target enter data map(to: var4) ! { dg-warning "35: Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + + !$omp target map(to: var1) + if (any (var1%x /= [1,2,3,4])) stop 1 + var1%x = 2 * var1%x + !$omp end target + + !$omp target map(to: var1a) + if (any (var1a%x /= [-1,-2,-3,-4])) stop 2 + var1a%x = 3 * var1a%x + !$omp end target + + !$omp target map(to: var2) ! { dg-warning "24: Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var2%x /= [11,22,33,44])) stop 3 + var2%x = 4 * var2%x + !$omp end target + + !$omp target map(to: var3) ! { dg-warning "24: Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var3%x%x /= [111,222,333,444])) stop 4 + var3%x%x = 5 * var3%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var3%x2(1)%x /= 2*[111,222,333,444])) stop 4 + if (any (var3%x2(2)%x /= 3*[111,222,333,444])) stop 4 + var3%x2(1)%x = 5 * var3%x2(1)%x + var3%x2(2)%x = 5 * var3%x2(2)%x + endif + !$omp end target + + !$omp target map(to: var4) ! { dg-warning "24: Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var4%y%y%x%x /= -1 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y%y%x2(1)%x /= -2 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y%y%x2(2)%x /= -3 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(1)%y%x%x /= -4 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(1)%y%x2(1)%x /= -5 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(1)%y%x2(2)%x /= -6 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(2)%y%x%x /= -7 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(2)%y%x2(1)%x /= -8 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(2)%y%x2(2)%x /= -9 * [1111,2222,3333,4444])) stop 5 + endif + var4%y%y%x%x = 6 * var4%y%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y%y%x2(1)%x = 6 * var4%y%y%x2(1)%x + var4%y%y%x2(2)%x = 6 * var4%y%y%x2(2)%x + endif + var4%y2(1)%y%x%x = 6 * var4%y2(1)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(1)%y%x2(1)%x = 6 * var4%y2(1)%y%x2(1)%x + var4%y2(1)%y%x2(2)%x = 6 * var4%y2(1)%y%x2(2)%x + endif + var4%y2(2)%y%x%x = 6 * var4%y2(2)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(2)%y%x2(1)%x = 6 * var4%y2(2)%y%x2(1)%x + var4%y2(2)%y%x2(2)%x = 6 * var4%y2(2)%y%x2(2)%x + endif + !$omp end target + + !$omp target exit data map(from: var1) + !$omp target exit data map(from: var1a) + !$omp target exit data map(from: var2) ! { dg-warning "36: Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + !$omp target exit data map(from: var3) ! { dg-warning "36: Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + !$omp target exit data map(from: var4) ! { dg-warning "36: Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + +else if (case == 6) then + ! 
Use target enter/exit data + target with implicit map + + !$omp target enter data map(to: var1) + !$omp target enter data map(to: var1a) + !$omp target enter data map(to: var2) ! { dg-warning "35: Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + !$omp target enter data map(to: var3) ! { dg-warning "35: Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + !$omp target enter data map(to: var4) ! { dg-warning "35: Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + + !$omp target + if (any (var1%x /= [1,2,3,4])) stop 1 + var1%x = 2 * var1%x + !$omp end target + + !$omp target + if (any (var1a%x /= [-1,-2,-3,-4])) stop 2 + var1a%x = 3 * var1a%x + !$omp end target + + !$omp target ! { dg-warning "Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var2%x /= [11,22,33,44])) stop 3 + var2%x = 4 * var2%x + !$omp end target + + !$omp target ! { dg-warning "Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var3%x%x /= [111,222,333,444])) stop 4 + var3%x%x = 5 * var3%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var3%x2(1)%x /= 2*[111,222,333,444])) stop 4 + if (any (var3%x2(2)%x /= 3*[111,222,333,444])) stop 4 + var3%x2(1)%x = 5 * var3%x2(1)%x + var3%x2(2)%x = 5 * var3%x2(2)%x + endif + !$omp end target + + !$omp target ! { dg-warning "Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var4%y%y%x%x /= -1 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y%y%x2(1)%x /= -2 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y%y%x2(2)%x /= -3 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(1)%y%x%x /= -4 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(1)%y%x2(1)%x /= -5 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(1)%y%x2(2)%x /= -6 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(2)%y%x%x /= -7 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(2)%y%x2(1)%x /= -8 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(2)%y%x2(2)%x /= -9 * [1111,2222,3333,4444])) stop 5 + endif + var4%y%y%x%x = 6 * var4%y%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y%y%x2(1)%x = 6 * var4%y%y%x2(1)%x + var4%y%y%x2(2)%x = 6 * var4%y%y%x2(2)%x + endif + var4%y2(1)%y%x%x = 6 * var4%y2(1)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(1)%y%x2(1)%x = 6 * var4%y2(1)%y%x2(1)%x + var4%y2(1)%y%x2(2)%x = 6 * var4%y2(1)%y%x2(2)%x + endif + var4%y2(2)%y%x%x = 6 * var4%y2(2)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(2)%y%x2(1)%x = 6 * var4%y2(2)%y%x2(1)%x + var4%y2(2)%y%x2(2)%x = 6 * var4%y2(2)%y%x2(2)%x + endif + !$omp end target + + !$omp target exit data map(from: var1) + !$omp target exit data map(from: var1a) + !$omp target exit data map(from: var2) ! { dg-warning "36: Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + !$omp target exit data map(from: var3) ! 
{ dg-warning "36: Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + !$omp target exit data map(from: var4) ! { dg-warning "36: Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + +else + error stop +end if + +if ((case /= 2 .and. case /= 4) .or. is_shared_mem) then + ! The target update should have been active, check for the updated values + if (any (var1%x /= 2 * [1,2,3,4])) stop 11 + if (any (var1a%x /= 3 * [-1,-2,-3,-4])) stop 22 + if (any (var2%x /= 4 * [11,22,33,44])) stop 33 + + if (any (var3%x%x /= 5 * [111,222,333,444])) stop 44 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var3%x2(1)%x /= 2 * 5 * [111,222,333,444])) stop 44 + if (any (var3%x2(2)%x /= 3 * 5 * [111,222,333,444])) stop 44 + endif + + if (any (var4%y%y%x%x /= -1 * 6 * [1111,2222,3333,4444])) stop 55 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y%y%x2(1)%x /= -2 * 6 * [1111,2222,3333,4444])) stop 55 + if (any (var4%y%y%x2(2)%x /= -3 * 6 * [1111,2222,3333,4444])) stop 55 + endif + if (any (var4%y2(1)%y%x%x /= -4 * 6 * [1111,2222,3333,4444])) stop 55 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(1)%y%x2(1)%x /= -5 * 6 * [1111,2222,3333,4444])) stop 55 + if (any (var4%y2(1)%y%x2(2)%x /= -6 * 6 * [1111,2222,3333,4444])) stop 55 + endif + if (any (var4%y2(2)%y%x%x /= -7 * 6 * [1111,2222,3333,4444])) stop 55 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(2)%y%x2(1)%x /= -8 * 6 * [1111,2222,3333,4444])) stop 55 + if (any (var4%y2(2)%y%x2(2)%x /= -9 * 6 * [1111,2222,3333,4444])) stop 55 + endif +else + ! The old host values should still be there as 'to:' created a device copy + if (any (var1%x /= [1,2,3,4])) stop 12 + if (any (var1a%x /= [-1,-2,-3,-4])) stop 22 + if (any (var2%x /= [11,22,33,44])) stop 33 + + if (any (var3%x%x /= [111,222,333,444])) stop 44 + ! .not. is_shared_mem: + ! if (any (var3%x2(1)%x /= 2*[111,222,333,444])) stop 44 + ! if (any (var3%x2(2)%x /= 3*[111,222,333,444])) stop 44 + + if (any (var4%y%y%x%x /= -1 * [1111,2222,3333,4444])) stop 55 + if (any (var4%y%y%x2(1)%x /= -2 * [1111,2222,3333,4444])) stop 55 + if (any (var4%y%y%x2(2)%x /= -3 * [1111,2222,3333,4444])) stop 55 + if (any (var4%y2(1)%y%x%x /= -4 * [1111,2222,3333,4444])) stop 55 + ! .not. is_shared_mem: + !if (any (var4%y2(1)%y%x2(1)%x /= -5 * [1111,2222,3333,4444])) stop 55 + !if (any (var4%y2(1)%y%x2(2)%x /= -6 * [1111,2222,3333,4444])) stop 55 + if (any (var4%y2(2)%y%x%x /= -7 * [1111,2222,3333,4444])) stop 55 + ! .not. is_shared_mem: + !if (any (var4%y2(2)%y%x2(1)%x /= -8 * [1111,2222,3333,4444])) stop 55 + !if (any (var4%y2(2)%y%x2(2)%x /= -9 * [1111,2222,3333,4444])) stop 55 +end if +if (case_var(100) /= 0) stop 123 +end subroutine test + +program main + use omp_lib + implicit none(type, external) +#ifdef USE_USM_REQUIREMENT + !$omp requires unified_shared_memory +#endif + + interface + subroutine test(case) + integer, value :: case + end + end interface + integer :: dev + call run_it(omp_get_default_device()) + do dev = 0, omp_get_num_devices() + call run_it(dev) + end do + call run_it(omp_initial_device) +! print *, 'all done' +contains +subroutine run_it(dev) + integer, value :: dev +! 
print *, 'DEVICE', dev + call omp_set_default_device(dev) + call test(1) + call test(2) + call test(3) + call test(4) + call test(5) + call test(6) +end +end diff --git a/libgomp/testsuite/libgomp.fortran/metadirective-1.f90 b/libgomp/testsuite/libgomp.fortran/metadirective-1.f90 index 7b3e09f..d6f4d5b 100644 --- a/libgomp/testsuite/libgomp.fortran/metadirective-1.f90 +++ b/libgomp/testsuite/libgomp.fortran/metadirective-1.f90 @@ -1,4 +1,5 @@ -! { dg-do run } +! { dg-do run { target { ! offload_target_nvptx } } } +! { dg-do compile { target offload_target_nvptx } } program test implicit none @@ -33,6 +34,10 @@ program test contains subroutine f (x, y, z) integer :: x(N), y(N), z(N) + ! The following fails as on the host the target side cannot be + ! resolved - and the 'teams' or not status affects how 'target' + ! is called. -> See PR118694, esp. comment 9. + ! Note also the dg-do compile above for offload_target_nvptx !$omp target map (to: x, y) map(from: z) block @@ -43,6 +48,7 @@ contains z(i) = x(i) * y(i) enddo end block + ! { dg-bogus "'target' construct with nested 'teams' construct contains directives outside of the 'teams' construct" "PR118694" { xfail offload_target_nvptx } .-9 } */ end subroutine subroutine g (x, y, z) integer :: x(N), y(N), z(N) @@ -56,6 +62,7 @@ contains z(i) = x(i) * y(i) enddo end block + ! { dg-bogus "'target' construct with nested 'teams' construct contains directives outside of the 'teams' construct" "PR118694" { xfail offload_target_nvptx } .-9 } */ !$omp end target end subroutine end program diff --git a/libgomp/testsuite/libgomp.fortran/omp_target_memset-2.f90 b/libgomp/testsuite/libgomp.fortran/omp_target_memset-2.f90 new file mode 100644 index 0000000..2641086 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/omp_target_memset-2.f90 @@ -0,0 +1,67 @@ +! PR libgomp/120444 +! Async version + +use omp_lib +use iso_c_binding +implicit none (type, external) +integer(c_int) :: dev + +!$omp parallel do +do dev = omp_initial_device, omp_get_num_devices () +block + integer(c_int) :: i, val, start, tail + type(c_ptr) :: ptr, ptr2, tmpptr + integer(c_int8_t), pointer, contiguous :: fptr(:) + integer(c_intptr_t) :: intptr + integer(c_size_t), parameter :: count = 1024 + integer(omp_depend_kind) :: dep(1) + + ptr = omp_target_alloc (count, dev) + + !$omp depobj(dep(1)) depend(inout: ptr) + + ! Play also around with the alignment - as hsa_amd_memory_fill operates + ! on multiples of 4 bytes (c_int32_t) + + do start = 0, 31 + do tail = 0, 31 + val = iachar('0') + start + tail + + tmpptr = transfer (transfer (ptr, intptr) + start, tmpptr) + ptr2 = omp_target_memset_async (tmpptr, val, count - start - tail, dev, 0) + + if (.not. 
c_associated (tmpptr, ptr2)) stop 1 + + !$omp taskwait + + !$omp target device(dev) is_device_ptr(ptr) depend(depobj: dep(1)) nowait + do i = 1 + start, int(count, c_int) - start - tail + call c_f_pointer (ptr, fptr, [count]) + if (fptr(i) /= int (val, c_int8_t)) stop 2 + fptr(i) = fptr(i) + 2_c_int8_t + end do + !$omp end target + + ptr2 = omp_target_memset_async (tmpptr, val + 3, & + count - start - tail, dev, 1, dep) + + !$omp target device(dev) is_device_ptr(ptr) depend(depobj: dep(1)) nowait + do i = 1 + start, int(count, c_int) - start - tail + call c_f_pointer (ptr, fptr, [count]) + if (fptr(i) /= int (val + 3, c_int8_t)) stop 3 + fptr(i) = fptr(i) - 1_c_int8_t + end do + !$omp end target + + ptr2 = omp_target_memset_async (tmpptr, val - 3, & + count - start - tail, dev, 1, dep) + + !$omp taskwait depend (depobj: dep(1)) + end do + end do + + !$omp depobj(dep(1)) destroy + call omp_target_free (ptr, dev); +end block +end do +end diff --git a/libgomp/testsuite/libgomp.fortran/omp_target_memset.f90 b/libgomp/testsuite/libgomp.fortran/omp_target_memset.f90 new file mode 100644 index 0000000..1ee184a --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/omp_target_memset.f90 @@ -0,0 +1,39 @@ +! PR libgomp/120444 + +use omp_lib +use iso_c_binding +implicit none (type, external) + +integer(c_int) :: dev, i, val, start, tail +type(c_ptr) :: ptr, ptr2, tmpptr +integer(c_int8_t), pointer, contiguous :: fptr(:) +integer(c_intptr_t) :: intptr +integer(c_size_t), parameter :: count = 1024 + +do dev = omp_initial_device, omp_get_num_devices () + ptr = omp_target_alloc (count, dev) + + ! Play also around with the alignment - as hsa_amd_memory_fill operates + ! on multiples of 4 bytes (c_int32_t) + + do start = 0, 31 + do tail = 0, 31 + val = iachar('0') + start + tail + + tmpptr = transfer (transfer (ptr, intptr) + start, tmpptr) + ptr2 = omp_target_memset (tmpptr, val, count - start - tail, dev) + + if (.not. c_associated (tmpptr, ptr2)) stop 1 + + !$omp target device(dev) is_device_ptr(ptr) + do i = 1 + start, int(count, c_int) - start - tail + call c_f_pointer (ptr, fptr, [count]) + if (fptr(i) /= int (val, c_int8_t)) stop 2 + end do + !$omp end target + end do + end do + + call omp_target_free (ptr, dev); +end do +end diff --git a/libgomp/testsuite/libgomp.fortran/target-enter-data-8.f90 b/libgomp/testsuite/libgomp.fortran/target-enter-data-8.f90 new file mode 100644 index 0000000..c6d671c --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/target-enter-data-8.f90 @@ -0,0 +1,532 @@ +! { dg-additional-options "-cpp" } + +! FIXME: Some tests do not work yet. Those are for now in '#if 0' + +! Check that 'map(alloc:' properly works with +! - deferred-length character strings +! - arrays with array descriptors +! 
For those, the array descriptor / string length must be mapped with 'to:' + +program main +implicit none + +type t + integer :: ic(2:5), ic2 + character(len=11) :: ccstr(3:4), ccstr2 + character(len=11,kind=4) :: cc4str(3:7), cc4str2 + integer, pointer :: pc(:), pc2 + character(len=:), pointer :: pcstr(:), pcstr2 + character(len=:,kind=4), pointer :: pc4str(:), pc4str2 +end type t + +type(t) :: dt + +integer :: ii(5), ii2 +character(len=11) :: clstr(-1:1), clstr2 +character(len=11,kind=4) :: cl4str(0:3), cl4str2 +integer, pointer :: ip(:), ip2 +integer, allocatable :: ia(:), ia2 +character(len=:), pointer :: pstr(:), pstr2 +character(len=:), allocatable :: astr(:), astr2 +character(len=:,kind=4), pointer :: p4str(:), p4str2 +character(len=:,kind=4), allocatable :: a4str(:), a4str2 + + +allocate(dt%pc(5), dt%pc2) +allocate(character(len=2) :: dt%pcstr(2)) +allocate(character(len=4) :: dt%pcstr2) + +allocate(character(len=3,kind=4) :: dt%pc4str(2:3)) +allocate(character(len=5,kind=4) :: dt%pc4str2) + +allocate(ip(5), ip2, ia(8), ia2) +allocate(character(len=2) :: pstr(-2:0)) +allocate(character(len=4) :: pstr2) +allocate(character(len=6) :: astr(3:5)) +allocate(character(len=8) :: astr2) + +allocate(character(len=3,kind=4) :: p4str(2:4)) +allocate(character(len=5,kind=4) :: p4str2) +allocate(character(len=7,kind=4) :: a4str(-2:3)) +allocate(character(len=9,kind=4) :: a4str2) + + +! integer :: ic(2:5), ic2 + +!$omp target enter data map(alloc: dt%ic) +!$omp target map(alloc: dt%ic) + if (size(dt%ic) /= 4) error stop + if (lbound(dt%ic, 1) /= 2) error stop + if (ubound(dt%ic, 1) /= 5) error stop + dt%ic = [22, 33, 44, 55] +!$omp end target +!$omp target exit data map(from: dt%ic) +if (size(dt%ic) /= 4) error stop +if (lbound(dt%ic, 1) /= 2) error stop +if (ubound(dt%ic, 1) /= 5) error stop +if (any (dt%ic /= [22, 33, 44, 55])) error stop + +!$omp target enter data map(alloc: dt%ic2) +!$omp target map(alloc: dt%ic2) + dt%ic2 = 42 +!$omp end target +!$omp target exit data map(from: dt%ic2) +if (dt%ic2 /= 42) error stop + + +! character(len=11) :: ccstr(3:4), ccstr2 + +!$omp target enter data map(alloc: dt%ccstr) +!$omp target map(alloc: dt%ccstr) + if (len(dt%ccstr) /= 11) error stop + if (size(dt%ccstr) /= 2) error stop + if (lbound(dt%ccstr, 1) /= 3) error stop + if (ubound(dt%ccstr, 1) /= 4) error stop + dt%ccstr = ["12345678901", "abcdefghijk"] +!$omp end target +!$omp target exit data map(from: dt%ccstr) +if (len(dt%ccstr) /= 11) error stop +if (size(dt%ccstr) /= 2) error stop +if (lbound(dt%ccstr, 1) /= 3) error stop +if (ubound(dt%ccstr, 1) /= 4) error stop +if (any (dt%ccstr /= ["12345678901", "abcdefghijk"])) error stop + +!$omp target enter data map(alloc: dt%ccstr2) +!$omp target map(alloc: dt%ccstr2) + if (len(dt%ccstr2) /= 11) error stop + dt%ccstr2 = "ABCDEFGHIJK" +!$omp end target +!$omp target exit data map(from: dt%ccstr2) +if (len(dt%ccstr2) /= 11) error stop +if (dt%ccstr2 /= "ABCDEFGHIJK") error stop + + +! character(len=11,kind=4) :: cc4str(3:7), cc4str2 + +#if 0 +! 
Value check fails +!$omp target map(alloc: dt%cc4str) + if (len(dt%cc4str) /= 11) error stop + if (size(dt%cc4str) /= 5) error stop + if (lbound(dt%cc4str, 1) /= 3) error stop + if (ubound(dt%cc4str, 1) /= 7) error stop + dt%cc4str = [4_"12345678901", 4_"abcdefghijk", & + 4_"qerftcea6ds", 4_"a1f9g37ga4.", & + 4_"45ngwj56sj2"] +!$omp end target +!$omp target exit data map(from: dt%cc4str) +if (len(dt%cc4str) /= 11) error stop +if (size(dt%cc4str) /= 5) error stop +if (lbound(dt%cc4str, 1) /= 3) error stop +if (ubound(dt%cc4str, 1) /= 7) error stop +if (dt%cc4str(3) /= 4_"12345678901") error stop +if (dt%cc4str(4) /= 4_"abcdefghijk") error stop +if (dt%cc4str(5) /= 4_"qerftcea6ds") error stop +if (dt%cc4str(6) /= 4_"a1f9g37ga4.") error stop +if (dt%cc4str(7) /= 4_"45ngwj56sj2") error stop +#endif + +!$omp target enter data map(alloc: dt%cc4str2) +!$omp target map(alloc: dt%cc4str2) + if (len(dt%cc4str2) /= 11) error stop + dt%cc4str2 = 4_"ABCDEFGHIJK" +!$omp end target +!$omp target exit data map(from: dt%cc4str2) +if (len(dt%cc4str2) /= 11) error stop +if (dt%cc4str2 /= 4_"ABCDEFGHIJK") error stop + + +! integer, pointer :: pc(:), pc2 +! allocate(dt%pc(5), dt%pc2) + +!$omp target enter data map(alloc: dt%pc) +!$omp target map(alloc: dt%pc) + if (.not. associated(dt%pc)) error stop + if (size(dt%pc) /= 5) error stop + if (lbound(dt%pc, 1) /= 1) error stop + if (ubound(dt%pc, 1) /= 5) error stop + dt%pc = [11, 22, 33, 44, 55] +!$omp end target +!$omp target exit data map(from: dt%pc) +if (.not. associated(dt%pc)) error stop +if (size(dt%pc) /= 5) error stop +if (lbound(dt%pc, 1) /= 1) error stop +if (ubound(dt%pc, 1) /= 5) error stop +if (any (dt%pc /= [11, 22, 33, 44, 55])) error stop + +!$omp target enter data map(alloc: dt%pc2) +!$omp target map(alloc: dt%pc2) + if (.not. associated(dt%pc2)) error stop + dt%pc2 = 99 +!$omp end target +!$omp target exit data map(from: dt%pc2) +if (dt%pc2 /= 99) error stop +if (.not. associated(dt%pc2)) error stop + + +! character(len=:), pointer :: pcstr(:), pcstr2 +! allocate(character(len=2) :: dt%pcstr(2)) +! allocate(character(len=4) :: dt%pcstr2) + +!$omp target enter data map(alloc: dt%pcstr) +!$omp target map(alloc: dt%pcstr) + if (.not. associated(dt%pcstr)) error stop + if (len(dt%pcstr) /= 2) error stop + if (size(dt%pcstr) /= 2) error stop + if (lbound(dt%pcstr, 1) /= 1) error stop + if (ubound(dt%pcstr, 1) /= 2) error stop + dt%pcstr = ["01", "jk"] +!$omp end target +!$omp target exit data map(from: dt%pcstr) +if (.not. associated(dt%pcstr)) error stop +if (len(dt%pcstr) /= 2) error stop +if (size(dt%pcstr) /= 2) error stop +if (lbound(dt%pcstr, 1) /= 1) error stop +if (ubound(dt%pcstr, 1) /= 2) error stop +if (any (dt%pcstr /= ["01", "jk"])) error stop + + +!$omp target enter data map(alloc: dt%pcstr2) +!$omp target map(alloc: dt%pcstr2) + if (.not. associated(dt%pcstr2)) error stop + if (len(dt%pcstr2) /= 4) error stop + dt%pcstr2 = "HIJK" +!$omp end target +!$omp target exit data map(from: dt%pcstr2) +if (.not. associated(dt%pcstr2)) error stop +if (len(dt%pcstr2) /= 4) error stop +if (dt%pcstr2 /= "HIJK") error stop + + +! character(len=:,kind=4), pointer :: pc4str(:), pc4str2 +! allocate(character(len=3,kind=4) :: dt%pc4str(2:3)) +! allocate(character(len=5,kind=4) :: dt%pc4str2) + +!$omp target enter data map(alloc: dt%pc4str) +!$omp target map(alloc: dt%pc4str) + if (.not. 
associated(dt%pc4str)) error stop + if (len(dt%pc4str) /= 3) error stop + if (size(dt%pc4str) /= 2) error stop + if (lbound(dt%pc4str, 1) /= 2) error stop + if (ubound(dt%pc4str, 1) /= 3) error stop + dt%pc4str = [4_"456", 4_"tzu"] +!$omp end target +!$omp target exit data map(from: dt%pc4str) +if (.not. associated(dt%pc4str)) error stop +if (len(dt%pc4str) /= 3) error stop +if (size(dt%pc4str) /= 2) error stop +if (lbound(dt%pc4str, 1) /= 2) error stop +if (ubound(dt%pc4str, 1) /= 3) error stop +if (dt%pc4str(2) /= 4_"456") error stop +if (dt%pc4str(3) /= 4_"tzu") error stop + +!$omp target enter data map(alloc: dt%pc4str2) +!$omp target map(alloc: dt%pc4str2) + if (.not. associated(dt%pc4str2)) error stop + if (len(dt%pc4str2) /= 5) error stop + dt%pc4str2 = 4_"98765" +!$omp end target +!$omp target exit data map(from: dt%pc4str2) +if (.not. associated(dt%pc4str2)) error stop +if (len(dt%pc4str2) /= 5) error stop +if (dt%pc4str2 /= 4_"98765") error stop + + +! integer :: ii(5), ii2 + +!$omp target enter data map(alloc: ii) +!$omp target map(alloc: ii) + if (size(ii) /= 5) error stop + if (lbound(ii, 1) /= 1) error stop + if (ubound(ii, 1) /= 5) error stop + ii = [-1, -2, -3, -4, -5] +!$omp end target +!$omp target exit data map(from: ii) +if (size(ii) /= 5) error stop +if (lbound(ii, 1) /= 1) error stop +if (ubound(ii, 1) /= 5) error stop +if (any (ii /= [-1, -2, -3, -4, -5])) error stop + +!$omp target enter data map(alloc: ii2) +!$omp target map(alloc: ii2) + ii2 = -410 +!$omp end target +!$omp target exit data map(from: ii2) +if (ii2 /= -410) error stop + + +! character(len=11) :: clstr(-1:1), clstr2 + +!$omp target enter data map(alloc: clstr) +!$omp target map(alloc: clstr) + if (len(clstr) /= 11) error stop + if (size(clstr) /= 3) error stop + if (lbound(clstr, 1) /= -1) error stop + if (ubound(clstr, 1) /= 1) error stop + clstr = ["12345678901", "abcdefghijk", "ABCDEFGHIJK"] +!$omp end target +!$omp target exit data map(from: clstr) +if (len(clstr) /= 11) error stop +if (size(clstr) /= 3) error stop +if (lbound(clstr, 1) /= -1) error stop +if (ubound(clstr, 1) /= 1) error stop +if (any (clstr /= ["12345678901", "abcdefghijk", "ABCDEFGHIJK"])) error stop + +!$omp target enter data map(alloc: clstr2) +!$omp target map(alloc: clstr2) + if (len(clstr2) /= 11) error stop + clstr2 = "ABCDEFghijk" +!$omp end target +!$omp target exit data map(from: clstr2) +if (len(clstr2) /= 11) error stop +if (clstr2 /= "ABCDEFghijk") error stop + + +! character(len=11,kind=4) :: cl4str(0:3), cl4str2 + +!$omp target enter data map(alloc: cl4str) +!$omp target map(alloc: cl4str) + if (len(cl4str) /= 11) error stop + if (size(cl4str) /= 4) error stop + if (lbound(cl4str, 1) /= 0) error stop + if (ubound(cl4str, 1) /= 3) error stop + cl4str = [4_"12345678901", 4_"abcdefghijk", & + 4_"qerftcea6ds", 4_"a1f9g37ga4."] +!$omp end target +!$omp target exit data map(from: cl4str) +if (len(cl4str) /= 11) error stop +if (size(cl4str) /= 4) error stop +if (lbound(cl4str, 1) /= 0) error stop +if (ubound(cl4str, 1) /= 3) error stop +if (cl4str(0) /= 4_"12345678901") error stop +if (cl4str(1) /= 4_"abcdefghijk") error stop +if (cl4str(2) /= 4_"qerftcea6ds") error stop +if (cl4str(3) /= 4_"a1f9g37ga4.") error stop + +!$omp target enter data map(alloc: cl4str2) +!$omp target map(alloc: cl4str2) + if (len(cl4str2) /= 11) error stop + cl4str2 = 4_"ABCDEFGHIJK" +!$omp end target +!$omp target exit data map(from: cl4str2) +if (len(cl4str2) /= 11) error stop +if (cl4str2 /= 4_"ABCDEFGHIJK") error stop + + +! 
allocate(ip(5), ip2, ia(8), ia2) + +!$omp target enter data map(alloc: ip) +!$omp target map(alloc: ip) + if (.not. associated(ip)) error stop + if (size(ip) /= 5) error stop + if (lbound(ip, 1) /= 1) error stop + if (ubound(ip, 1) /= 5) error stop + ip = [11, 22, 33, 44, 55] +!$omp end target +!$omp target exit data map(from: ip) +if (.not. associated(ip)) error stop +if (size(ip) /= 5) error stop +if (lbound(ip, 1) /= 1) error stop +if (ubound(ip, 1) /= 5) error stop +if (any (ip /= [11, 22, 33, 44, 55])) error stop + +!$omp target enter data map(alloc: ip2) +!$omp target map(alloc: ip2) + if (.not. associated(ip2)) error stop + ip2 = 99 +!$omp end target +!$omp target exit data map(from: ip2) +if (ip2 /= 99) error stop +if (.not. associated(ip2)) error stop + + +! allocate(ip(5), ip2, ia(8), ia2) + +!$omp target enter data map(alloc: ia) +!$omp target map(alloc: ia) + if (.not. allocated(ia)) error stop + if (size(ia) /= 8) error stop + if (lbound(ia, 1) /= 1) error stop + if (ubound(ia, 1) /= 8) error stop + ia = [1,2,3,4,5,6,7,8] +!$omp end target +!$omp target exit data map(from: ia) +if (.not. allocated(ia)) error stop +if (size(ia) /= 8) error stop +if (lbound(ia, 1) /= 1) error stop +if (ubound(ia, 1) /= 8) error stop +if (any (ia /= [1,2,3,4,5,6,7,8])) error stop + +!$omp target enter data map(alloc: ia2) +!$omp target map(alloc: ia2) + if (.not. allocated(ia2)) error stop + ia2 = 102 +!$omp end target +!$omp target exit data map(from: ia2) +if (ia2 /= 102) error stop +if (.not. allocated(ia2)) error stop + + +! character(len=:), pointer :: pstr(:), pstr2 +! allocate(character(len=2) :: pstr(-2:0)) +! allocate(character(len=4) :: pstr2) + +!$omp target enter data map(alloc: pstr) +!$omp target map(alloc: pstr) + if (.not. associated(pstr)) error stop + if (len(pstr) /= 2) error stop + if (size(pstr) /= 3) error stop + if (lbound(pstr, 1) /= -2) error stop + if (ubound(pstr, 1) /= 0) error stop + pstr = ["01", "jk", "aq"] +!$omp end target +!$omp target exit data map(from: pstr) +if (.not. associated(pstr)) error stop +if (len(pstr) /= 2) error stop +if (size(pstr) /= 3) error stop +if (lbound(pstr, 1) /= -2) error stop +if (ubound(pstr, 1) /= 0) error stop +if (any (pstr /= ["01", "jk", "aq"])) error stop + +!$omp target enter data map(alloc: pstr2) +!$omp target map(alloc: pstr2) + if (.not. associated(pstr2)) error stop + if (len(pstr2) /= 4) error stop + pstr2 = "HIJK" +!$omp end target +!$omp target exit data map(from: pstr2) +if (.not. associated(pstr2)) error stop +if (len(pstr2) /= 4) error stop +if (pstr2 /= "HIJK") error stop + + +! character(len=:), allocatable :: astr(:), astr2 +! allocate(character(len=6) :: astr(3:5)) +! allocate(character(len=8) :: astr2) + + +!$omp target enter data map(alloc: astr) +!$omp target map(alloc: astr) + if (.not. allocated(astr)) error stop + if (len(astr) /= 6) error stop + if (size(astr) /= 3) error stop + if (lbound(astr, 1) /= 3) error stop + if (ubound(astr, 1) /= 5) error stop + astr = ["01db45", "jk$D%S", "zutg47"] +!$omp end target +!$omp target exit data map(from: astr) +if (.not. allocated(astr)) error stop +if (len(astr) /= 6) error stop +if (size(astr) /= 3) error stop +if (lbound(astr, 1) /= 3) error stop +if (ubound(astr, 1) /= 5) error stop +if (any (astr /= ["01db45", "jk$D%S", "zutg47"])) error stop + + +!$omp target enter data map(alloc: astr2) +!$omp target map(alloc: astr2) + if (.not. 
allocated(astr2)) error stop + if (len(astr2) /= 8) error stop + astr2 = "HIJKhijk" +!$omp end target +!$omp target exit data map(from: astr2) +if (.not. allocated(astr2)) error stop +if (len(astr2) /= 8) error stop +if (astr2 /= "HIJKhijk") error stop + + +! character(len=:,kind=4), pointer :: p4str(:), p4str2 +! allocate(character(len=3,kind=4) :: p4str(2:4)) +! allocate(character(len=5,kind=4) :: p4str2) + +! FAILS with value check + +!$omp target enter data map(alloc: p4str) +!$omp target map(alloc: p4str) + if (.not. associated(p4str)) error stop + if (len(p4str) /= 3) error stop + if (size(p4str) /= 3) error stop + if (lbound(p4str, 1) /= 2) error stop + if (ubound(p4str, 1) /= 4) error stop + p4str(:) = [4_"f85", 4_"8af", 4_"A%F"] +!$omp end target +!$omp target exit data map(from: p4str) +if (.not. associated(p4str)) error stop +if (len(p4str) /= 3) error stop +if (size(p4str) /= 3) error stop +if (lbound(p4str, 1) /= 2) error stop +if (ubound(p4str, 1) /= 4) error stop +if (p4str(2) /= 4_"f85") error stop +if (p4str(3) /= 4_"8af") error stop +if (p4str(4) /= 4_"A%F") error stop + +!$omp target enter data map(alloc: p4str2) +!$omp target map(alloc: p4str2) + if (.not. associated(p4str2)) error stop + if (len(p4str2) /= 5) error stop + p4str2 = 4_"9875a" +!$omp end target +!$omp target exit data map(from: p4str2) +if (.not. associated(p4str2)) error stop +if (len(p4str2) /= 5) error stop +if (p4str2 /= 4_"9875a") error stop + + +! character(len=:,kind=4), allocatable :: a4str(:), a4str2 +! allocate(character(len=7,kind=4) :: a4str(-2:3)) +! allocate(character(len=9,kind=4) :: a4str2) + +!$omp target enter data map(alloc: a4str) +!$omp target map(alloc: a4str) + if (.not. allocated(a4str)) error stop + if (len(a4str) /= 7) error stop + if (size(a4str) /= 6) error stop + if (lbound(a4str, 1) /= -2) error stop + if (ubound(a4str, 1) /= 3) error stop + ! See PR fortran/107508 why '(:)' is required + a4str(:) = [4_"sf456aq", 4_"3dtzu24", 4_"_4fh7sm", 4_"=ff85s7", 4_"j=8af4d", 4_".,A%Fsz"] +!$omp end target +!$omp target exit data map(from: a4str) +if (.not. allocated(a4str)) error stop +if (len(a4str) /= 7) error stop +if (size(a4str) /= 6) error stop +if (lbound(a4str, 1) /= -2) error stop +if (ubound(a4str, 1) /= 3) error stop +if (a4str(-2) /= 4_"sf456aq") error stop +if (a4str(-1) /= 4_"3dtzu24") error stop +if (a4str(0) /= 4_"_4fh7sm") error stop +if (a4str(1) /= 4_"=ff85s7") error stop +if (a4str(2) /= 4_"j=8af4d") error stop +if (a4str(3) /= 4_".,A%Fsz") error stop + +!$omp target enter data map(alloc: a4str2) +!$omp target map(alloc: a4str2) + if (.not. allocated(a4str2)) error stop + if (len(a4str2) /= 9) error stop + a4str2 = 4_"98765a23d" +!$omp end target +!$omp target exit data map(from: a4str2) +if (.not. allocated(a4str2)) error stop +if (len(a4str2) /= 9) error stop +if (a4str2 /= 4_"98765a23d") error stop + + +deallocate(dt%pc, dt%pc2) +deallocate(dt%pcstr) +deallocate(dt%pcstr2) + +deallocate(dt%pc4str) +deallocate(dt%pc4str2) + +deallocate(ip, ip2, ia, ia2) +deallocate(pstr) +deallocate(pstr2) +deallocate(astr) +deallocate(astr2) + +deallocate(p4str) +deallocate(p4str2) +deallocate(a4str) +deallocate(a4str2) + +end diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-1.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-1.C new file mode 100644 index 0000000..6957a6c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-1.C @@ -0,0 +1,57 @@ +/* 'std::bad_cast' exception in OpenACC compute region. 
*/ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +/* See also '../libgomp.c++/target-exceptions-bad_cast-1.C'. */ + +/* See also '../../../gcc/testsuite/g++.target/gcn/exceptions-bad_cast-1.C', + '../../../gcc/testsuite/g++.target/nvptx/exceptions-bad_cast-1.C'. */ + +#include <iostream> + +struct C1 +{ + virtual void f() + {} +}; + +struct C2 : C1 +{ +}; + +int main() +{ + std::cerr << "CheCKpOInT\n"; +#pragma omp target +#pragma acc serial + /* { dg-bogus {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected xfail *-*-* } .-1 } */ + { + C1 c1; + [[maybe_unused]] + C2 &c2 = dynamic_cast<C2 &>(c1); + /* 'std::bad_cast' is thrown. */ + } +} + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + For host execution, we print something like: + terminate called after throwing an instance of 'std::bad_cast' + what(): std::bad_cast + Aborted (core dumped) + { dg-output {.*std::bad_cast} { target openacc_host_selected } } + For GCN, nvptx offload execution, we don't print anything, but just 'abort'. + + TODO For GCN, nvptx offload execution, this currently doesn't 'abort' due to + the 'std::bad_cast' exception, but rather due to SIGSEGV in 'dynamic_cast'; + PR119692. + + { dg-shouldfail {'std::bad_cast' exception} } */ +/* There are configurations where we 'WARNING: program timed out.' while in + 'dynamic_cast', see <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119692#c6>. + { dg-timeout 10 } ... to make sure that happens quickly. */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2-offload-sorry-GCN.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2-offload-sorry-GCN.C new file mode 100644 index 0000000..8260966 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2-offload-sorry-GCN.C @@ -0,0 +1,18 @@ +/* 'std::bad_cast' exception in OpenACC compute region, caught, '-foffload-options=-mno-fake-exceptions'. */ + +/* As this test case involves an expected offload compilation failure, we have to handle each offload target individually. + { dg-do link { target openacc_radeon_accel_selected } } */ +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -foffload-options=-mno-fake-exceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "exceptions-bad_cast-2.C" + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + Given '-foffload-options=-mno-fake-exceptions', offload compilation fails: + { dg-regexp {[^\r\n]+: In function 'main[^']+':[\r\n]+(?:[^\r\n]+: sorry, unimplemented: exception handling not supported[\r\n]+)+} } + (Note, using 'dg-regexp' instead of 'dg-message', as the former runs before the auto-mark-UNSUPPORTED.) 
+ { dg-excess-errors {'mkoffload' failure etc.} } */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2-offload-sorry-nvptx.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2-offload-sorry-nvptx.C new file mode 100644 index 0000000..86d3f6c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2-offload-sorry-nvptx.C @@ -0,0 +1,20 @@ +/* 'std::bad_cast' exception in OpenACC compute region, caught, '-foffload-options=-mno-fake-exceptions'. */ + +/* As this test case involves an expected offload compilation failure, we have to handle each offload target individually. + { dg-do link { target openacc_nvidia_accel_selected } } */ +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -foffload-options=-mno-fake-exceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "exceptions-bad_cast-2.C" + +/* { dg-bogus {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected xfail *-*-* } 0 } */ + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + Given '-foffload-options=-mno-fake-exceptions', offload compilation fails: + { dg-regexp {[^\r\n]+: In function 'main[^']+':[\r\n]+(?:[^\r\n]+: sorry, unimplemented: exception handling not supported[\r\n]+)+} } + (Note, using 'dg-regexp' instead of 'dg-message', as the former runs before the auto-mark-UNSUPPORTED.) + { dg-excess-errors {'mkoffload' failure etc.} } */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2.C new file mode 100644 index 0000000..0f84cf2 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2.C @@ -0,0 +1,63 @@ +/* 'std::bad_cast' exception in OpenACC compute region, caught. */ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ +/* { dg-bogus {_ZTISt8bad_cast} PR119734 { target openacc_nvidia_accel_selected xfail *-*-* } 0 } + { dg-excess-errors {'mkoffload' failure etc.} { xfail openacc_nvidia_accel_selected } } */ + +/* See also '../libgomp.c++/target-exceptions-bad_cast-2.C'. */ + +/* See also '../../../gcc/testsuite/g++.target/gcn/exceptions-bad_cast-2.C', + '../../../gcc/testsuite/g++.target/nvptx/exceptions-bad_cast-2.C'. */ + +#include <iostream> +#include <typeinfo> + +struct C1 +{ + virtual void f() + {} +}; + +struct C2 : C1 +{ +}; + +int main() +{ + std::cerr << "CheCKpOInT\n"; +#pragma omp target +#pragma acc serial + { + C1 c1; + try + { + [[maybe_unused]] + C2 &c2 = dynamic_cast<C2 &>(c1); + /* 'std::bad_cast' is thrown. */ + } + catch (const std::bad_cast &e) + { + __builtin_printf("caught '%s'\n", e.what()); + } + } +} + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + { dg-output {.*caught 'std::bad_cast'[\r\n]+} { target openacc_host_selected } } + For GCN, nvptx offload execution, we don't print anything, but just 'abort'. 
+ + TODO For GCN, nvptx offload execution, this currently doesn't 'abort' due to + the 'std::bad_cast' exception, but rather due to SIGSEGV in 'dynamic_cast'; + PR119692. + + For GCN, nvptx offload execution, there is no 'catch'ing; any exception is fatal. + { dg-shouldfail {'std::bad_cast' exception} { ! openacc_host_selected } } */ +/* There are configurations where we 'WARNING: program timed out.' while in + 'dynamic_cast', see <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119692#c6>. + { dg-timeout 10 } ... to make sure that happens quickly. */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-3.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-3.C new file mode 100644 index 0000000..4fa419f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-3.C @@ -0,0 +1,49 @@ +/* 'std::bad_cast' exception in OpenACC compute region, dead code. */ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* Wrong code for offloading execution. + { dg-skip-if PR119692 { ! openacc_host_selected } } + { dg-additional-options -fdump-tree-gimple } */ +/* { dg-additional-options -fdump-tree-optimized-raw } */ + +/* See also '../libgomp.c++/target-exceptions-bad_cast-3.C'. */ + +/* See also '../../../gcc/testsuite/g++.target/gcn/exceptions-bad_cast-3.C', + '../../../gcc/testsuite/g++.target/nvptx/exceptions-bad_cast-3.C'. */ + +/* For PR119692 workarounds. */ +#ifndef DEFAULT +# define DEFAULT +#endif + +struct C1 +{ + virtual void f() + {} +}; + +struct C2 : C1 +{ +}; + +int main() +{ +#pragma omp target DEFAULT +#pragma acc serial DEFAULT + { + C1 c1; + bool a = false; + asm volatile ("" : : "r" (&a) : "memory"); + if (a) + { + [[maybe_unused]] + C2 &c2 = dynamic_cast<C2 &>(c1); + /* 'std::bad_cast' is thrown. */ + } + } +} + +/* { dg-final { scan-tree-dump-not {(?n)#pragma omp target oacc_serial map\(tofrom:_ZTI2C2 \[len: [0-9]+\]\) map\(tofrom:_ZTI2C1 \[len: [0-9]+\]\) map\(tofrom:_ZTV2C1 \[len: [0-9]+\]\)$} gimple { xfail *-*-* } } } */ + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-1.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-1.C new file mode 100644 index 0000000..08c5766 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-1.C @@ -0,0 +1,43 @@ +/* 'throw' in OpenACC compute region. */ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +/* See also '../libgomp.c++/target-exceptions-throw-1.C'. */ + +/* See also '../../../gcc/testsuite/g++.target/gcn/exceptions-throw-1.C', + '../../../gcc/testsuite/g++.target/nvptx/exceptions-throw-1.C'. 
*/ + +#include <iostream> + +class MyException +{ +}; + +int main() +{ + std::cerr << "CheCKpOInT\n"; +#pragma omp target +#pragma acc serial + /* { dg-bogus {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected xfail *-*-* } .-1 } */ + { + MyException e1; + throw e1; + } +} + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + For host execution, we print something like: + terminate called after throwing an instance of 'MyException' + Aborted (core dumped) + { dg-output {.*MyException} { target openacc_host_selected } } + For GCN, nvptx offload execution, we don't print anything, but just 'abort'. + + { dg-shouldfail {'MyException' exception} } */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2-offload-sorry-GCN.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2-offload-sorry-GCN.C new file mode 100644 index 0000000..40be837 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2-offload-sorry-GCN.C @@ -0,0 +1,20 @@ +/* 'throw' in OpenACC compute region, caught, '-foffload-options=-mno-fake-exceptions'. */ + +/* As this test case involves an expected offload compilation failure, we have to handle each offload target individually. + { dg-do link { target openacc_radeon_accel_selected } } */ +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -foffload-options=-mno-fake-exceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "exceptions-throw-2.C" + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { only_for_offload_target amdgcn-amdhsa scan-offload-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + Given '-foffload-options=-mno-fake-exceptions', offload compilation fails: + { dg-regexp {[^\r\n]+: In function 'main[^']+':[\r\n]+(?:[^\r\n]+: sorry, unimplemented: exception handling not supported[\r\n]+)+} } + (Note, using 'dg-regexp' instead of 'dg-message', as the former runs before the auto-mark-UNSUPPORTED.) + { dg-excess-errors {'mkoffload' failure etc.} } */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2-offload-sorry-nvptx.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2-offload-sorry-nvptx.C new file mode 100644 index 0000000..9461455 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2-offload-sorry-nvptx.C @@ -0,0 +1,22 @@ +/* 'throw' in OpenACC compute region, caught, '-foffload-options=-mno-fake-exceptions'. */ + +/* As this test case involves an expected offload compilation failure, we have to handle each offload target individually. 
+ { dg-do link { target openacc_nvidia_accel_selected } } */ +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -foffload-options=-mno-fake-exceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ + +#include "exceptions-throw-2.C" + +/* { dg-bogus {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected xfail *-*-* } 0 } */ + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { only_for_offload_target nvptx-none scan-offload-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + Given '-foffload-options=-mno-fake-exceptions', offload compilation fails: + { dg-regexp {[^\r\n]+: In function 'main[^']+':[\r\n]+(?:[^\r\n]+: sorry, unimplemented: exception handling not supported[\r\n]+)+} } + (Note, using 'dg-regexp' instead of 'dg-message', as the former runs before the auto-mark-UNSUPPORTED.) + { dg-excess-errors {'mkoffload' failure etc.} } */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2.C new file mode 100644 index 0000000..a7408cd --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2.C @@ -0,0 +1,52 @@ +/* 'throw' in OpenACC compute region, caught. */ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } + { dg-additional-options -foffload-options=-fdump-tree-optimized-raw } */ +/* { dg-bogus {undefined symbol: typeinfo name for MyException} PR119806 { target { openacc_radeon_accel_selected && { ! __OPTIMIZE__ } } xfail *-*-* } 0 } + { dg-excess-errors {'mkoffload' failure etc.} { xfail { openacc_radeon_accel_selected && { ! __OPTIMIZE__ } } } } */ +/* { dg-bogus {Initial value type mismatch} PR119806 { target { openacc_nvidia_accel_selected && { ! __OPTIMIZE__ } } xfail *-*-* } 0 } + { dg-excess-errors {'mkoffload' failure etc.} { xfail { openacc_nvidia_accel_selected && { ! __OPTIMIZE__ } } } } */ + +/* See also '../libgomp.c++/target-exceptions-throw-2.C'. */ + +/* See also '../../../gcc/testsuite/g++.target/gcn/exceptions-throw-2.C', + '../../../gcc/testsuite/g++.target/nvptx/exceptions-throw-2.C'. 
*/ + +#include <iostream> + +class MyException +{ +}; + +int main() +{ + std::cerr << "CheCKpOInT\n"; +#pragma omp target +#pragma acc serial + /* { dg-bogus {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected xfail *-*-* } .-1 } */ + { + try + { + MyException e1; + throw e1; + } + catch (const MyException &e) + { + __builtin_printf("caught '%s'\n", "MyException"); + } + } +} + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + { dg-output {.*caught 'MyException'[\r\n]+} { target openacc_host_selected } } + For GCN, nvptx offload execution, we don't print anything, but just 'abort'. + + For GCN, nvptx offload execution, there is no 'catch'ing; any exception is fatal. + { dg-shouldfail {'MyException' exception} { ! openacc_host_selected } } */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-3.C b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-3.C new file mode 100644 index 0000000..74a62b3 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-3.C @@ -0,0 +1,43 @@ +/* 'throw' in OpenACC compute region, dead code. */ + +/* { dg-require-effective-target exceptions } + { dg-additional-options -fexceptions } */ +/* Wrong code for offloading execution. + { dg-skip-if PR119692 { ! openacc_host_selected } } + { dg-additional-options -fdump-tree-gimple } */ +/* { dg-additional-options -fdump-tree-optimized-raw } */ + +/* See also '../libgomp.c++/target-exceptions-throw-3.C'. */ + +/* See also '../../../gcc/testsuite/g++.target/gcn/exceptions-throw-3.C', + '../../../gcc/testsuite/g++.target/nvptx/exceptions-throw-3.C'. */ + +/* For PR119692 workarounds. */ +#ifndef DEFAULT +# define DEFAULT +#endif + +class MyException +{ +}; + +int main() +{ +#pragma omp target DEFAULT +#pragma acc serial DEFAULT + /* { dg-bogus {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected xfail *-*-* } .-1 } */ + { + bool a = false; + asm volatile ("" : : "r" (&a) : "memory"); + if (a) + { + MyException e1; + throw e1; + } + } +} + +/* { dg-final { scan-tree-dump-not {(?n)#pragma omp target oacc_serial map\(tofrom:_ZTI11MyException \[len: [0-9]+\]\)$} gimple { xfail *-*-* } } } */ + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/pr119692-1-1.C b/libgomp/testsuite/libgomp.oacc-c++/pr119692-1-1.C new file mode 100644 index 0000000..5c3e037 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/pr119692-1-1.C @@ -0,0 +1,42 @@ +/* PR119692 "C++ 'typeinfo', 'vtable' vs. OpenACC, OpenMP 'target' offloading" */ + +/* { dg-additional-options -UDEFAULT } + Wrong code for offloading execution. + { dg-skip-if PR119692 { ! openacc_host_selected } } */ +/* { dg-additional-options -fdump-tree-gimple } */ + +/* See also '../libgomp.c++/pr119692-1-1.C'. */ + +/* See also '../../../gcc/testsuite/g++.target/gcn/pr119692-1-1.C', + '../../../gcc/testsuite/g++.target/nvptx/pr119692-1-1.C'. 
*/ + +#ifndef DEFAULT +# define DEFAULT +#endif + +struct C1 +{ + virtual void f() + {} +}; + +struct C2 : C1 +{ +}; + +int main() +{ +#pragma omp target DEFAULT +#pragma acc serial DEFAULT + /* { dg-bogus {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected xfail *-*-* } .-1 } */ + { + C1 c1; + C1 *c1p = &c1; + asm volatile ("" : : "r" (&c1p) : "memory"); + C2 *c2 = dynamic_cast<C2 *>(c1p); + if (c2) + __builtin_abort(); + } +} + +/* { dg-final { scan-tree-dump-not {(?n)#pragma omp target oacc_serial map\(tofrom:_ZTI2C2 \[len: [0-9]+\]\) map\(tofrom:_ZTI2C1 \[len: [0-9]+\]\) map\(tofrom:_ZTV2C1 \[len: [0-9]+\]\)$} gimple { xfail *-*-* } } } */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/pr119692-1-2.C b/libgomp/testsuite/libgomp.oacc-c++/pr119692-1-2.C new file mode 100644 index 0000000..207b183 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/pr119692-1-2.C @@ -0,0 +1,12 @@ +/* PR119692 "C++ 'typeinfo', 'vtable' vs. OpenACC, OpenMP 'target' offloading" */ + +/* { dg-additional-options -DDEFAULT=default(none) } + Wrong code for offloading execution. + { dg-skip-if PR119692 { ! openacc_host_selected } } */ +/* { dg-additional-options -fdump-tree-gimple } */ + +#include "pr119692-1-1.C" + +/* { dg-bogus {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected xfail *-*-* } 0 } */ + +/* { dg-final { scan-tree-dump-not {(?n)#pragma omp target oacc_serial default\(none\) map\(tofrom:_ZTI2C2 \[len: [0-9]+\]\) map\(tofrom:_ZTI2C1 \[len: [0-9]+\]\) map\(tofrom:_ZTV2C1 \[len: [0-9]+\]\)$} gimple { xfail *-*-* } } } */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/pr119692-1-3.C b/libgomp/testsuite/libgomp.oacc-c++/pr119692-1-3.C new file mode 100644 index 0000000..e9b44de --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/pr119692-1-3.C @@ -0,0 +1,12 @@ +/* PR119692 "C++ 'typeinfo', 'vtable' vs. OpenACC, OpenMP 'target' offloading" */ + +/* { dg-additional-options -DDEFAULT=default(present) } + Wrong code for offloading execution. + { dg-xfail-run-if PR119692 { ! openacc_host_selected } } */ +/* { dg-additional-options -fdump-tree-gimple } */ + +#include "pr119692-1-1.C" + +/* { dg-bogus {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected xfail *-*-* } 0 } */ + +/* { dg-final { scan-tree-dump-not {(?n)#pragma omp target oacc_serial default\(present\) map\(force_present:_ZTI2C2 \[len: [0-9]+\]\) map\(force_present:_ZTI2C1 \[len: [0-9]+\]\) map\(force_present:_ZTV2C1 \[len: [0-9]+\]\)$} gimple { xfail *-*-* } } } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c new file mode 100644 index 0000000..4b54171 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/abi-struct-1.c @@ -0,0 +1,125 @@ +/* Inspired by 'gcc.target/nvptx/abi-struct-arg.c', 'gcc.target/nvptx/abi-struct-ret.c'. */ + +/* See also '../libgomp.c-c++-common/target-abi-struct-1.c'. */ + +/* To exercise PR119835 (if optimizations enabled): disable inlining, so that + GIMPLE passes still see the functions that return aggregate types. */ +#pragma GCC optimize "-fno-inline" + +typedef struct {} empty; /* See 'gcc/doc/extend.texi', "Empty Structures". 
*/ +typedef struct {char a;} schar; +typedef struct {short a;} sshort; +typedef struct {int a;} sint; +typedef struct {long long a;} slonglong; +typedef struct {int a, b[12];} sint_13; + +#pragma omp declare target + +#define M(T) ({T t; t.a = sizeof t; t;}) + +static __SIZE_TYPE__ empty_a; +#pragma acc declare create(empty_a) +#pragma acc routine +static empty rempty(void) +{ + return ({empty t; empty_a = sizeof t; t;}); +} + +#pragma acc routine +static schar rschar(void) +{ + return M(schar); +} + +#pragma acc routine +static sshort rsshort(void) +{ + return M(sshort); +} + +#pragma acc routine +static sint rsint(void) +{ + return M(sint); +} + +#pragma acc routine +static slonglong rslonglong(void) +{ + return M(slonglong); +} + +#pragma acc routine +static sint_13 rsint_13(void) +{ + return M(sint_13); +} + +#pragma acc routine +static void aempty(empty empty) +{ + (void) empty; + + __SIZE_TYPE__ empty_a_exp; +#ifndef __cplusplus + empty_a_exp = 0; +#else + empty_a_exp = sizeof (char); +#endif + if (empty_a != empty_a_exp) + __builtin_abort(); +} + +#pragma acc routine +static void aschar(schar schar) +{ + if (schar.a != sizeof (char)) + __builtin_abort(); +} + +#pragma acc routine +static void asshort(sshort sshort) +{ + if (sshort.a != sizeof (short)) + __builtin_abort(); +} + +#pragma acc routine +static void asint(sint sint) +{ + if (sint.a != sizeof (int)) + __builtin_abort(); +} + +#pragma acc routine +static void aslonglong(slonglong slonglong) +{ + if (slonglong.a != sizeof (long long)) + __builtin_abort(); +} + +#pragma acc routine +static void asint_13(sint_13 sint_13) +{ + if (sint_13.a != (sizeof (int) * 13)) + __builtin_abort(); +} + +#pragma omp end declare target + +int main() +{ +#pragma omp target +#pragma acc serial + /* { dg-bogus {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected xfail *-*-* } .-1 } */ + { + aempty(rempty()); + aschar(rschar()); + asshort(rsshort()); + asint(rsint()); + aslonglong(rslonglong()); + asint_13(rsint_13()); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c new file mode 100644 index 0000000..eda651d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_memcpy_device-1.c @@ -0,0 +1,96 @@ +/* { dg-prune-output "using .vector_length \\(32\\)" } */ + +/* PR libgomp/93226 */ + +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <openacc.h> + +enum { N = 1024 }; + +static int D[N]; +#pragma acc declare device_resident(D) + +#pragma acc routine +intptr_t init_d() +{ + for (int i = 0; i < N; i++) + D[i] = 27*i; + return (intptr_t) &D[0]; +} + +int +main () +{ + int *a, *b, *e; + void *d_a, *d_b, *d_c, *d_d, *d_e, *d_f; + intptr_t intptr; + bool fail = false; + + a = (int *) malloc (N*sizeof (int)); + b = (int *) malloc (N*sizeof (int)); + e = (int *) malloc (N*sizeof (int)); + d_c = acc_malloc (N*sizeof (int)); + d_f = acc_malloc (N*sizeof (int)); + + memset (e, 0xff, N*sizeof (int)); + d_e = acc_copyin (e, N*sizeof (int)); + + #pragma acc serial copyout(intptr) + intptr = init_d (); + d_d = (void*) intptr; + acc_memcpy_device (d_c, d_d, N*sizeof (int)); + + #pragma acc serial copy(fail) deviceptr(d_c) firstprivate(intptr) + { + int *cc = (int *) d_c; + int *dd = (int *) intptr; + for (int i = 0; i < N; i++) + if (dd[i] != 27*i || cc[i] != 27*i) + { + fail = true; + __builtin_abort (); + } + } + if (fail) __builtin_abort (); + + for (int i = 0; i < N; i++) + 
a[i] = 11*i; + for (int i = 0; i < N; i++) + b[i] = 31*i; + + d_a = acc_copyin (a, N*sizeof (int)); + acc_copyin_async (b, N*sizeof (int), acc_async_noval); + + #pragma acc parallel deviceptr(d_c) async + { + int *cc = (int *) d_c; + #pragma acc loop + for (int i = 0; i < N; i++) + cc[i] = -17*i; + } + + acc_memcpy_device_async (d_d, d_a, N*sizeof (int), acc_async_noval); + acc_memcpy_device_async (d_f, d_c, N*sizeof (int), acc_async_noval); + acc_wait (acc_async_noval); + d_b = acc_deviceptr (b); + acc_memcpy_device_async (d_e, d_b, N*sizeof (int), acc_async_noval); + acc_wait (acc_async_noval); + + #pragma acc serial deviceptr(d_d, d_e, d_f) copy(fail) + { + int *dd = (int *) d_d; + int *ee = (int *) d_e; + int *ff = (int *) d_f; + for (int i = 0; i < N; i++) + if (dd[i] != 11*i + || ee[i] != 31*i + || ff[i] != -17*i) + { + fail = true; + __builtin_abort (); + } + } + if (fail) __builtin_abort (); +} diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-1.f90 new file mode 100644 index 0000000..15393b4 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-1.f90 @@ -0,0 +1,25 @@ +! { dg-do compile } +! { dg-additional-options "-fdump-tree-original" } + +use openacc +implicit none (type, external) +integer,pointer :: a, b(:) +integer,allocatable :: c, d(:) + +call acc_attach(a) ! ICE +call acc_attach_async(b, 4) +call acc_attach(c) + +call acc_detach(a) +call acc_detach_async(b, 4) +call acc_detach_finalize(c) +call acc_detach_finalize_async(d,7) +end + +! { dg-final { scan-tree-dump-times "acc_attach \\(&a\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times "acc_attach_async \\(&\\(integer\\(kind=4\\)\\\[0:\\\] \\*\\) b.data, 4\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times "acc_attach \\(&c\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times "acc_detach \\(&a\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times "acc_detach_async \\(&\\(integer\\(kind=4\\)\\\[0:\\\] \\*\\) b.data, 4\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times "acc_detach_finalize \\(&c\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times "acc_detach_finalize_async \\(&\\(integer\\(kind=4\\)\\\[0:\\\] \\* restrict\\) d.data, 7\\);" 1 "original" } } diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-2.f90 new file mode 100644 index 0000000..b2204ac --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-2.f90 @@ -0,0 +1,62 @@ +! { dg-do run } + +use openacc +implicit none (type, external) +integer, target :: tgt_a, tgt_b(5) + +integer, pointer :: p1, p2(:) + +type t + integer,pointer :: a => null () + integer,pointer :: b(:) => null () + integer,allocatable :: c, d(:) +end type t + +type(t), target :: var + +tgt_a = 51 +tgt_b = [11,22,33,44,55] + +var%b => tgt_b +!$acc enter data copyin(var, tgt_a, tgt_b) +var%a => tgt_a + +call acc_attach(var%a) +call acc_attach(var%b) + +!$acc serial +! { dg-warning "using .vector_length \\(32\\)., ignoring 1" "" { target openacc_nvidia_accel_selected } .-1 } + if (var%a /= 51) stop 1 + if (any (var%b /= [11,22,33,44,55])) stop 2 +!$acc end serial + +call acc_detach(var%a) +call acc_detach(var%b) + +!$acc exit data delete(var, tgt_a, tgt_b) + +var%c = 9 +var%d = [1,2,3] + +p1 => var%c +p2 => var%d + +!$acc enter data copyin(p1, p2) +!$acc enter data copyin(var) +call acc_attach(var%c) +call acc_attach(var%d) + +!$acc serial +! 
{ dg-warning "using .vector_length \\(32\\)., ignoring 1" "" { target openacc_nvidia_accel_selected } .-1 } + if (var%c /= 9) stop 3 + if (any (var%d /= [1,2,3])) stop 4 +!$acc end serial + +call acc_detach(var%c) +call acc_detach(var%d) + +!$acc exit data delete(var, p1, p2) + +deallocate(var%d) + +end diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90 new file mode 100644 index 0000000..8f3a8f0 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/acc_memcpy_device-1.f90 @@ -0,0 +1,113 @@ +! { dg-prune-output "using .vector_length \\(32\\)" } + +! PR libgomp/93226 */ + +module m + use iso_c_binding + use openacc + implicit none (external, type) + + integer, parameter :: N = 1024 + + integer :: D(N) + !$acc declare device_resident(D) + +contains + + integer(c_intptr_t) function init_d() + !$acc routine + integer :: i + do i = 1, N + D(i) = 27*i + end do + init_d = loc(D) + end +end module + +program main + use m + implicit none (external, type) + + integer, allocatable, target :: a(:), b(:), e(:) + type(c_ptr) :: d_a, d_b, d_c, d_d, d_e, d_f + integer(c_intptr_t) intptr + integer :: i + logical fail + + fail = .false. + + allocate(a(N), b(N), e(N)) + d_c = acc_malloc (N*c_sizeof (i)) + d_f = acc_malloc (N*c_sizeof (i)) + + e = huge(e) + call acc_copyin (e, N*c_sizeof (i)); + d_e = acc_deviceptr (e); + + !$acc serial copyout(intptr) + intptr = init_d () + !$acc end serial + d_d = transfer(intptr, d_d) + call acc_memcpy_device (d_c, d_d, N*c_sizeof (i)) + + !$acc serial copy(fail) copy(a) deviceptr(d_c, d_d) firstprivate(intptr) + block + integer, pointer :: cc(:), dd(:) + call c_f_pointer (d_c, cc, [N]) + call c_f_pointer (d_d, dd, [N]) + a = cc + do i = 1, N + if (dd(i) /= 27*i .or. cc(i) /= 27*i) then + fail = .true. + stop 1 + end if + end do + end block + !$acc end serial + if (fail) error stop 1 + + do i = 1, N + a(i) = 11*i + b(i) = 31*i + end do + + call acc_copyin (a, N*c_sizeof (i)) + d_a = acc_deviceptr (a) + call acc_copyin_async (b, N*c_sizeof (i), acc_async_noval) + + !$acc parallel deviceptr(d_c) private(i) async + block + integer, pointer :: cc(:) + call c_f_pointer (d_c, cc, [N]) + !$acc loop + do i = 1, N + cc(i) = -17*i + end do + end block + !$acc end parallel + + call acc_memcpy_device_async (d_d, d_a, N*c_sizeof (i), acc_async_noval) + call acc_memcpy_device_async (d_f, d_c, N*c_sizeof (i), acc_async_noval) + call acc_wait (acc_async_noval) + d_b = acc_deviceptr (b) + call acc_memcpy_device_async (d_e, d_b, N*c_sizeof (i), acc_async_noval) + call acc_wait (acc_async_noval) + + !$acc serial deviceptr(d_d, d_e, d_f) private(i) copy(fail) + block + integer, pointer :: dd(:), ee(:), ff(:) + call c_f_pointer (d_d, dd, [N]) + call c_f_pointer (d_e, ee, [N]) + call c_f_pointer (d_f, ff, [N]) + do i = 1, N + if (dd(i) /= 11*i & + .or. ee(i) /= 31*i & + .or. ff(i) /= -17*i) then + fail = .true. + stop 2 + end if + end do + end block + !$acc end serial + if (fail) error stop 2 +end diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-13.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-13.f90 index deb2c28..f6bd27a 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/lib-13.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-13.f90 @@ -19,11 +19,10 @@ program main end do !$acc end parallel end do - !$acc end data call acc_wait_all_async (nprocs + 1) - call acc_wait (nprocs + 1) + !$acc end data if (acc_async_test (1) .neqv. .TRUE.) stop 1 if (acc_async_test (2) .neqv. .TRUE.) stop 2 |
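The new Fortran deep-mapping test above exercises the same 'target enter data' / 'target exit data' idiom across many component kinds (fixed arrays, deferred-length strings, pointers, allocatables). Reduced to a single allocatable component, the pattern looks roughly like this; an illustrative sketch only, with names not taken from the patch:

program sketch
  implicit none
  type t
     integer, allocatable :: a(:)
  end type t
  type(t) :: x

  allocate (x%a(4))

  ! Create the component (and its descriptor) on the device up front.
  !$omp target enter data map(alloc: x%a)
  !$omp target map(alloc: x%a)
    ! Bounds and allocation status must be visible inside the region.
    if (.not. allocated (x%a)) error stop
    x%a = [1, 2, 3, 4]
  !$omp end target
  ! Copy the device data back and release the device allocation.
  !$omp target exit data map(from: x%a)

  if (any (x%a /= [1, 2, 3, 4])) error stop
  deallocate (x%a)
end program sketch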
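Likewise, the new acc-attach-detach tests pair 'acc_attach'/'acc_detach' with mapping of both the containing variable and the pointer target. A reduced sketch of that pattern, assuming the Fortran interfaces in the openacc module that the tests rely on; variable names are illustrative:

program attach_sketch
  use openacc
  implicit none
  type t
     integer, pointer :: p(:) => null ()
  end type t
  type(t), target :: var
  integer, target :: buf(3)

  buf = [1, 2, 3]
  var%p => buf

  ! Map the derived-type variable and the pointer target ...
  !$acc enter data copyin(var, buf)
  ! ... then make the device copy of var%p point at the device copy of buf.
  call acc_attach (var%p)

  !$acc serial
    if (any (var%p /= [1, 2, 3])) stop 1
  !$acc end serial

  ! Restore the device pointer before unmapping.
  call acc_detach (var%p)
  !$acc exit data delete(var, buf)
end program attach_sketch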